In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Train a random-forest classifier on the cirrhosis CSV, report hold-out
# metrics, and pickle both the fitted model and the fitted scaler.

# Load the raw table; N_Days is removed up front and is never used as an input.
df = pd.read_csv('/content/cirrhosis.csv')
df = df.drop(['N_Days'], axis=1)

# Encode the categorical inputs as numbers. Series.map turns any value that is
# not a key of the mapping into NaN; those NaNs are filled by the median
# imputation further down.
df['Sex'] = df['Sex'].map({'M': 1, 'F': 0})
# No imputation has run yet, so missing Drug entries reach the encoder as-is.
df['Drug'] = LabelEncoder().fit_transform(df['Drug'])

binary_cols = ['Ascites', 'Hepatomegaly', 'Spiders', 'Edema']
for col in binary_cols:
    # 'S' is folded into the positive class together with 'Y'.
    df[col] = df[col].map({'Y': 1, 'N': 0, 'S': 1})

# Median-impute every numeric column.
# NOTE(review): the medians are computed on the full table, before the
# train/test split below, so test rows influence the imputed values.
numeric_cols = df.select_dtypes(include=[np.number]).columns
df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].median())


def _binarize_status(value):
    """Return 0 for a censored status (integer 0 or the code 'C'), else 1.

    The previous check compared against the integer 0 only. A string-coded
    Status column never equals 0, so every row was labelled 1 and the model
    was trained on a single class. Numeric input behaves exactly as before.
    NOTE(review): treating 'C' as the string counterpart of 0 assumes the
    C / CL / D coding of the public cirrhosis data -- confirm against the CSV.
    """
    if isinstance(value, str):
        return 0 if value == 'C' else 1
    return 0 if value == 0 else 1


df['Status'] = df['Status'].apply(_binarize_status)

# Feature matrix and binary target.
X = df[['Drug', 'Age', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders', 'Edema',
        'Bilirubin', 'Cholesterol', 'Albumin', 'Copper', 'Alk_Phos',
        'SGOT', 'Tryglicerides', 'Platelets', 'Prothrombin', 'Stage']]
y = df['Status']

# 80/20 hold-out split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# The scaler is fit on the training split only and reused on the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# The forest is trained on the *scaled* arrays, so anything loading the
# pickled model must apply the pickled scaler to its inputs first.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train_scaled, y_train)

# Evaluate on the hold-out split.
y_pred = rf.predict(X_test_scaled)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d', cmap='Blues')
plt.title("Confusion Matrix")
plt.show()

# Persist the fitted model and the scaler it depends on.
with open('rf_acc_68.pkl', 'wb') as f:
    pickle.dump(rf, f)

with open('normalizer.pkl', 'wb') as f:
    pickle.dump(scaler, f)

# Echo the feature order/count the saved artifacts expect at inference time.
print(X.columns.tolist())
print("Number of features in X:", X.shape[1])