In [96]:
import pandas as pd

# Sample data (normally you'd load this from CSV)
data = {
    'Age': [45, 60, 55, 38, 70],
    'Bilirubin': [1.2, 3.4, 0.9, 1.5, 2.1],
    'Albumin': [3.4, 2.1, 3.8, 4.1, 2.8],
    'Prothrombin': [12.0, 18.5, 11.5, 10.2, 17.1],
    'Alk_Phos': [100, 180, 95, 110, 150],
    'SGOT': [55, 120, 40, 45, 110],
    'SGPT': [50, 135, 45, 50, 100],
    'Cholesterol': [210, 280, 190, 175, 250],
    'Has_Cirrhosis': [1, 1, 0, 0, 1]  # Target variable
}

df = pd.DataFrame(data)
df.head()


Unnamed: 0,Age,Bilirubin,Albumin,Prothrombin,Alk_Phos,SGOT,SGPT,Cholesterol,Has_Cirrhosis
0,45,1.2,3.4,12.0,100,55,50,210,1
1,60,3.4,2.1,18.5,180,120,135,280,1
2,55,0.9,3.8,11.5,95,40,45,190,0
3,38,1.5,4.1,10.2,110,45,50,175,0
4,70,2.1,2.8,17.1,150,110,100,250,1


In [97]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import joblib
X = df.drop('Has_Cirrhosis', axis=1)
y = df['Has_Cirrhosis']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier()
model.fit(X_train, y_train)

joblib.dump(model, 'model.pkl')
print(" Model trained and saved as model.pkl")


 Model trained and saved as model.pkl


In [98]:

patient_data = {
    'Age': 50,
    'Bilirubin': 1.1,
    'Albumin': 3.3,
    'Prothrombin': 11.8,
    'Alk_Phos': 120,
    'SGOT': 54,
    'SGPT': 47,
    'Cholesterol': 205
}

input_df = pd.DataFrame([patient_data])
model = joblib.load('model.pkl')
prediction = model.predict(input_df)

try:
    probability = model.predict_proba(input_df)
    confidence = round(max(probability[0]) * 100, 2)
except:
    confidence = "Not available"
print("🧾 Patient Input:\n", input_df)
print("\n Prediction:")
if prediction[0] == 1:
    print(" Likely has liver cirrhosis.")
else:
    print("Unlikely to have liver cirrhosis.")
print(" Confidence:", confidence if isinstance(confidence, str) else f"{confidence}%")


🧾 Patient Input:
    Age  Bilirubin  Albumin  Prothrombin  Alk_Phos  SGOT  SGPT  Cholesterol
0   50        1.1      3.3         11.8       120    54    47          205

 Prediction:
 Likely has liver cirrhosis.
 Confidence: 57.0%


In [99]:
Age,Bilirubin,Albumin,Prothrombin,Alk_Phos,SGOT,SGPT,Cholesterol


(45.0, 67.0, 78.0, 45.0, 34.0, 23.0, 567.0, 54.0)

In [100]:
from google.colab import files
import pandas as pd
uploaded = files.upload()


Saving cirrhosis.csv to cirrhosis (10).csv


In [101]:
import joblib
model = joblib.load('model.pkl')


In [102]:

missing = [col for col in required_features if col not in df.columns]
if missing:
    print(f" Missing columns: {missing}")
else:
    print(" All required features are present.")


 All required features are present.


In [103]:
if 'Cirrhosis_Prediction' in df.columns:
    sns.countplot(data=df, x='Cirrhosis_Prediction', palette='Set2')
    plt.title('Prediction Distribution')
    plt.show()
else:
    print("'Cirrhosis_Prediction' column not found in DataFrame.")


'Cirrhosis_Prediction' column not found in DataFrame.


In [114]:

required_columns = ['Cirrhosis_Prediction', 'Age', 'Albumin', 'SGOT']
missing_cols = [col for col in required_columns if col not in df.columns]

if missing_cols:
    print(f" Missing columns: {missing_cols}")
else:
    summary = df.groupby('Cirrhosis_Prediction')[['Age', 'Albumin', 'SGOT']].mean()
    print(" Group Summary (Averages):")
    print(summary)


 Missing columns: ['Cirrhosis_Prediction']


In [105]:
print(df.columns)


Index(['Age', 'Bilirubin', 'Albumin', 'Prothrombin', 'Alk_Phos', 'SGOT',
       'SGPT', 'Cholesterol', 'Has_Cirrhosis'],
      dtype='object')


In [106]:
required_features = ['Age', 'Bilirubin', 'Albumin', 'Prothrombin', 'Alk_Phos', 'SGOT', 'SGPT', 'Cholesterol']
missing = [col for col in required_features if col not in df.columns]
if missing:
    print("Missing columns:", missing)
else:
    df_filtered = df[required_features]
    predictions = model.predict(df_filtered)
    if len(predictions) == len(df):
        df['Cirrhosis_Prediction'] = ['Yes' if p == 1 else 'No' for p in predictions]
        print(" Predictions added to the DataFrame.")
    else:
        print("Length mismatch between predictions and input data.")


 Predictions added to the DataFrame.


In [107]:
import pandas as pd
data = {
    'Age': [50, 65, 40, 70, 55],
    'Bilirubin': [1.1, 2.5, 0.9, 3.1, 1.8],
    'Albumin': [3.3, 2.0, 4.1, 2.2, 3.6],
    'Prothrombin': [11.8, 15.5, 10.2, 17.0, 12.5],
    'Alk_Phos': [120, 180, 110, 170, 140],
    'SGOT': [54, 120, 45, 130, 60],
    'SGPT': [47, 110, 50, 125, 58],
    'Cholesterol': [205, 250, 175, 290, 220]
}

df_test = pd.DataFrame(data)
df_test.to_csv('test_patients.csv', index=False)
print(" test_patients.csv file created.")


 test_patients.csv file created.


In [108]:
from google.colab import files
files.upload()


Saving cirrhosis.csv to cirrhosis (11).csv


{'cirrhosis (11).csv': b'ID,N_Days,Status,Drug,Age,Sex,Ascites,Hepatomegaly,Spiders,Edema,Bilirubin,Cholesterol,Albumin,Copper,Alk_Phos,SGOT,Tryglicerides,Platelets,Prothrombin,Stage\n1,400,D,D-penicillamine,21464,F,Y,Y,Y,Y,14.5,261,2.6,156,1718,137.95,172,190,12.2,4\n2,4500,C,D-penicillamine,20617,F,N,Y,Y,N,1.1,302,4.14,54,7394.8,113.52,88,221,10.6,3\n3,1012,D,D-penicillamine,25594,M,N,N,N,S,1.4,176,3.48,210,516,96.1,55,151,12,4\n4,1925,D,D-penicillamine,19994,F,N,Y,Y,S,1.8,244,2.54,64,6121.8,60.63,92,183,10.3,4\n5,1504,CL,Placebo,13918,F,N,Y,Y,N,3.4,279,3.53,143,671,113.15,72,136,10.9,3\n6,2503,D,Placebo,24201,F,N,Y,N,N,0.8,248,3.98,50,944,93,63,NA,11,3\n7,1832,C,Placebo,20284,F,N,Y,N,N,1,322,4.09,52,824,60.45,213,204,9.7,3\n8,2466,D,Placebo,19379,F,N,N,N,N,0.3,280,4,52,4651.2,28.38,189,373,11,3\n9,2400,D,D-penicillamine,15526,F,N,N,Y,N,3.2,562,3.08,79,2276,144.15,88,251,11,2\n10,51,D,Placebo,25772,F,Y,N,Y,Y,12.6,200,2.74,140,918,147.25,143,302,11.5,4\n11,3762,D,Placebo,19619,F,N,Y,Y

In [109]:
!pip install joblib pandas scikit-learn
import pandas as pd
import joblib
from google.colab import files




In [110]:
model = joblib.load("model.pkl")
print(" Model loaded")


 Model loaded


In [111]:
uploaded = files.upload()
filename = list(uploaded.keys())[0]
df = pd.read_csv(filename)

print("📄 Preview of uploaded data:")
df.head()


Saving cirrhosis.csv to cirrhosis (12).csv
📄 Preview of uploaded data:


Unnamed: 0,ID,N_Days,Status,Drug,Age,Sex,Ascites,Hepatomegaly,Spiders,Edema,Bilirubin,Cholesterol,Albumin,Copper,Alk_Phos,SGOT,Tryglicerides,Platelets,Prothrombin,Stage
0,1,400,D,D-penicillamine,21464,F,Y,Y,Y,Y,14.5,261.0,2.6,156.0,1718.0,137.95,172.0,190.0,12.2,4.0
1,2,4500,C,D-penicillamine,20617,F,N,Y,Y,N,1.1,302.0,4.14,54.0,7394.8,113.52,88.0,221.0,10.6,3.0
2,3,1012,D,D-penicillamine,25594,M,N,N,N,S,1.4,176.0,3.48,210.0,516.0,96.1,55.0,151.0,12.0,4.0
3,4,1925,D,D-penicillamine,19994,F,N,Y,Y,S,1.8,244.0,2.54,64.0,6121.8,60.63,92.0,183.0,10.3,4.0
4,5,1504,CL,Placebo,13918,F,N,Y,Y,N,3.4,279.0,3.53,143.0,671.0,113.15,72.0,136.0,10.9,3.0


In [115]:

required_features = ['Age', 'Bilirubin', 'Albumin', 'Prothrombin',
                     'Alk_Phos', 'SGOT', 'SGPT', 'Cholesterol']
missing = [col for col in required_features if col not in df.columns]
if missing:
    print(f" Missing required columns: {missing}")
else:
    df_filtered = df[required_features]
    predictions = model.predict(df_filtered)

    df['Cirrhosis_Prediction'] = ['Yes' if p == 1 else 'No' for p in predictions]

    print("Predictions done. Here's the result:")
    df.head()


 Missing required columns: ['SGPT']


In [113]:

df.to_csv("prediction_results.csv", index=False)
files.download("prediction_results.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [116]:

df.to_csv("prediction_results.csv", index=False)
files.download("prediction_results.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [117]:
import pandas as pd
import joblib
from google.colab import files


In [124]:
uploaded = files.upload()
model = joblib.load('model.pkl')
print(" Model loaded successfully")


Saving prediction_results (3).csv to prediction_results (3) (1).csv
 Model loaded successfully


In [120]:
uploaded = files.upload()  # Upload your CSV (e.g. test_patients.csv)
csv_file = list(uploaded.keys())[0]
df = pd.read_csv(csv_file)

print("📄 Uploaded Patient Data Preview:")
display(df.head())


Saving prediction_results (2).csv to prediction_results (2).csv
📄 Uploaded Patient Data Preview:


Unnamed: 0,ID,N_Days,Status,Drug,Age,Sex,Ascites,Hepatomegaly,Spiders,Edema,Bilirubin,Cholesterol,Albumin,Copper,Alk_Phos,SGOT,Tryglicerides,Platelets,Prothrombin,Stage
0,1,400,D,D-penicillamine,21464,F,Y,Y,Y,Y,14.5,261.0,2.6,156.0,1718.0,137.95,172.0,190.0,12.2,4.0
1,2,4500,C,D-penicillamine,20617,F,N,Y,Y,N,1.1,302.0,4.14,54.0,7394.8,113.52,88.0,221.0,10.6,3.0
2,3,1012,D,D-penicillamine,25594,M,N,N,N,S,1.4,176.0,3.48,210.0,516.0,96.1,55.0,151.0,12.0,4.0
3,4,1925,D,D-penicillamine,19994,F,N,Y,Y,S,1.8,244.0,2.54,64.0,6121.8,60.63,92.0,183.0,10.3,4.0
4,5,1504,CL,Placebo,13918,F,N,Y,Y,N,3.4,279.0,3.53,143.0,671.0,113.15,72.0,136.0,10.9,3.0


In [131]:

required_features = ['Age', 'Bilirubin', 'Albumin', 'Prothrombin',
                     'Alk_Phos', 'SGOT', 'SGPT', 'Cholesterol']
missing = [col for col in required_features if col not in df.columns]
if missing:
    print(f" Missing columns in uploaded file: {missing}")
else:
    input_df = df[required_features]
    predictions = model.predict(input_df)

    df['Cirrhosis_Prediction'] = ['Yes' if p == 1 else 'No' for p in predictions]
    print("Prediction Results:")
    display(df.head())

    print("\n Prediction Summary:")
    print(df['Cirrhosis_Prediction'].value_counts())


 Missing columns in uploaded file: ['SGPT']


In [130]:
import pandas as pd
import joblib

model = joblib.load("model.pkl")
print(" Model loaded.")


 Model loaded.


In [132]:

sample_data = pd.DataFrame({
    'Age': [50, 65, 40, 70, 55],
    'Bilirubin': [1.1, 2.5, 0.9, 3.1, 1.8],
    'Albumin': [3.3, 2.0, 4.1, 2.2, 3.6],
    'Prothrombin': [11.8, 15.5, 10.2, 17.0, 12.5],
    'Alk_Phos': [120, 180, 110, 170, 140],
    'SGOT': [54, 120, 45, 130, 60],
    'SGPT': [47, 110, 50, 125, 58],
    'Cholesterol': [205, 250, 175, 290, 220]
})

print("Sample Patient Input:")
display(sample_data)


Sample Patient Input:


Unnamed: 0,Age,Bilirubin,Albumin,Prothrombin,Alk_Phos,SGOT,SGPT,Cholesterol
0,50,1.1,3.3,11.8,120,54,47,205
1,65,2.5,2.0,15.5,180,120,110,250
2,40,0.9,4.1,10.2,110,45,50,175
3,70,3.1,2.2,17.0,170,130,125,290
4,55,1.8,3.6,12.5,140,60,58,220


In [133]:

predictions = model.predict(sample_data)
sample_data['Cirrhosis_Prediction'] = ['Yes' if p == 1 else 'No' for p in predictions]
print("Prediction Output:")
display(sample_data)
print("\nPrediction Summary:")
print(sample_data['Cirrhosis_Prediction'].value_counts())


Prediction Output:


Unnamed: 0,Age,Bilirubin,Albumin,Prothrombin,Alk_Phos,SGOT,SGPT,Cholesterol,Cirrhosis_Prediction
0,50,1.1,3.3,11.8,120,54,47,205,Yes
1,65,2.5,2.0,15.5,180,120,110,250,Yes
2,40,0.9,4.1,10.2,110,45,50,175,No
3,70,3.1,2.2,17.0,170,130,125,290,Yes
4,55,1.8,3.6,12.5,140,60,58,220,Yes



Prediction Summary:
Cirrhosis_Prediction
Yes    4
No     1
Name: count, dtype: int64
