In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier


In [2]:
# Read the insurance records from CSV into `df`. The bare `df` on the last
# line makes the notebook render the loaded frame as this cell's output.
# NOTE(review): the path is absolute, so the file must exist at exactly this
# location for the cell to run.
df = pd.read_csv(filepath_or_buffer="/content/medical_insurance.csv")
df


Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.900,0,yes,southwest,16884.92400
1,18,male,33.770,1,no,southeast,1725.55230
2,28,male,33.000,3,no,southeast,4449.46200
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.880,0,no,northwest,3866.85520
...,...,...,...,...,...,...,...
2767,47,female,45.320,1,no,southeast,8569.86180
2768,21,female,34.600,0,no,southwest,2020.17700
2769,19,male,26.030,1,yes,northwest,16450.89470
2770,23,male,18.715,0,no,northwest,21595.38229


In [3]:
# Remove exact duplicate rows first, then any row that still contains a
# missing value. Both calls return new frames; the result is rebound to `df`.
df = df.drop_duplicates().dropna()


In [4]:
# Integer-encode every object-dtype (string) column of `df` in place.
#
# A separate LabelEncoder is fitted per column and stored in `label_encoders`
# (keyed by column name), so each column's category -> code mapping
# (`classes_`) stays available for decoding or for encoding new rows.
# Re-fitting one shared encoder would retain only the last column's mapping.
#
# NOTE(review): LabelEncoder assigns codes in sorted order of the category
# values, so a multi-valued nominal column receives an arbitrary numeric
# order. Consider one-hot encoding if a model should not see that order.
label_encoders = {}
le = LabelEncoder()  # bound up front so `le` exists even with no object columns
for col in df.select_dtypes(include="object").columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le


In [5]:
X = df.drop("charges", axis=1)
y = (df["charges"] > df["charges"].median()).astype(int)
scaler = StandardScaler()
X = scaler.fit_transform(X)


In [6]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [7]:
# Train a 200-tree random forest (seeded for repeatable results) and predict
# labels for the held-out rows.
rf = RandomForestClassifier(n_estimators=200, random_state=42).fit(X_train, y_train)
rf_pred = rf.predict(X_test)

# Apply each metric to the predictions; the metric order matches the names
# being unpacked.
rf_acc, rf_prec, rf_rec, rf_f1 = (
    score(y_test, rf_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)

rf_acc, rf_prec, rf_rec, rf_f1


(0.9291044776119403, 0.9398496240601504, 0.9191176470588235, 0.929368029739777)

In [8]:
# Train a single decision tree (seeded for repeatable results) and predict
# labels for the held-out rows.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
dt_pred = dt.predict(X_test)

# Apply each metric to the predictions; the metric order matches the names
# being unpacked.
dt_acc, dt_prec, dt_rec, dt_f1 = (
    score(y_test, dt_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)

dt_acc, dt_prec, dt_rec, dt_f1


(0.8843283582089553,
 0.8571428571428571,
 0.9264705882352942,
 0.8904593639575972)

In [9]:
# Train a Gaussian naive Bayes classifier with default settings and predict
# labels for the held-out rows.
nb = GaussianNB().fit(X_train, y_train)
nb_pred = nb.predict(X_test)

# Apply each metric to the predictions; the metric order matches the names
# being unpacked.
nb_acc, nb_prec, nb_rec, nb_f1 = (
    score(y_test, nb_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)

nb_acc, nb_prec, nb_rec, nb_f1


(0.7164179104477612, 1.0, 0.4411764705882353, 0.6122448979591837)

In [10]:
# Side-by-side comparison of the three models: one row per model, one column
# per metric. Left as the last expression so the notebook renders the table.
pd.DataFrame(
    [
        ("Random Forest", rf_acc, rf_prec, rf_rec, rf_f1),
        ("Decision Tree", dt_acc, dt_prec, dt_rec, dt_f1),
        ("Naive Bayes", nb_acc, nb_prec, nb_rec, nb_f1),
    ],
    columns=["Model", "Accuracy", "Precision", "Recall", "F1 Score"],
)


Unnamed: 0,Model,Accuracy,Precision,Recall,F1 Score
0,Random Forest,0.929104,0.93985,0.919118,0.929368
1,Decision Tree,0.884328,0.857143,0.926471,0.890459
2,Naive Bayes,0.716418,1.0,0.441176,0.612245


In [11]:
# Print the per-class precision/recall/F1 report for each model, preceded by
# a heading so the three reports can be told apart in the output.
# Relies on `classification_report` being imported with the other metrics at
# the top of the notebook rather than in this late cell.
for model_name, model_pred in (
    ("Random Forest", rf_pred),
    ("Decision Tree", dt_pred),
    ("Naive Bayes", nb_pred),
):
    print(f"=== {model_name} ===")
    print(classification_report(y_test, model_pred))


              precision    recall  f1-score   support

           0       0.92      0.94      0.93       132
           1       0.94      0.92      0.93       136

    accuracy                           0.93       268
   macro avg       0.93      0.93      0.93       268
weighted avg       0.93      0.93      0.93       268

              precision    recall  f1-score   support

           0       0.92      0.84      0.88       132
           1       0.86      0.93      0.89       136

    accuracy                           0.88       268
   macro avg       0.89      0.88      0.88       268
weighted avg       0.89      0.88      0.88       268

              precision    recall  f1-score   support

           0       0.63      1.00      0.78       132
           1       1.00      0.44      0.61       136

    accuracy                           0.72       268
   macro avg       0.82      0.72      0.69       268
weighted avg       0.82      0.72      0.69       268

