In [45]:
## Importing Libraries
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error, r2_score, classification_report, roc_auc_score

## Classification

In [46]:
# Read the COVID toy dataset from its local CSV file and preview the first rows.
covid_csv_path = r"C:\Users\singh\OneDrive\Desktop\Python\Data\covid_toy.csv"
df = pd.read_csv(covid_csv_path)
df.head()

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


In [47]:
# Count the missing values in every column (`isna` is the method `isnull` aliases).
df.isna().sum()

age           0
gender        0
fever        10
cough         0
city          0
has_covid     0
dtype: int64

In [48]:
# Replace the missing `fever` readings with the column mean.
# NOTE(review): the mean is taken over all rows, i.e. before the later train/test
# split, so the imputed value also reflects rows that end up in the test set.
fever_mean = df['fever'].mean()
df['fever'] = df['fever'].fillna(fever_mean)

In [49]:
# Label-encode every text column in place so the estimators receive numeric input.
# Columns are detected with the pandas dtype helpers rather than `dtype == 'O'`,
# so they are still picked up when pandas stores text in its dedicated string
# dtype instead of `object`.
cat_columns = [
    feature for feature in df.columns
    if pd.api.types.is_object_dtype(df[feature])
    or pd.api.types.is_string_dtype(df[feature])
]

# Keep one fitted encoder per column. Re-fitting a single shared encoder would
# discard every mapping except the last one, making it impossible to translate
# the integer codes back with `label_encoders[column].inverse_transform(...)`.
label_encoders = {}
for column in cat_columns:
    lb = LabelEncoder()
    df[column] = lb.fit_transform(df[column])
    label_encoders[column] = lb

In [50]:
# Separate the target column from the predictor columns.
target_column = 'has_covid'
y = df[target_column]
x = df.drop(columns=[target_column])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [51]:
def classification_evaluate(y_true, y_pred, y_proba=None):
    """Score a set of classification predictions.

    Parameters
    ----------
    y_true
        Ground-truth labels.
    y_pred
        Predicted labels, compared against ``y_true`` for the accuracy and
        the classification report.
    y_proba : optional
        Scores forwarded to ``roc_auc_score``. When omitted, no ROC-AUC is
        computed.

    Returns
    -------
    tuple
        ``(accuracy, report, roc_auc)`` where ``report`` is the value returned
        by ``classification_report`` and ``roc_auc`` is ``None`` if ``y_proba``
        was not supplied.
    """
    accuracy = accuracy_score(y_true, y_pred)
    report = classification_report(y_true, y_pred)

    # Without scores there is nothing to rank, so skip the ROC-AUC entirely.
    if y_proba is None:
        return accuracy, report, None

    return accuracy, report, roc_auc_score(y_true, y_proba)

In [52]:
# Candidate classifiers, keyed by the name printed in the report.
# The tree-based estimators are randomised, so they are seeded (with the same
# seed used for the train/test split) to make the printed metrics reproducible
# across kernel restarts.
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree Classifier": DecisionTreeClassifier(random_state=42),
    "Random Forest Classifier": RandomForestClassifier(random_state=42)
}

# Fit each classifier on the training split, then report accuracy, ROC-AUC and
# the classification report for both the training and the test split.
for model_name, model in models.items():

    model.fit(x_train, y_train)

    # Hard label predictions feed the accuracy and the classification report.
    y_train_pred = model.predict(x_train)
    y_test_pred = model.predict(x_test)

    # Column 1 of `predict_proba` is the score of the second class; it is what
    # `classification_evaluate` passes on to the ROC-AUC computation.
    y_train_proba = model.predict_proba(x_train)[:, 1]
    y_test_proba = model.predict_proba(x_test)[:, 1]

    train_acc, train_report, train_roc = classification_evaluate(
        y_train, y_train_pred, y_train_proba
    )
    test_acc, test_report, test_roc = classification_evaluate(
        y_test, y_test_pred, y_test_proba
    )

    print(model_name)

    print("Model Performance on Training Set")
    print(f"- Accuracy  : {train_acc:.4f}")
    print(f"- ROC-AUC   : {train_roc:.4f}")
    print("Classification Report:\n", train_report)

    print("-" * 60)

    print("Model Performance on Test Set")
    print(f"- Accuracy  : {test_acc:.4f}")
    print(f"- ROC-AUC   : {test_roc:.4f}")
    print("Classification Report:\n", test_report)

    print("=" * 60, "\n")


Logistic Regression
Model Performance on Training Set
- Accuracy  : 0.6250
- ROC-AUC   : 0.6197
Classification Report:
               precision    recall  f1-score   support

           0       0.63      0.69      0.66        42
           1       0.62      0.55      0.58        38

    accuracy                           0.62        80
   macro avg       0.62      0.62      0.62        80
weighted avg       0.62      0.62      0.62        80

------------------------------------------------------------
Model Performance on Test Set
- Accuracy  : 0.7500
- ROC-AUC   : 0.7253
Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.69      0.78        13
           1       0.60      0.86      0.71         7

    accuracy                           0.75        20
   macro avg       0.75      0.77      0.74        20
weighted avg       0.80      0.75      0.76        20


Decision Tree Classifier
Model Performance on Training Set
- Accurac

## Regression

In [53]:
# Read the insurance dataset from its local CSV file and preview the first rows.
# This rebinds `df`, replacing the frame used in the classification section.
insurance_csv_path = r"C:\Users\singh\OneDrive\Desktop\Python\Data\insurance - insurance.csv"
df = pd.read_csv(insurance_csv_path)
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [54]:
# Count the missing values in every column (`isna` is the method `isnull` aliases).
df.isna().sum()

age         0
sex         0
bmi         0
children    0
smoker      0
region      0
charges     0
dtype: int64

In [55]:
# Label-encode every text column in place so the estimators receive numeric input.
# Columns are detected with the pandas dtype helpers rather than `dtype == 'O'`,
# so they are still picked up when pandas stores text in its dedicated string
# dtype instead of `object`.
cat_columns = [
    feature for feature in df.columns
    if pd.api.types.is_object_dtype(df[feature])
    or pd.api.types.is_string_dtype(df[feature])
]

# Keep one fitted encoder per column. Re-fitting a single shared encoder would
# discard every mapping except the last one, making it impossible to translate
# the integer codes back with `label_encoders[column].inverse_transform(...)`.
label_encoders = {}
for column in cat_columns:
    lb = LabelEncoder()
    df[column] = lb.fit_transform(df[column])
    label_encoders[column] = lb

In [56]:
# Preview the first five rows to check the result of the label encoding.
df.head(5)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,0,27.9,0,1,3,16884.924
1,18,1,33.77,1,0,2,1725.5523
2,28,1,33.0,3,0,2,4449.462
3,33,1,22.705,0,0,1,21984.47061
4,32,1,28.88,0,0,1,3866.8552


In [57]:
# Separate the target column from the predictor columns.
target_column = 'charges'
y = df[target_column]
x = df.drop(columns=[target_column])

# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42
)

In [58]:
def evaluate_model(true, predicted):
    """Compute regression error metrics for a set of predictions.

    Parameters
    ----------
    true
        Observed target values.
    predicted
        Values predicted for the same samples.

    Returns
    -------
    tuple
        ``(mae, rmse, r2, mse)`` in that order: mean absolute error, root
        mean squared error, R-squared score and mean squared error.
    """
    squared_error = mean_squared_error(true, predicted)
    absolute_error = mean_absolute_error(true, predicted)
    # RMSE is derived from the MSE computed above rather than recomputed.
    root_squared_error = np.sqrt(squared_error)
    r2_value = r2_score(true, predicted)
    return absolute_error, root_squared_error, r2_value, squared_error

In [59]:
# Candidate regressors, keyed by the name printed in the report.
# The tree-based estimators are randomised, so they are seeded (with the same
# seed used for the train/test split) to make the printed metrics reproducible
# across kernel restarts.
models = {
    "Random Forest" : RandomForestRegressor(random_state=42),
    "Decision Tree" : DecisionTreeRegressor(random_state=42),
    "Linear Regression" : LinearRegression(),
    "Ridge" : Ridge(alpha= 0.5),
    "Lasso" : Lasso(alpha= 0.2),
}

# Fit each regressor on the training split and report its error metrics on both
# splits. Iterating over `.items()` yields the name and the estimator together,
# instead of rebuilding the key/value lists on every pass to index into them.
for model_name, model in models.items():
    model.fit(x_train, y_train)

    ## Make Prediction
    y_train_pred = model.predict(x_train)
    y_test_pred = model.predict(x_test)

    ## Evaluate Train and Test Dataset
    model_train_mae, model_train_rmse, model_train_r2, model_train_mse = evaluate_model(y_train, y_train_pred)
    model_test_mae, model_test_rmse, model_test_r2, model_test_mse = evaluate_model(y_test, y_test_pred)

    print(model_name)

    print("Model Perfromance for Training Set")
    print("- Root mean Squared Error : {:.4f}".format(model_train_rmse))
    print("- Mean Absolute Error : {:.4f}".format(model_train_mae))
    print("- R-Sqaured : {:.4f}".format(model_train_r2))
    print("- Mean Squared Error : {:.4f}".format(model_train_mse))

    print("---------------------------------------------------------------")

    print("Model Perfromance for Test Set")
    print("- Root mean Squared Error : {:.4f}".format(model_test_rmse))
    print("- Mean Absolute Error : {:.4f}".format(model_test_mae))
    print("- R-Sqaured : {:.4f}".format(model_test_r2))
    print("- Mean Squared Error : {:.4f}".format(model_test_mse))

    print('=' * 35)
    print("\n")

Random Forest
Model Perfromance for Training Set
- Root mean Squared Error : 1865.7295
- Mean Absolute Error : 1029.2775
- R-Sqaured : 0.9760
- Mean Squared Error : 3480946.4383
---------------------------------------------------------------
Model Perfromance for Test Set
- Root mean Squared Error : 4759.6072
- Mean Absolute Error : 2573.2888
- R-Sqaured : 0.8499
- Mean Squared Error : 22653860.9688


Decision Tree
Model Perfromance for Training Set
- Root mean Squared Error : 427.3736
- Mean Absolute Error : 19.0841
- R-Sqaured : 0.9987
- Mean Squared Error : 182648.1711
---------------------------------------------------------------
Model Perfromance for Test Set
- Root mean Squared Error : 6436.4099
- Mean Absolute Error : 2946.4391
- R-Sqaured : 0.7254
- Mean Squared Error : 41427371.8520


Linear Regression
Model Perfromance for Training Set
- Root mean Squared Error : 6083.6907
- Mean Absolute Error : 4182.6699
- R-Sqaured : 0.7449
- Mean Squared Error : 37011292.5832
-----------