In [10]:
import numpy as np
import pandas as pd

import sklearn.model_selection as skm
from sklearn.model_selection import train_test_split
import sklearn.linear_model as skl
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from ISLP.models import ModelSpec as MS

Medical_Cost  = pd.read_csv('data/insurance.csv')

#Columns to drop

#Standardize 
scaler = StandardScaler()
numerical_feature = Medical_Cost .select_dtypes(include=['float64','int64','bool']).columns
Medical_Cost [numerical_feature]= scaler.fit_transform(Medical_Cost [numerical_feature])

#Categorize 
categorical_feature = Medical_Cost .select_dtypes(include=['object']).columns
Medical_Cost  = pd.get_dummies(Medical_Cost , columns=categorical_feature,drop_first=True,dtype=float)

#Select Independent and Dependent variables 
X = Medical_Cost .drop('charges', axis=1)
y = Medical_Cost ['charges']

X_train, X_test, y_train, y_test = train_test_split(X, y)

# Select Ridge Regression l1_ration=0
elastic_net = skl.ElasticNet(alpha=0.5, l1_ratio=0)

#Fit model
elastic_net.fit(X_train, y_train)

#Make preditions
y_pred = elastic_net.predict(X_test)

#Declare desired measurments 
mse_ = mean_squared_error(y_test,y_pred)
rmse_ = np.sqrt(mse_)
mae_ =mean_absolute_error(y_test,y_pred)
r2_ = r2_score(y_test,y_pred)
mape_ = np.mean(np.abs((y_test - y_pred) / y_test))*100

#obtain Coefficients 
coefficients = pd.DataFrame({'Feature': X.columns, 'Coefficient': elastic_net.coef_})

#Obtain desired results
results =[{
    "dataset":"Medical_Cost ",
    "R^2":r2_,
    "MSE":mse_,
    "RMSE":rmse_,
    "MAE":mae_,
    "MAPE":mape_}]

  model = cd_fast.enet_coordinate_descent(


In [11]:
df = pd.DataFrame(results)

In [12]:
with pd.ExcelWriter('Ridge Regression.xlsx', engine='openpyxl', mode='a',if_sheet_exists="overlay") as writer:
    df.to_excel(writer, sheet_name='results', index=False,startrow=writer.sheets['results'].max_row, header=None)

# Lasso Regression

In [13]:

# Select Lasso Regression l1_ration=1
elastic_net = skl.ElasticNetCV(n_alphas=100, l1_ratio=1)

#Fit model
elastic_net.fit(X_train, y_train)

#Make preditions
y_pred = elastic_net.predict(X_test)

#Declare desired measurments 
mse_ = mean_squared_error(y_test,y_pred)
rmse_ = np.sqrt(mse_)
mae_ =mean_absolute_error(y_test,y_pred)
r2_ = r2_score(y_test,y_pred)
mape_ = np.mean(np.abs((y_test - y_pred) / y_test))*100

#obtain Coefficients 
coefficients = pd.DataFrame({'Feature': X.columns, 'Coefficient': elastic_net.coef_})

#Obtain dropped(0-coefficient) features
dropped_features = coefficients['Coefficient'].value_counts().get(0, 0) 
features = (len(X.columns))

#Obtain desired results
results =[{
    "dataset":"Medical_Cost",
    "R^2":r2_,
    "MSE":mse_,
    "RMSE":rmse_,
    "MAE":mae_,
    "MAPE":mape_,
    "Features":features,
    "Dropped Features":dropped_features}]

In [14]:
df = pd.DataFrame(results)

In [15]:
with pd.ExcelWriter('Lasso Regression.xlsx', engine='openpyxl', mode='a',if_sheet_exists="overlay") as writer:
    df.to_excel(writer, sheet_name='results', index=False,startrow=writer.sheets['results'].max_row, header=None)