# Lasso Regression

In [4]:
import numpy as np
import pandas as pd

import sklearn.model_selection as skm
from sklearn.model_selection import train_test_split
import sklearn.linear_model as skl
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from ISLP.models import ModelSpec as MS

# Load the raw data and drop the identifier columns that are not used as
# predictors.
df = pd.read_excel("./Goodbelly_Project_Data.xlsx")
df = df.drop(['Date', 'Region', 'Store'], axis=1)

# Columns to standardize. They are picked BEFORE one-hot encoding so the dummy
# columns created below stay as 0/1 indicators. The target ('Sales') is
# excluded: scaling it would put every error metric in z-score units and make
# percentage errors divide by values close to zero.
numerical_feature = (
    df.select_dtypes(include=['float64', 'int64', 'bool'])
    .columns
    .drop('Sales', errors='ignore')
)

# One-hot encode the categorical columns (first level dropped as the baseline).
categorical_feature = df.select_dtypes(include=['object']).columns
df = pd.get_dummies(df, columns=categorical_feature, drop_first=True, dtype=float)

# Select independent and dependent variables.
X = df.drop(['Sales'], axis=1)
y = df['Sales']

# A fixed seed keeps the split, and therefore the exported results, reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Standardize the predictors. The scaler is fitted on the training rows only
# and then applied to the test rows, so no test-set statistics leak into
# training. Explicit copies avoid writing into views of X.
scaler = StandardScaler()
X_train = X_train.copy()
X_test = X_test.copy()
X_train[numerical_feature] = scaler.fit_transform(X_train[numerical_feature])
X_test[numerical_feature] = scaler.transform(X_test[numerical_feature])

# Lasso regression: an elastic net with l1_ratio=1 is a pure L1 penalty.
# ElasticNetCV picks the regularization strength by cross-validation over
# 100 candidate alphas.
elastic_net = skl.ElasticNetCV(n_alphas=100, l1_ratio=1)

# Fit the model on the training split.
elastic_net.fit(X_train, y_train)

# Make predictions for the held-out split.
y_pred = elastic_net.predict(X_test)

# Compute the desired error measurements.
mse_ = mean_squared_error(y_test, y_pred)
rmse_ = np.sqrt(mse_)
mae_ = mean_absolute_error(y_test, y_pred)
r2_ = r2_score(y_test, y_pred)

# MAPE is undefined where the actual value is 0 (division by zero would turn
# the whole mean into inf/NaN), so those rows are left out of the average.
# If every actual value is 0 the metric is reported as NaN.
actual = np.asarray(y_test, dtype=float)
predicted = np.asarray(y_pred, dtype=float)
nonzero = actual != 0
if nonzero.any():
    mape_ = np.mean(np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])) * 100
else:
    mape_ = np.nan

# Obtain the fitted coefficient of every feature.
coefficients = pd.DataFrame({'Feature': X.columns, 'Coefficient': elastic_net.coef_})

# Count the features whose coefficient is exactly 0.
dropped_features = int(np.sum(elastic_net.coef_ == 0))
features = len(X.columns)

# Collect the results as a single record.
results = [{
    "dataset": "Goodbelly",
    "R^2": r2_,
    "MSE": mse_,
    "RMSE": rmse_,
    "MAE": mae_,
    "MAPE": mape_,
    "Features": features,
    "Dropped Features": dropped_features,
}]

In [5]:
# Turn the list of result records into a table (one row per record).
# NOTE: this rebinds `df`, which previously held the dataset.
df = pd.DataFrame(data=results)

In [6]:
# Write the summary metrics and the coefficient table to separate sheets of
# one workbook; the context manager saves and closes the file on exit.
with pd.ExcelWriter('GoodBelly Lasso Regression.xlsx', engine='openpyxl') as writer:
    for sheet, frame in (('results', df), ('coef', coefficients)):
        frame.to_excel(writer, sheet_name=sheet, index=False, header=True)
    