# Lasso Regression

## Boston Dataset
Boston.csv

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.preprocessing import PolynomialFeatures, OneHotEncoder
from sklearn.metrics import r2_score,  root_mean_squared_error, mean_absolute_error
from sklearn.compose import ColumnTransformer, make_column_selector
import os
# Switch the working directory so the bare CSV filenames passed to pd.read_csv
# below resolve inside ../Datasets/. The path is relative, so it depends on the
# directory the kernel was started in.
os.chdir('../Datasets/')

In [2]:
# Load the Boston data and separate the target column 'medv' from the predictors.
boston = pd.read_csv('Boston.csv')
y = boston['medv']
X = boston.drop(columns='medv')

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=25,
)

### Hyper-parameter Tuning

In [3]:
# Candidate regularisation strengths: 1000 evenly spaced values from 0.001 to 10.
alphas = np.linspace(0.001, 10, 1000)

# Fit one Lasso model per alpha and record its mean absolute error on the
# held-out split.
# NOTE(review): alpha is chosen on the same split that the score is reported
# on; consider cross-validating on the training data instead.
scores = []
for a in alphas:
    lasso = Lasso(alpha=a).fit(X_train, y_train)
    y_pred = lasso.predict(X_test)
    scores.append([a, mean_absolute_error(y_test, y_pred)])

# Tabulate the sweep and show it with the lowest error first.
df_scores = pd.DataFrame(data=scores, columns=['alpha', 'score'])
df_scores.sort_values(by='score')

Unnamed: 0,alpha,score
2,0.021018,3.073399
3,0.031027,3.078903
1,0.011009,3.086172
4,0.041036,3.090440
5,0.051045,3.110526
...,...,...
995,9.959964,3.940910
996,9.969973,3.941217
997,9.979982,3.941524
998,9.989991,3.941832


## Housing Dataset
Housing.csv

In [4]:
# Load the Housing data; 'price' is the target and every other column is a predictor.
housing = pd.read_csv('Housing.csv')
y = housing['price']
X = housing.drop(columns='price')

In [5]:
# One-hot encode the categorical columns, dropping the first level of each,
# and emit a dense pandas DataFrame so the generated column names are kept.
ohe = OneHotEncoder(drop='first', sparse_output=False).set_output(transform='pandas')

# Pick the object-dtype columns from whatever frame is being fitted instead of
# a name list taken from `housing`; all remaining columns pass through unchanged.
col_trnf = ColumnTransformer(
    [('OHE', ohe, make_column_selector(dtype_include=object))],
    remainder='passthrough',
    verbose_feature_names_out=False,
).set_output(transform='pandas')

# Encode straight from `housing` rather than from the previous value of X.
# The earlier form (X = col_trnf.fit_transform(X)) fed the already-encoded
# frame back in when the cell was re-run and failed because the original
# categorical columns were gone; this form gives the same X on every run.
X = col_trnf.fit_transform(housing.drop('price', axis=1))

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=25)

### Hyper-parameter Tuning

Building the grid of candidate alpha values with `np.linspace`.

In [6]:
# Candidate regularisation strengths: 1000 evenly spaced values from 0.001 to 10.
alphas = np.linspace(0.001, 10, 1000)

# Fit one Lasso model per alpha and record its mean absolute error on the
# held-out split.
# NOTE(review): the recorded output ranks alpha = 10 (the largest grid value)
# first, so the minimum may lie beyond this range; a wider grid is worth trying.
scores = []
for a in alphas:
    lasso = Lasso(alpha=a).fit(X_train, y_train)
    y_pred = lasso.predict(X_test)
    scores.append([a, mean_absolute_error(y_test, y_pred)])

# Tabulate the sweep and show it with the lowest error first.
df_scores = pd.DataFrame(data=scores, columns=['alpha', 'score'])
df_scores.sort_values(by='score')

Unnamed: 0,alpha,score
999,10.000000,11594.596229
998,9.989991,11594.602598
997,9.979982,11594.608967
996,9.969973,11594.615335
995,9.959964,11594.621704
...,...,...
4,0.041036,11600.915827
3,0.031027,11600.922041
2,0.021018,11600.928410
1,0.011009,11600.934779


## Exp_Salaries Dataset
Exp_Salaries.csv

In [7]:
# Load the salaries data; 'Salary' is the target and every other column is a predictor.
sals = pd.read_csv('Exp_Salaries.csv')
y = sals['Salary']
X = sals.drop(columns='Salary')

# One-hot encode the object-dtype columns (first level dropped) and pass the
# remaining columns through unchanged, keeping pandas output and plain column names.
ohe = OneHotEncoder(drop='first', sparse_output=False).set_output(transform='pandas')
col_trnf = ColumnTransformer(
    transformers=[('OHE', ohe, make_column_selector(dtype_include=object))],
    remainder='passthrough',
    verbose_feature_names_out=False,
).set_output(transform='pandas')

X = col_trnf.fit_transform(X)

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=25,
)

### Hyper-parameter Tuning

In [8]:
# Candidate regularisation strengths: 1000 evenly spaced values from 0.001 to 1.
alphas = np.linspace(0.001, 1, 1000)

# Fit one Lasso model per alpha and record its mean absolute error on the
# held-out split.
# NOTE(review): the recorded output ranks alpha = 1 (the largest grid value)
# first, so the minimum may lie beyond this range; a wider grid is worth trying.
scores = []
for a in alphas:
    lasso = Lasso(alpha=a).fit(X_train, y_train)
    y_pred = lasso.predict(X_test)
    scores.append([a, mean_absolute_error(y_test, y_pred)])

# Tabulate the sweep and show it with the lowest error first.
df_scores = pd.DataFrame(data=scores, columns=['alpha', 'score'])
df_scores.sort_values(by='score')

Unnamed: 0,alpha,score
999,1.000,3034.694328
998,0.999,3034.694764
997,0.998,3034.695199
996,0.997,3034.695635
995,0.996,3034.696071
...,...,...
4,0.005,3035.199534
3,0.004,3035.199970
2,0.003,3035.200510
1,0.002,3035.200947


`pd.get_dummies()`

In [9]:
# Dummy-encode the categorical columns of the full housing frame in one call,
# dropping the first level of each; the result is only displayed, not stored.
pd.get_dummies(data=housing, drop_first=True)

Unnamed: 0,price,lotsize,bedrooms,bathrms,stories,garagepl,driveway_yes,recroom_yes,fullbase_yes,gashw_yes,airco_yes,prefarea_yes
0,42000.0,5850,3,1,2,1,True,False,True,False,False,False
1,38500.0,4000,2,1,1,0,True,False,False,False,False,False
2,49500.0,3060,3,1,1,0,True,False,False,False,False,False
3,60500.0,6650,3,1,2,0,True,True,False,False,False,False
4,61000.0,6360,2,1,1,0,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
541,91500.0,4800,3,2,4,0,True,True,False,False,True,False
542,94000.0,6000,3,2,4,0,True,False,False,False,True,False
543,103000.0,6000,3,2,4,1,True,True,False,False,True,False
544,105000.0,6000,3,2,2,1,True,True,False,False,True,False
