In [1]:
from sklearn.datasets import fetch_california_housing

In [2]:
# Load the California housing dataset bundle (features, target and metadata).
housing = fetch_california_housing()

In [6]:
# Split the bundle into the feature matrix and the target vector.
X, y = housing.data, housing.target

# Show both arrays, separated by a 50-character dashed rule.
print(X, "-" * 50, y, sep="\n")

[[   8.3252       41.            6.98412698 ...    2.55555556
    37.88       -122.23      ]
 [   8.3014       21.            6.23813708 ...    2.10984183
    37.86       -122.22      ]
 [   7.2574       52.            8.28813559 ...    2.80225989
    37.85       -122.24      ]
 ...
 [   1.7          17.            5.20554273 ...    2.3256351
    39.43       -121.22      ]
 [   1.8672       18.            5.32951289 ...    2.12320917
    39.43       -121.32      ]
 [   2.3886       16.            5.25471698 ...    2.61698113
    39.37       -121.24      ]]
--------------------------------------------------
[4.526 3.585 3.521 ... 0.923 0.847 0.894]


In [12]:
# List the feature columns and the target column, with a separator line between.
print(
    housing.feature_names,
    "- * " * 5,
    housing.target_names,
    sep="\n",
)

['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
- * - * - * - * - * 
['MedHouseVal']


In [13]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same transform to both splits so no test-set information leaks into the fit.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [15]:
from sklearn.linear_model import Lasso

# Baseline model: Lasso with scikit-learn's default regularisation strength.
lasso = Lasso(alpha=1.0)

In [16]:
# Fit the baseline Lasso on the scaled training split.
lasso.fit(X=X_train, y=y_train)

In [18]:
# Baseline predictions for the held-out test split.
y_pred = lasso.predict(X=X_test)

In [17]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [19]:
# Test-set error metrics for the baseline Lasso predictions.
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# One value per line, in the order MSE, MAE, R^2.
print(mse, mae, r2, sep="\n")

1.3106960720039365
0.9060685490007149
-0.00021908714592466794


In [20]:
# Candidate regularisation strengths for the grid search, spanning six
# orders of magnitude on a log scale.
parameters = dict(
    alpha=[0.001, 0.01, 0.1, 1, 10, 100],
)

In [21]:
from sklearn.model_selection import GridSearchCV

In [22]:
# 5-fold cross-validated search over the alpha grid, run in parallel
# (n_jobs=-1).
lasso_cv = GridSearchCV(
    estimator=lasso,
    param_grid=parameters,
    cv=5,
    n_jobs=-1,
)

In [23]:
# Run the grid search on the scaled training split.
lasso_cv.fit(X=X_train, y=y_train)

In [24]:
# Predict the test split with the fitted grid-search object and show the result.
y_pred2 = lasso_cv.predict(X=X_test)
print(y_pred2)

[0.72592298 1.76090866 2.69570697 ... 4.46264973 1.19821254 2.00356126]


In [25]:
# Test-set error metrics for the grid-searched model's predictions.
# Each name carries the "2" suffix so the baseline metrics (mse, mae, r2)
# computed earlier stay available for side-by-side comparison.
mae2 = mean_absolute_error(y_test, y_pred2)
print(mae2)

mse2 = mean_squared_error(y_test, y_pred2)
print(mse2)

# Previously assigned to `r2`, which silently overwrote the baseline R^2.
r2_2 = r2_score(y_test, y_pred2)
print(r2_2)

0.5331447750392388
0.5544913600832685
0.5768562568705682


In [26]:
# Show the estimator configuration selected by the grid search.
print(lasso_cv.best_estimator_)

Lasso(alpha=0.001)


In [27]:
# Build the final model from the grid-search winner rather than hard-coding
# alpha=0.001, so this cell stays correct if the alpha grid or the data changes.
lasso3 = Lasso(**lasso_cv.best_params_)
lasso3.fit(X_train, y_train)

In [28]:
# Intercept term of the fitted lasso3 model.
lasso3.intercept_

2.071946937378622

In [29]:
# Per-feature coefficients of the fitted lasso3 model (on the standardised features).
lasso3.coef_

array([ 0.84914038,  0.12334631, -0.28127333,  0.32604963, -0.00106185,
       -0.03988954, -0.88582217, -0.85809324])

In [31]:
import pandas as pd

In [33]:
# Take the column names from the dataset itself instead of retyping them, so
# they cannot drift out of sync with the column order of X. list() makes a
# copy, so later edits to feature_names do not touch the dataset bundle.
feature_names = list(housing.feature_names)

In [34]:
# Pair every feature name with its fitted Lasso coefficient in one table.
df = pd.DataFrame(
    {
        'Features': feature_names,
        'Coefficients': lasso3.coef_,
    }
)

In [35]:
# Display the feature/coefficient table.
df

Unnamed: 0,Features,Coefficients
0,MedInc,0.84914
1,HouseAge,0.123346
2,AveRooms,-0.281273
3,AveBedrms,0.32605
4,Population,-0.001062
5,AveOccup,-0.03989
6,Latitude,-0.885822
7,Longitude,-0.858093
