# Regularization

Thanks for the Dataset: [Dataset](https://github.com/codebasics/py/blob/master/ML/16_regularization/Melbourne_housing_FULL.csv)

## Import Necessary Libraries

In [27]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Lasso, Ridge

## Data Preparation

In [28]:
dataframe = pd.read_csv('../data/Melbourne_housing_FULL.csv')
dataframe.head()

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
0,Abbotsford,68 Studley St,2,h,,SS,Jellis,3/09/2016,2.5,3067.0,...,1.0,1.0,126.0,,,Yarra City Council,-37.8014,144.9958,Northern Metropolitan,4019.0
1,Abbotsford,85 Turner St,2,h,1480000.0,S,Biggin,3/12/2016,2.5,3067.0,...,1.0,1.0,202.0,,,Yarra City Council,-37.7996,144.9984,Northern Metropolitan,4019.0
2,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,4/02/2016,2.5,3067.0,...,1.0,0.0,156.0,79.0,1900.0,Yarra City Council,-37.8079,144.9934,Northern Metropolitan,4019.0
3,Abbotsford,18/659 Victoria St,3,u,,VB,Rounds,4/02/2016,2.5,3067.0,...,2.0,1.0,0.0,,,Yarra City Council,-37.8114,145.0116,Northern Metropolitan,4019.0
4,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,4/03/2017,2.5,3067.0,...,2.0,0.0,134.0,150.0,1900.0,Yarra City Council,-37.8093,144.9944,Northern Metropolitan,4019.0


In [29]:
dataframe = dataframe.drop(columns=['Address', 'Date', 'Postcode', 'YearBuilt', 'Lattitude', 'Longtitude'])
dataframe.head()

Unnamed: 0,Suburb,Rooms,Type,Price,Method,SellerG,Distance,Bedroom2,Bathroom,Car,Landsize,BuildingArea,CouncilArea,Regionname,Propertycount
0,Abbotsford,2,h,,SS,Jellis,2.5,2.0,1.0,1.0,126.0,,Yarra City Council,Northern Metropolitan,4019.0
1,Abbotsford,2,h,1480000.0,S,Biggin,2.5,2.0,1.0,1.0,202.0,,Yarra City Council,Northern Metropolitan,4019.0
2,Abbotsford,2,h,1035000.0,S,Biggin,2.5,2.0,1.0,0.0,156.0,79.0,Yarra City Council,Northern Metropolitan,4019.0
3,Abbotsford,3,u,,VB,Rounds,2.5,3.0,2.0,1.0,0.0,,Yarra City Council,Northern Metropolitan,4019.0
4,Abbotsford,3,h,1465000.0,SP,Biggin,2.5,3.0,2.0,0.0,134.0,150.0,Yarra City Council,Northern Metropolitan,4019.0


In [30]:
dataframe.shape

(34857, 15)

In [31]:
dataframe.isna().sum()

Suburb               0
Rooms                0
Type                 0
Price             7610
Method               0
SellerG              0
Distance             1
Bedroom2          8217
Bathroom          8226
Car               8728
Landsize         11810
BuildingArea     21115
CouncilArea          3
Regionname           3
Propertycount        3
dtype: int64

In [32]:
columns_fill_zero = ['Propertycount', 'Distance', 'Bedroom2', 'Bathroom', 'Car']

dataframe[columns_fill_zero] = dataframe[columns_fill_zero].fillna(0)

In [33]:
dataframe.isna().sum()

Suburb               0
Rooms                0
Type                 0
Price             7610
Method               0
SellerG              0
Distance             0
Bedroom2             0
Bathroom             0
Car                  0
Landsize         11810
BuildingArea     21115
CouncilArea          3
Regionname           3
Propertycount        0
dtype: int64

In [36]:
dataframe['Landsize'] = dataframe['Landsize'].fillna(dataframe['Landsize'].mean())
dataframe['BuildingArea'] = dataframe['BuildingArea'].fillna(dataframe['BuildingArea'].mean())
dataframe['Price'] = dataframe['Price'].fillna(dataframe['Price'].mean())


In [37]:
dataframe.isna().sum()

Suburb           0
Rooms            0
Type             0
Price            0
Method           0
SellerG          0
Distance         0
Bedroom2         0
Bathroom         0
Car              0
Landsize         0
BuildingArea     0
CouncilArea      3
Regionname       3
Propertycount    0
dtype: int64

In [39]:
dataframe = dataframe.dropna()
dataframe.isna().sum()

Suburb           0
Rooms            0
Type             0
Price            0
Method           0
SellerG          0
Distance         0
Bedroom2         0
Bathroom         0
Car              0
Landsize         0
BuildingArea     0
CouncilArea      0
Regionname       0
Propertycount    0
dtype: int64

In [40]:
dataframe.head()

Unnamed: 0,Suburb,Rooms,Type,Price,Method,SellerG,Distance,Bedroom2,Bathroom,Car,Landsize,BuildingArea,CouncilArea,Regionname,Propertycount
0,Abbotsford,2,h,1050173.0,SS,Jellis,2.5,2.0,1.0,1.0,126.0,160.2564,Yarra City Council,Northern Metropolitan,4019.0
1,Abbotsford,2,h,1480000.0,S,Biggin,2.5,2.0,1.0,1.0,202.0,160.2564,Yarra City Council,Northern Metropolitan,4019.0
2,Abbotsford,2,h,1035000.0,S,Biggin,2.5,2.0,1.0,0.0,156.0,79.0,Yarra City Council,Northern Metropolitan,4019.0
3,Abbotsford,3,u,1050173.0,VB,Rounds,2.5,3.0,2.0,1.0,0.0,160.2564,Yarra City Council,Northern Metropolitan,4019.0
4,Abbotsford,3,h,1465000.0,SP,Biggin,2.5,3.0,2.0,0.0,134.0,150.0,Yarra City Council,Northern Metropolitan,4019.0


In [41]:
dataframe = pd.get_dummies(dataframe, drop_first=True)
dataframe.head()

Unnamed: 0,Rooms,Price,Distance,Bedroom2,Bathroom,Car,Landsize,BuildingArea,Propertycount,Suburb_Aberfeldie,...,CouncilArea_Wyndham City Council,CouncilArea_Yarra City Council,CouncilArea_Yarra Ranges Shire Council,Regionname_Eastern Victoria,Regionname_Northern Metropolitan,Regionname_Northern Victoria,Regionname_South-Eastern Metropolitan,Regionname_Southern Metropolitan,Regionname_Western Metropolitan,Regionname_Western Victoria
0,2,1050173.0,2.5,2.0,1.0,1.0,126.0,160.2564,4019.0,False,...,False,True,False,False,True,False,False,False,False,False
1,2,1480000.0,2.5,2.0,1.0,1.0,202.0,160.2564,4019.0,False,...,False,True,False,False,True,False,False,False,False,False
2,2,1035000.0,2.5,2.0,1.0,0.0,156.0,79.0,4019.0,False,...,False,True,False,False,True,False,False,False,False,False
3,3,1050173.0,2.5,3.0,2.0,1.0,0.0,160.2564,4019.0,False,...,False,True,False,False,True,False,False,False,False,False
4,3,1465000.0,2.5,3.0,2.0,0.0,134.0,150.0,4019.0,False,...,False,True,False,False,True,False,False,False,False,False


## Assigning X and y

In [42]:
X = dataframe.drop(columns=['Price'])
X.head()

Unnamed: 0,Rooms,Distance,Bedroom2,Bathroom,Car,Landsize,BuildingArea,Propertycount,Suburb_Aberfeldie,Suburb_Airport West,...,CouncilArea_Wyndham City Council,CouncilArea_Yarra City Council,CouncilArea_Yarra Ranges Shire Council,Regionname_Eastern Victoria,Regionname_Northern Metropolitan,Regionname_Northern Victoria,Regionname_South-Eastern Metropolitan,Regionname_Southern Metropolitan,Regionname_Western Metropolitan,Regionname_Western Victoria
0,2,2.5,2.0,1.0,1.0,126.0,160.2564,4019.0,False,False,...,False,True,False,False,True,False,False,False,False,False
1,2,2.5,2.0,1.0,1.0,202.0,160.2564,4019.0,False,False,...,False,True,False,False,True,False,False,False,False,False
2,2,2.5,2.0,1.0,0.0,156.0,79.0,4019.0,False,False,...,False,True,False,False,True,False,False,False,False,False
3,3,2.5,3.0,2.0,1.0,0.0,160.2564,4019.0,False,False,...,False,True,False,False,True,False,False,False,False,False
4,3,2.5,3.0,2.0,0.0,134.0,150.0,4019.0,False,False,...,False,True,False,False,True,False,False,False,False,False


In [43]:
y = dataframe['Price']
y[:5]

0    1.050173e+06
1    1.480000e+06
2    1.035000e+06
3    1.050173e+06
4    1.465000e+06
Name: Price, dtype: float64

## Splitting the Data into Training and Test Data