# Python Example for Ridge Regression 
# Sanjay Gupta
# Date: 15-August-2021

# Step # 1: Importing Libraries

In [7]:
import pandas as pd
from numpy import arange
from sklearn.linear_model import Ridge
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import RepeatedKFold

In [8]:
# Location of the raw CSV file (hosted on GitHub)
url = 'https://raw.githubusercontent.com/sanjaygupta1963/Pythoncoding/main/price_train_r_exam_sem2.csv'

# Read the CSV at that URL into a DataFrame
dfcombine = pd.read_csv(filepath_or_buffer=url)

# Step # 2: Cleaning the Data

In [9]:
# Clean the combined dataset.
# Strip the currency formatting ('$' and the ',' thousands separator) from the
# price and cleaning_fee columns, then convert them from text to numbers.
# regex=False forces a literal replacement: read as a regular expression, '$'
# is the end-of-string anchor, and the default value of `regex` differs between
# pandas versions (older ones emit a FutureWarning for this call).
for money_col in ("price", "cleaning_fee"):
    dfcombine[money_col] = pd.to_numeric(
        dfcombine[money_col]
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
    )

# Convert the instant_bookable flag column to integers (1 or 0)
dfcombine["instant_bookable"] = dfcombine["instant_bookable"].astype(int)

# Drop every column that is not used as a predictor or as the response
# (identifiers, coordinates, host attributes and the categorical/text columns).
dfcombine = dfcombine.drop(columns=[
    "X1", "id", "latitude", "host_response_rate", "host_is_superhost",
    "host_listings_count", "longitude", "amenities", "cancellation_policy",
    "bed_type", "property_type", "host_response_time",
    "neighbourhood_group_cleansed", "room_type", "host_identity_verified",
])

# Summarise the remaining columns, their non-null counts and dtypes
dfcombine.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3466 entries, 0 to 3465
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   accommodates      3466 non-null   int64  
 1   guests_included   3466 non-null   int64  
 2   bathrooms         3466 non-null   float64
 3   bedrooms          3466 non-null   int64  
 4   beds              3466 non-null   int64  
 5   cleaning_fee      3466 non-null   float64
 6   minimum_nights    3466 non-null   int64  
 7   maximum_nights    3466 non-null   int64  
 8   instant_bookable  3466 non-null   int32  
 9   price             3466 non-null   float64
dtypes: float64(3), int32(1), int64(6)
memory usage: 257.4 KB


  dfcombine['price'] = dfcombine['price'].str.replace('$','')
  dfcombine['cleaning_fee'] = dfcombine['cleaning_fee'].str.replace('$','')


In [10]:
# Preview the first six rows of the cleaned data
dfcombine.head(6)

Unnamed: 0,accommodates,guests_included,bathrooms,bedrooms,beds,cleaning_fee,minimum_nights,maximum_nights,instant_bookable,price
0,5,4,2.0,2,2,129.0,2,1125,1,279.0
1,2,1,1.0,0,1,50.0,2,30,1,99.0
2,4,2,1.0,1,2,25.0,2,28,0,75.0
3,2,1,1.0,1,1,0.0,2,30,0,70.0
4,8,6,2.5,3,3,99.0,2,12,0,242.0
5,8,6,3.5,3,3,100.0,2,1125,0,430.0


# Step #3: Fit the Ridge Regression Model

In [11]:
# List the remaining column names (used below to pick the predictors and the response)
print(dfcombine.columns)

Index(['accommodates', 'guests_included', 'bathrooms', 'bedrooms', 'beds',
       'cleaning_fee', 'minimum_nights', 'maximum_nights', 'instant_bookable',
       'price'],
      dtype='object')


In [12]:
# Define the predictor (input) matrix X and the response (output) vector y
X = dfcombine[['accommodates', 'guests_included', 'bathrooms', 'bedrooms', 'beds',
       'cleaning_fee', 'minimum_nights', 'maximum_nights', 'instant_bookable']]
y = dfcombine['price']

# Cross-validation scheme used to evaluate each candidate alpha:
# 10 folds, repeated 3 times, with a fixed seed so the splits are reproducible
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# Ridge model that picks its penalty strength (alpha, a.k.a. lambda) by
# cross-validation, scored on (negated) mean absolute error.
# The grid starts at 0.01 rather than 0: alpha = 0 is ordinary least squares,
# not ridge regression, and scikit-learn advises against fitting it via Ridge.
model = RidgeCV(alphas=arange(0.01, 1, 0.01), cv=cv, scoring='neg_mean_absolute_error')

# Fit the model; the selected alpha is stored on the fitted estimator
model.fit(X, y)

# Display the alpha (lambda) with the lowest cross-validated mean absolute error
print(model.alpha_)

0.99


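# The lambda (alpha) value that minimizes the cross-validated mean absolute error turns out to be 0.99, which is the largest value in the search grid, so a wider grid may be worth trying.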

# Step #4: Use the Model to Make Predictions of the Output Variable 'Price'

In [13]:
# Define a new observation; the values follow the column order of X
# (accommodates, guests_included, bathrooms, bedrooms, beds,
#  cleaning_fee, minimum_nights, maximum_nights, instant_bookable)
new = [8, 5, 4, 5, 5, 100, 2, 30, 1]

# Wrap the observation in a one-row DataFrame carrying the same column names
# the model was fitted on, so each value is tied to its feature explicitly
new_obs = pd.DataFrame([new], columns=X.columns)

# Predict the price for the new observation using the fitted ridge regression model
model.predict(new_obs)

array([320.85939207])

In [14]:
# Based on the new input values, the model predicts a room price of about 320.86 (320.85939207 in the output above).