In [1]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler 
from sklearn.preprocessing import OrdinalEncoder 
## pipelines
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

In [2]:
# Read the day-level CSV into a DataFrame; the path is relative to the
# directory the notebook is run from.
df = pd.read_csv(filepath_or_buffer='data/day_cat.csv')

In [3]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,spring,0,1,0,saturday,0,mist,0.344167,0.363625,0.805833,0.160446,331,654,985
1,2,2011-01-02,spring,0,1,0,sunday,0,mist,0.363478,0.353739,0.696087,0.248539,131,670,801
2,3,2011-01-03,spring,0,1,0,monday,1,clear,0.196364,0.189405,0.437273,0.248309,120,1229,1349
3,4,2011-01-04,spring,0,1,0,tuesday,1,clear,0.2,0.212122,0.590435,0.160296,108,1454,1562
4,5,2011-01-05,spring,0,1,0,wednesday,1,clear,0.226957,0.22927,0.436957,0.1869,82,1518,1600


In [4]:
# Remove columns that are not used as features. In the preview rows,
# casual + registered equals cnt, so keeping them would leak the target.
# NOTE(review): dteday/instant look like row identifiers and atemp like a
# near-duplicate of temp -- confirm that is why they are dropped.
# Re-running this cell on the already-trimmed frame raises KeyError because
# the listed columns no longer exist.
df = df.drop(columns=['dteday', 'instant', 'atemp', 'casual', 'registered'])
df.head()

Unnamed: 0,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,hum,windspeed,cnt
0,spring,0,1,0,saturday,0,mist,0.344167,0.805833,0.160446,985
1,spring,0,1,0,sunday,0,mist,0.363478,0.696087,0.248539,801
2,spring,0,1,0,monday,1,clear,0.196364,0.437273,0.248309,1349
3,spring,0,1,0,tuesday,1,clear,0.2,0.590435,0.160296,1562
4,spring,0,1,0,wednesday,1,clear,0.226957,0.436957,0.1869,1600


In [5]:
# Target is kept as a one-column DataFrame (double brackets), not a Series.
Y = df[['cnt']]
# Every remaining column is a candidate feature.
X = df.drop(columns=['cnt'])

In [6]:
# Partition the feature names by dtype: object columns are treated as
# categorical, every other column as numerical.
numerical_cols = X.select_dtypes(exclude=['object']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

In [7]:
# Show which feature columns were classified as categorical (object dtype).
categorical_cols

Index(['season', 'weekday', 'weathersit'], dtype='object')

In [8]:
# Show which feature columns were classified as numerical (non-object dtype).
numerical_cols

Index(['yr', 'mnth', 'holiday', 'workingday', 'temp', 'hum', 'windspeed'], dtype='object')

In [9]:
# Category orderings handed to OrdinalEncoder: a label's position in its
# list becomes its integer code (e.g. 'spring' -> 0, 'winter' -> 3).
season_map = [
    'spring',
    'summer',
    'fall',
    'winter',
]
week_map = [
    'sunday',
    'monday',
    'tuesday',
    'wednesday',
    'thursday',
    'friday',
    'saturday',
]
weather_map = [
    'clear',
    'mist',
    'light_snow',
]

In [10]:
# Numerical branch: fill missing values with the column median, then
# standardise each column.
num_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill missing values with the most frequent label, then
# replace each label with its position in the matching *_map list. The
# `categories` entries must be in the same order as categorical_cols
# (season, weekday, weathersit).
# No scaler is applied here, so these codes stay as raw integers 0..k-1;
# they are numeric but NOT standardised like the numerical branch.
cat_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('ordinalencoder', OrdinalEncoder(categories=[season_map, week_map, weather_map])),
])

# Route each column group through its own branch; the output lists the
# numerical columns first, followed by the categorical ones.
preprocessor = ColumnTransformer(transformers=[
    ('num_pipeline', num_pipeline, numerical_cols),
    ('cat_pipeline', cat_pipeline, categorical_cols),
])


In [11]:
# List the distinct weathersit labels present in the data; every one of them
# has to appear in weather_map for the OrdinalEncoder to accept it.
df['weathersit'].unique()

array(['mist', 'clear', 'light_snow'], dtype=object)

In [12]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.30,
    random_state=30,
)

In [13]:
# Fit the preprocessor on the training rows only, then reuse the fitted
# statistics for the test rows so nothing leaks from the hold-out set.
train_matrix = preprocessor.fit_transform(X_train)
test_matrix = preprocessor.transform(X_test)
feature_names = preprocessor.get_feature_names_out()

# Wrapping the transformed arrays gives both frames a fresh 0..n-1 index,
# so they no longer share row labels with y_train / y_test.
# X_train / X_test are overwritten here: re-running this cell without
# re-running the split feeds already-transformed frames to the preprocessor.
X_train = pd.DataFrame(train_matrix, columns=feature_names)
X_test = pd.DataFrame(test_matrix, columns=feature_names)

In [14]:
# Inspect the transformed training features: standardised numeric columns
# followed by the ordinal codes of the categorical columns.
X_train.head()

Unnamed: 0,num_pipeline__yr,num_pipeline__mnth,num_pipeline__holiday,num_pipeline__workingday,num_pipeline__temp,num_pipeline__hum,num_pipeline__windspeed,cat_pipeline__season,cat_pipeline__weekday,cat_pipeline__weathersit
0,-1.001959,-0.413519,-0.191079,-1.448092,0.891692,0.714407,0.505799,1.0,6.0,0.0
1,-1.001959,-0.125633,-0.191079,-1.448092,0.851103,0.168905,-0.696239,1.0,0.0,1.0
2,-1.001959,-0.701404,-0.191079,0.690564,0.079895,0.26326,-0.442693,1.0,2.0,1.0
3,0.998045,-0.413519,-0.191079,0.690564,0.079895,-0.541728,1.610136,1.0,4.0,0.0
4,0.998045,-1.277175,-0.191079,-1.448092,-1.967638,-1.160949,2.86148,0.0,0.0,0.0


In [15]:
# Baseline ordinary-least-squares fit on the preprocessed training data.
# The model-comparison loop further down fits its own LinearRegression
# instance rather than reusing this one.
regression=LinearRegression()
regression.fit(X_train,y_train)

In [16]:
import numpy as np
def evaluate_model(true, predicted):
    """Score a set of predictions against the observed targets.

    Args:
        true: Observed target values.
        predicted: Model predictions for the same rows as ``true``.

    Returns:
        A tuple ``(mae, rmse, r2_square)`` holding the mean absolute error,
        the root mean squared error and the R^2 score, each computed with
        the corresponding scikit-learn metric.
    """
    mae = mean_absolute_error(true, predicted)
    # Compute the MSE once and take its square root for the RMSE.
    mse = mean_squared_error(true, predicted)
    rmse = np.sqrt(mse)
    r2_square = r2_score(true, predicted)
    return mae, rmse, r2_square

In [17]:
# Candidate linear regressors, each with scikit-learn's default settings.
models={
    'LinearRegression':LinearRegression(),
    'Lasso':Lasso(),
    'Ridge':Ridge(),
    'Elasticnet':ElasticNet()
}
trained_model_list=[]  # fitted estimators, in the same order as model_list
model_list=[]          # model names, in evaluation order
r2_list=[]             # R^2 per model (as a fraction, not a percentage)

# Iterate over name/estimator pairs directly instead of rebuilding
# list(models.keys()) / list(models.values()) on every pass.
for model_name, model in models.items():
    model.fit(X_train,y_train)
    trained_model_list.append(model)

    # Make predictions on the held-out split
    y_pred=model.predict(X_test)

    mae, rmse, r2_square=evaluate_model(y_test,y_pred)

    print(model_name)
    model_list.append(model_name)

    # NOTE(review): the heading says "Training", but the metrics below are
    # computed on X_test / y_test, i.e. they are test-set scores.
    print('Model Training Performance')
    print("RMSE:",rmse)
    print("MAE:",mae)
    print("R2 score",r2_square*100)

    r2_list.append(r2_square)

    print('='*35)
    print('\n')

LinearRegression
Model Training Performance
RMSE: 810.8654967988782
MAE: 616.6171243979056
R2 score 81.46001735710516


Lasso
Model Training Performance
RMSE: 811.3070858160305
MAE: 616.5751006898212
R2 score 81.4398184904916


Ridge
Model Training Performance
RMSE: 811.1834817618424
MAE: 616.5572637152495
R2 score 81.44547341195113


Elasticnet
Model Training Performance
RMSE: 989.6835547459644
MAE: 785.6238213191865
R2 score 72.38122649061698


