In [1]:
import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
df = pd.read_csv('new_data.csv')
df.head()

Unnamed: 0,Weather_conditions,Road_traffic_density,Festival,Vehicle_condition,multiple_deliveries,City,Delivery_person_Age,Delivery_person_Ratings,Distance,Time_taken (min)
0,28.913887,31.150677,25.914473,2,3.0,27.174414,36.0,4.2,10.280582,46
1,25.872212,27.212264,25.914473,1,1.0,27.174414,21.0,4.7,6.242319,23
2,25.89019,26.696435,25.914473,1,1.0,27.174414,23.0,4.7,13.78786,21
3,25.89019,21.320747,25.914473,0,0.0,27.174414,34.0,4.3,2.930258,20
4,28.913887,31.150677,25.914473,1,1.0,27.174414,24.0,4.7,19.396618,41


In [6]:
## Independent and dependent features
X = df.drop(labels=['Time_taken (min)'],axis=1)
y = df[['Time_taken (min)']]

In [7]:
## Train test split

from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.30,random_state=30)

In [10]:
from sklearn.preprocessing import StandardScaler

# Initialize the StandardScaler
scaler = StandardScaler()

# Fit and transform the DataFrame
scaled_data = scaler.fit_transform(X_train)

# Convert scaled_data back to a DataFrame
X_train = pd.DataFrame(scaled_data, columns=X_train.columns)

test_scaled = scaler.transform(X_test)
X_test = pd.DataFrame(test_scaled, columns=X_test.columns)

In [11]:
X_train.head()

Unnamed: 0,Weather_conditions,Road_traffic_density,Festival,Vehicle_condition,multiple_deliveries,City,Delivery_person_Age,Delivery_person_Ratings,Distance
0,-0.177864,0.099886,-0.142048,-0.019586,0.443456,0.392384,-0.608571,0.815266,0.445121
1,-0.177864,0.228751,-0.142048,1.179837,0.443456,0.392384,-0.781301,0.815266,-1.181261
2,1.116835,-1.243071,-0.142048,-0.019586,-1.308852,0.392384,1.291454,0.50841,-1.46059
3,-1.888862,0.099886,-0.142048,-0.019586,-1.308852,0.392384,0.773265,-0.412158,1.276217
4,-0.076167,-1.243071,-0.142048,1.179837,0.443456,-1.476636,-1.472219,-0.105302,-1.456357


In [12]:
X_test.head()

Unnamed: 0,Weather_conditions,Road_traffic_density,Festival,Vehicle_condition,multiple_deliveries,City,Delivery_person_Age,Delivery_person_Ratings,Distance
0,1.116835,-1.243071,-0.142048,-1.219008,-1.308852,-1.476636,0.773265,0.815266,-0.380382
1,1.116835,-1.243071,-0.142048,1.179837,0.443456,-1.476636,0.600536,0.50841,-1.186948
2,-0.177864,0.099886,-0.142048,-1.219008,0.443456,0.392384,-0.781301,1.122122,-0.644432
3,-0.076167,1.212647,-0.142048,1.179837,0.443456,0.392384,-0.263112,-0.412158,0.420622
4,1.107891,-1.243071,-0.142048,-1.219008,0.443456,0.392384,0.773265,-1.946438,0.702624


In [13]:
## Model Training

from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor, BaggingRegressor
from sklearn.svm import SVR
import xgboost as xgb
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

In [14]:
import numpy as np
def evaluate_model(true, predicted):
    mae = mean_absolute_error(true, predicted)
    mse = mean_squared_error(true, predicted)
    rmse = np.sqrt(mse)
    r2_square = r2_score(true, predicted)
    return mae, rmse, r2_square

In [15]:
## Train multiple models

models={
    'LinearRegression':LinearRegression(),
    'Lasso':Lasso(),
    'Ridge':Ridge(),
    'Elasticnet':ElasticNet(),
    # 'NaiveBias':MultinomialNB(),
    'KNNR':KNeighborsRegressor(n_neighbors=5),
    'DecisionTree':DecisionTreeRegressor(random_state=42),
    'SVR linear':SVR(kernel='linear'),
    'SVR rbf':SVR(kernel='rbf'),
    'RandomForest':RandomForestRegressor(random_state=42),
    'AdaBoost':AdaBoostRegressor(),
    'Gradient Boosting':GradientBoostingRegressor(),
    'XGB':xgb.XGBRegressor(),
    'BaggingSVR':BaggingRegressor()
}

trained_model_list = {'Model_Name':[], 'Model': [], 'R2_Score': []}

for i in range(len(list(models))):
    model=list(models.values())[i]
    model.fit(X_train,y_train)

    #Make Predictions
    y_pred=model.predict(X_test)

    mae, rmse, r2_square=evaluate_model(y_test,y_pred)

    print(list(models.keys())[i])

    print('Model Training Performance')
    print("RMSE:",rmse)
    print("MAE:",mae)
    print("R2 score",r2_square*100)

    trained_model_list['Model_Name'].append(list(models.keys())[i])
    trained_model_list['Model'].append(model)
    trained_model_list['R2_Score'].append(r2_square*100)

    # trained_model_list.update({'Model_Name': list(models.keys())[i], 'Model': model, 'R2_Score': r2_square*100})

    print('='*35)
    print('\n')


LinearRegression
Model Training Performance
RMSE: 6.1695158601251014
MAE: 4.879938260829636
R2 score 56.88635725048776


Lasso
Model Training Performance
RMSE: 6.625281390525248
MAE: 5.290287198721534
R2 score 50.28113502767866


Ridge
Model Training Performance
RMSE: 6.169514641737902
MAE: 4.879939372720715
R2 score 56.88637427908658


Elasticnet
Model Training Performance
RMSE: 6.719366454334625
MAE: 5.387632762936917
R2 score 48.85900158268445


KNNR
Model Training Performance
RMSE: 4.715757914817471
MAE: 3.6249409332545777
R2 score 74.81073163684239


DecisionTree
Model Training Performance
RMSE: 5.537135345840584
MAE: 4.181740992321323
R2 score 65.2717557626601




  y = column_or_1d(y, warn=True)


SVR linear
Model Training Performance
RMSE: 6.183868336806741
MAE: 4.873145220199337
R2 score 56.68552874930357




  y = column_or_1d(y, warn=True)


SVR rbf
Model Training Performance
RMSE: 4.865354420949063
MAE: 3.768215715900914
R2 score 73.18724046475731




  return fit_method(estimator, *args, **kwargs)


RandomForest
Model Training Performance
RMSE: 4.183313543706348
MAE: 3.2606085826090627
R2 score 80.17773024845731




  y = column_or_1d(y, warn=True)


AdaBoost
Model Training Performance
RMSE: 5.981308049303936
MAE: 4.934490245185085
R2 score 59.4766921624923




  y = column_or_1d(y, warn=True)


Gradient Boosting
Model Training Performance
RMSE: 4.467813484507283
MAE: 3.534106335874945
R2 score 77.389892989566


XGB
Model Training Performance
RMSE: 4.045820651969668
MAE: 3.186726994015941
R2 score 81.45931384293523




  return column_or_1d(y, warn=True)


BaggingSVR
Model Training Performance
RMSE: 4.311253089236204
MAE: 3.359023429809018
R2 score 78.94672875520628




In [16]:
pd.DataFrame(trained_model_list)

Unnamed: 0,Model_Name,Model,R2_Score
0,LinearRegression,LinearRegression(),56.886357
1,Lasso,Lasso(),50.281135
2,Ridge,Ridge(),56.886374
3,Elasticnet,ElasticNet(),48.859002
4,KNNR,KNeighborsRegressor(),74.810732
5,DecisionTree,DecisionTreeRegressor(random_state=42),65.271756
6,SVR linear,SVR(kernel='linear'),56.685529
7,SVR rbf,SVR(),73.18724
8,RandomForest,"(DecisionTreeRegressor(max_features=1.0, rando...",80.17773
9,AdaBoost,"(DecisionTreeRegressor(max_depth=3, random_sta...",59.476692
