In [87]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import datetime
import numpy as np

In [88]:
# Read the dataset from CSV
df = pd.read_excel('Transportation_Recommandation_SampleDataset (Himasha).xlsx')

In [89]:
df

Unnamed: 0,Current Location,Destination,Distance (km),Number of Passengers,Vehicle Condition,Time Duration (hours),Fare (LKR),Number of Seats Available,Comfort Level,Transportation Mode
0,Matara,Galle,28.0,2,A/C,1.50,600,4,Medium,Car
1,Matara,Mirissa,41.0,3,Non A/C,2.00,900,6,Medium,Van
2,Matara,Tangalle,52.0,2,A/C,1.50,800,4,Medium,Car
3,Matara,Hambantota,66.0,4,Non A/C,3.00,1500,8,High,Bus
4,Matara,Kataragama,221.0,2,A/C,5.50,5000,4,High,Car
...,...,...,...,...,...,...,...,...,...,...
203,Habaraduwa,Dondra,30.0,2,Non A/C,1.00,1000,4,Medium,Car
204,Habaraduwa,Mirijjawila,50.0,3,A/C,2.00,2000,5,Medium,Car
205,Habaraduwa,Koggala,10.0,1,Non A/C,0.30,300,1,Low,Tuk-tuk
206,Bata Atha,Hikkaduwa,5.0,1,Non A/C,0.25,1000,2,Low,Motorbike


In [90]:
# Splitting the data into train and test sets
X = df[['Current Location', 'Destination', 'Comfort Level', 'Number of Passengers', 
        'Vehicle Condition', 'Time Duration (hours)']]
y = df[['Transportation Mode', 'Fare (LKR)', 'Distance (km)','Number of Seats Available']]


In [91]:
# Label encoding for the target variable
label_encoder = LabelEncoder()
y['Transportation Mode'] = label_encoder.fit_transform(y['Transportation Mode'])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y['Transportation Mode'] = label_encoder.fit_transform(y['Transportation Mode'])


In [92]:
# Applying one-hot encoding to categorical features and target variable
cat_features = ['Current Location', 'Vehicle Condition', 'Comfort Level', 'Destination']
preprocessor = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(handle_unknown='ignore'), cat_features)],
    remainder='passthrough')

In [93]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [94]:
X_train_encoded = preprocessor.fit_transform(X_train)
X_test_encoded = preprocessor.transform(X_test)

In [95]:
X_train

Unnamed: 0,Current Location,Destination,Comfort Level,Number of Passengers,Vehicle Condition,Time Duration (hours)
86,Bentota,Matara,High,4,A/C,3.50
203,Habaraduwa,Dondra,Medium,2,Non A/C,1.00
67,Talpe,Weligama,Low,1,Non A/C,0.25
82,Bentota,Hikkaduwa,Medium,3,Non A/C,1.00
205,Habaraduwa,Koggala,Low,1,Non A/C,0.30
...,...,...,...,...,...,...
106,Unawatuna,Galle,High,2,A/C,0.50
14,Galle,Koggala,Low,1,Non A/C,1.00
92,Bentota,Tangalle,High,4,A/C,4.00
179,Deniyaya,Mirissa,Medium,4,A/C,4.00


In [96]:
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

In [97]:
X_train

<166x60 sparse matrix of type '<class 'numpy.float64'>'
	with 996 stored elements in Compressed Sparse Row format>

In [98]:
# Training the model
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

In [99]:
# Make predictions on the test set
y_pred = model.predict(X_test_encoded)

# Calculate mean squared error
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Get the current date and time
current_datetime = datetime.datetime.now()
print("Current Date and Time:", current_datetime)

Mean Squared Error: 234061.9783842619
Current Date and Time: 2023-09-04 03:28:33.950175


In [100]:
# Convert the predicted values back to their original form
predicted_y = pd.DataFrame({'Transportation_Mode': label_encoder.inverse_transform(np.round(y_pred[:, 0]).astype(int)),
                            'Fare': y_pred[:, 1],
                            'Distance': y_pred[:, 2],
                            'Number_of_Seats_Available': np.round(y_pred[:, 3]).astype(int)})



In [101]:
y_pred[:, 0]

array([3.74, 1.83, 1.8 , 2.89, 1.78, 2.36, 2.95, 1.82, 2.76, 3.4 , 2.18,
       1.85, 3.7 , 3.44, 3.  , 1.6 , 2.39, 2.52, 2.56, 2.23, 1.86, 3.51,
       2.82, 3.27, 2.29, 3.18, 3.19, 3.78, 4.  , 3.8 , 3.87, 4.28, 1.7 ,
       3.54, 2.3 , 2.19, 2.33, 2.58, 2.35, 2.91, 2.78, 3.58])

In [102]:
label_encoder.inverse_transform(np.round(y_pred[:, 0]).astype(int))


array(['Tuk-tuk', 'Motorbike', 'Motorbike', 'Tuk-Tuk', 'Motorbike',
       'Motorbike', 'Tuk-Tuk', 'Motorbike', 'Tuk-Tuk', 'Tuk-Tuk',
       'Motorbike', 'Motorbike', 'Tuk-tuk', 'Tuk-Tuk', 'Tuk-Tuk',
       'Motorbike', 'Motorbike', 'Tuk-Tuk', 'Tuk-Tuk', 'Motorbike',
       'Motorbike', 'Tuk-tuk', 'Tuk-Tuk', 'Tuk-Tuk', 'Motorbike',
       'Tuk-Tuk', 'Tuk-Tuk', 'Tuk-tuk', 'Tuk-tuk', 'Tuk-tuk', 'Tuk-tuk',
       'Tuk-tuk', 'Motorbike', 'Tuk-tuk', 'Motorbike', 'Motorbike',
       'Motorbike', 'Tuk-Tuk', 'Motorbike', 'Tuk-Tuk', 'Tuk-Tuk',
       'Tuk-tuk'], dtype=object)

In [103]:
predicted_y

Unnamed: 0,Transportation_Mode,Fare,Distance,Number_of_Seats_Available
0,Tuk-tuk,616.0,7.342,2
1,Motorbike,449.5,7.965,4
2,Motorbike,984.0,14.732,4
3,Tuk-Tuk,754.0,10.45,3
4,Motorbike,2318.0,53.541,4
5,Motorbike,997.0,24.261,4
6,Tuk-Tuk,740.0,17.238,2
7,Motorbike,2604.0,50.472,4
8,Tuk-Tuk,2484.0,50.911,5
9,Tuk-Tuk,1066.0,19.922,3


In [104]:
np.round(y_pred[:, 0]).astype(int)

array([4, 2, 2, 3, 2, 2, 3, 2, 3, 3, 2, 2, 4, 3, 3, 2, 2, 3, 3, 2, 2, 4,
       3, 3, 2, 3, 3, 4, 4, 4, 4, 4, 2, 4, 2, 2, 2, 3, 2, 3, 3, 4])

In [105]:
label_encoder.inverse_transform(np.round(y_pred[:, 0]).astype(int))

array(['Tuk-tuk', 'Motorbike', 'Motorbike', 'Tuk-Tuk', 'Motorbike',
       'Motorbike', 'Tuk-Tuk', 'Motorbike', 'Tuk-Tuk', 'Tuk-Tuk',
       'Motorbike', 'Motorbike', 'Tuk-tuk', 'Tuk-Tuk', 'Tuk-Tuk',
       'Motorbike', 'Motorbike', 'Tuk-Tuk', 'Tuk-Tuk', 'Motorbike',
       'Motorbike', 'Tuk-tuk', 'Tuk-Tuk', 'Tuk-Tuk', 'Motorbike',
       'Tuk-Tuk', 'Tuk-Tuk', 'Tuk-tuk', 'Tuk-tuk', 'Tuk-tuk', 'Tuk-tuk',
       'Tuk-tuk', 'Motorbike', 'Tuk-tuk', 'Motorbike', 'Motorbike',
       'Motorbike', 'Tuk-Tuk', 'Motorbike', 'Tuk-Tuk', 'Tuk-Tuk',
       'Tuk-tuk'], dtype=object)

In [106]:
import pickle

with open('transport_recommendation.dat', 'wb') as f:
    pickle.dump(model, f)


In [107]:
with open('transport_recommendation.dat' , 'rb') as f:
    amodel = pickle.load(f)

In [119]:
import pandas as pd
data=preprocessor.transform(pd.DataFrame([['Bentota','Matara','High',3,'Non A/C',3.50]], columns=['Current Location','Destination','Comfort Level','Number of Passengers','Vehicle Condition','Time Duration (hours)']))
new_data=amodel.predict(data)
label_encoder.inverse_transform(np.round(new_data[:, 0]).astype(int))

array(['Van'], dtype=object)