In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler


In [2]:
data = pd.read_csv('travel_data.csv', encoding='ISO-8859-1')
data


Unnamed: 0,Number_of_Travelers,Budget,Area_of_Interest,Preferred_Climate,Transportation_Mode,Recommendation
0,14,75000,National Park,Warm,Bikes,"Gal Oya National Park, Ampara"
1,20,40000,City,Moderate,Rosa Bus,"Colombo City, Colombo"
2,17,38000,Beach,Warm,Bikes,"Hikkaduwa Beach, Hikkaduwa"
3,24,40000,Lakeside,Moderate,Rosa Bus,"Victoria Reservoir, Kandy"
4,1,1800,Desert,Hot,Rosa Bus,"Delft Island, Jaffna"
...,...,...,...,...,...,...
5135,5,14000,Beach,Warm,Van,"Hikkaduwa Beach, Hikkaduwa"
5136,16,25000,Mountains,Cool,Public Transport,"Diyatalawa, Badulla"
5137,1,150000,National Park,Warm,Rosa Bus,"Wasgamuwa National Park, Wasgamuwa"
5138,12,150000,Lakeside,Moderate,Bikes,"Kandy Lake, Kandy"


In [3]:
label_encoders = {}
for column in ['Area_of_Interest', 'Preferred_Climate', 'Transportation_Mode', 'Recommendation']:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le


In [4]:
X = data.drop(columns=['Recommendation'])
y = data['Recommendation']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [5]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [6]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=125, random_state=42)


In [7]:
model.fit(X_train, y_train)


In [8]:
from sklearn.metrics import accuracy_score, classification_report

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print(f'Classification Report:\n{report}')


Accuracy: 0.27431906614785995
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         7
           1       0.00      0.00      0.00        12
           2       0.25      0.44      0.32         9
           3       0.00      0.00      0.00        10
           4       0.08      0.07      0.07        15
           5       0.21      0.17      0.19        18
           6       0.33      0.33      0.33         6
           7       0.00      0.00      0.00        13
           8       0.00      0.00      0.00         2
           9       0.00      0.00      0.00         5
          10       0.00      0.00      0.00        10
          11       0.10      0.17      0.12        12
          12       0.10      0.10      0.10        10
          13       0.06      0.14      0.09         7
          14       1.00      1.00      1.00       142
          15       0.00      0.00      0.00         9
          16       0.00     

In [9]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [100, 200, 300],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth': [4, 5, 6, 7, 8],
    'criterion': ['gini', 'entropy']
}
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=25, n_jobs=1, verbose=2)

grid_search.fit(X_train, y_train)

print(f'Best Params: {grid_search.best_params_}')


Fitting 25 folds for each of 90 candidates, totalling 2250 fits
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=100; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=100; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=100; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=100; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=100; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=100; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=100; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=100; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=100; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=100; total time=   0



[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=300; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=300; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=300; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=300; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=300; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=300; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=300; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=300; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=300; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_estimators=300; total time=   0.0s
[CV] END criterion=gini, max_depth=4, max_features=auto, n_e

750 fits failed out of a total of 2250.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
750 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python

Best Params: {'criterion': 'gini', 'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 300}


In [15]:
best_model = grid_search.best_estimator_
best_model.fit(X_train, y_train)

y_pred_best = best_model.predict(X_test)
accuracy_best = accuracy_score(y_test, y_pred_best)
report_best = classification_report(y_test, y_pred_best)

print(f'Best Accuracy: {accuracy_best}')
print(f'Best Classification Report:\n{report_best}')


Best Accuracy: 0.2928015564202335
Best Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         7
           1       0.00      0.00      0.00        12
           2       0.38      0.33      0.35         9
           3       0.00      0.00      0.00        10
           4       0.14      0.20      0.17        15
           5       0.27      0.17      0.21        18
           6       0.00      0.00      0.00         6
           7       0.00      0.00      0.00        13
           8       0.03      1.00      0.06         2
           9       0.00      0.00      0.00         5
          10       0.00      0.00      0.00        10
          11       0.33      0.08      0.13        12
          12       0.00      0.00      0.00        10
          13       0.00      0.00      0.00         7
          14       1.00      1.00      1.00       142
          15       0.00      0.00      0.00         9
          16       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [16]:
import joblib

joblib.dump(best_model, 'travel_recommendation_model.pkl')



['travel_recommendation_model.pkl']

In [17]:
loaded_model = joblib.load('travel_recommendation_model.pkl')


In [18]:
def predict_recommendation(input_data):
    # Preprocess the input data (encoding and scaling)
    input_df = pd.DataFrame([input_data], columns=X.columns)
    for column in ['Area_of_Interest', 'Preferred_Climate', 'Transportation_Mode']:
        input_df[column] = label_encoders[column].transform(input_df[column])
    input_scaled = scaler.transform(input_df)
    
    # Make prediction
    recommendation_encoded = loaded_model.predict(input_scaled)
    recommendation = label_encoders['Recommendation'].inverse_transform(recommendation_encoded)
    return recommendation[0]


In [20]:
test_input = {
    "Number_of_Travelers": 4,
    "Budget": 5000,
    "Area_of_Interest": "Mountains",
    "Preferred_Climate": "Cool",
    "Transportation_Mode": "Van"
}
print(predict_recommendation(test_input))


Horton Plains National Park, Nuwara Eliya
