In [19]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
data = pd.read_csv('travel_data.csv')

# Encode categorical variables
label_encoders = {}
columns_to_encode = ['Area_of_Interest', 'Preferred_Climate', 'Transportation_Mode', 'Recommendation']

for column in columns_to_encode:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Split data into features and target
X = data.drop(columns=['Recommendation'])
y = data['Recommendation']

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the initial model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate the initial model
predictions = model.predict(X_test)
initial_accuracy = accuracy_score(y_test, predictions)
initial_report = classification_report(y_test, predictions)

print(f'Initial Accuracy: {initial_accuracy}')
print(f'Initial Classification Report:\n{initial_report}')

# Hyperparameter tuning with GridSearchCV
param_grid = {
    "n_estimators": [50, 100, 150, 200],
    "max_depth": [None, 10, 20, 30],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
    "max_features": ['sqrt', 'log2', None],
    "bootstrap": [True, False]
}

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

# Best model evaluation
best_model = grid_search.best_estimator_
best_predictions = best_model.predict(X_test)
best_accuracy = accuracy_score(y_test, best_predictions)
best_report = classification_report(y_test, best_predictions)

print(f'Best Accuracy: {best_accuracy}')
print(f'Best Classification Report:\n{best_report}')


Initial Accuracy: 0.2826086956521739
Initial Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.00      0.00      0.00        12
           2       0.25      0.23      0.24        13
           3       0.00      0.00      0.00         8
           4       0.13      0.15      0.14        13
           5       0.12      0.11      0.12        18
           6       0.00      0.00      0.00        11
           7       0.08      0.08      0.08        13
           8       0.00      0.00      0.00         7
           9       0.00      0.00      0.00        10
          10       0.12      0.17      0.14        12
          11       0.24      0.22      0.23        18
          12       0.20      0.33      0.25         9
          13       0.10      0.10      0.10        10
          14       1.00      1.00      1.00       151
          15       0.00      0.00      0.00         5
          16 

KeyboardInterrupt: 

In [10]:
encored_recommendation = data['Recommendation']

In [16]:
from sklearn.preprocessing import LabelEncoder
obj = LabelEncoder()
recommendation_result = obj.fit_transform(encored_recommendation)

In [17]:
recommendation_result

array([52, 74, 45, ..., 39, 87, 29])

In [18]:
data['Recommendation'] = recommendation_result
data.head()

Unnamed: 0,Number_of_Travelers,Budget,Area_of_Interest,Preferred_Climate,Transportation_Mode,Recommendation
0,6,35000,Beach,Warm,Bikes,52
1,4,80000,Forest,Tropical,Public Transport,74
2,2,70000,City,Moderate,Van,45
3,6,17000,Beach,Warm,Bikes,34
4,25,55000,Forest,Tropical,Public Transport,29
