In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [6]:
data = pd.read_csv('travel_data.csv', encoding='ISO-8859-1')
data


Unnamed: 0,Number_of_Travelers,Budget,Area_of_Interest,Preferred_Climate,Transportation_Mode,Recommendation
0,14,75000,National Park,Warm,Bikes,"Gal Oya National Park, Ampara"
1,20,40000,City,Moderate,Rosa Bus,"Colombo City, Colombo"
2,17,38000,Beach,Warm,Bikes,"Hikkaduwa Beach, Hikkaduwa"
3,24,40000,Lakeside,Moderate,Rosa Bus,"Victoria Reservoir, Kandy"
4,1,1800,Desert,Hot,Rosa Bus,"Delft Island, Jaffna"
...,...,...,...,...,...,...
5135,5,14000,Beach,Warm,Van,"Hikkaduwa Beach, Hikkaduwa"
5136,16,25000,Mountains,Cool,Public Transport,"Diyatalawa, Badulla"
5137,1,150000,National Park,Warm,Rosa Bus,"Wasgamuwa National Park, Wasgamuwa"
5138,12,150000,Lakeside,Moderate,Bikes,"Kandy Lake, Kandy"


In [7]:
# Encode categorical variables
label_encoders = {}
for column in ['Area_of_Interest', 'Preferred_Climate', 'Transportation_Mode', 'Recommendation']:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

In [8]:
# Split the dataset
X = data.drop(columns=['Recommendation'])
y = data['Recommendation']

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [11]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn import tree
import matplotlib.pyplot as plt

In [12]:

# Initialize and train the decision tree model
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

In [13]:
# Predict on the test set
y_pred = model.predict(X_test)

In [14]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print(f'\nClassification Report:\n{report}')

Accuracy: 0.2772373540856031

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         7
           1       0.11      0.08      0.10        12
           2       0.22      0.22      0.22         9
           3       0.00      0.00      0.00        10
           4       0.23      0.20      0.21        15
           5       0.32      0.33      0.32        18
           6       0.29      0.33      0.31         6
           7       0.00      0.00      0.00        13
           8       0.00      0.00      0.00         2
           9       0.00      0.00      0.00         5
          10       0.00      0.00      0.00        10
          11       0.07      0.08      0.07        12
          12       0.12      0.10      0.11        10
          13       0.05      0.14      0.08         7
          14       1.00      1.00      1.00       142
          15       0.00      0.00      0.00         9
          16       0.00     

In [15]:
# # Plot the decision tree
# plt.figure(figsize=(20,10))
# tree.plot_tree(model, feature_names=X.columns, class_names=label_encoders['Recommendation'].classes_, filled=True)
# plt.show()


In [16]:
import joblib

# Save the model
joblib.dump(model, 'travel_decision_tree_model.pkl')

# Load the model
loaded_model = joblib.load('travel_decision_tree_model.pkl')


In [17]:
def predict_recommendation(input_data):
    # Preprocess the input data (encoding and scaling)
    input_df = pd.DataFrame([input_data], columns=X.columns)
    for column in ['Area_of_Interest', 'Preferred_Climate', 'Transportation_Mode']:
        input_df[column] = label_encoders[column].transform(input_df[column])
    input_scaled = scaler.transform(input_df)
    
    # Make prediction
    recommendation_encoded = loaded_model.predict(input_scaled)
    recommendation = label_encoders['Recommendation'].inverse_transform(recommendation_encoded)
    return recommendation[0]


In [26]:
# Test the function
test_input = {
    "Number_of_Travelers": 10,
    "Budget": 3000,
    "Area_of_Interest": "Mountain",
    "Preferred_Climate": "Cool",
    "Transportation_Mode": "Public"
}
print(predict_recommendation(test_input))


ValueError: y contains previously unseen labels: 'Mountain'