#load the data set

In [4]:
import pandas as pd
user_history_df = pd.read_csv("Final_Updated_Expanded_UserHistory.csv")


ModuleNotFoundError: No module named 'pandas'

#Convert the VisitDate column to datetime format

In [15]:
user_history_df['VisitDate'] = pd.to_datetime(user_history_df['VisitDate'])


#Extract features from the date

In [18]:
user_history_df['VisitYear'] = user_history_df['VisitDate'].dt.year
user_history_df['VisitMonth'] = user_history_df['VisitDate'].dt.month
user_history_df['VisitDay'] = user_history_df['VisitDate'].dt.day


#Drop the original VisitDate column

In [21]:
user_history_df.drop(columns=['VisitDate'], inplace=True)

#1. Import Necessary Libraries

In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, mean_squared_error


#2. Prepare Features and Labels

In [26]:
# Define features (X) and label (y)
X = user_history_df[['UserID', 'DestinationID', 'VisitYear', 'VisitMonth', 'VisitDay']]
y = user_history_df['ExperienceRating']

# Split into training (80%) and testing (20%) datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data (important for SVM & Logistic Regression)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


#3. Train Each Model Separately
#(a) Train Linear Regression Model

In [28]:
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

# Predict and evaluate
y_pred_lr = lr_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred_lr)
print(f'Linear Regression MSE: {mse}')


Linear Regression MSE: 2.009799140957112


#(b) Train Logistic Regression Model (Binary Classification)

In [30]:
y_binary = (y >= 4).astype(int)  # Good (1) if rating ≥ 4, else Bad (0)
X_train_bin, X_test_bin, y_train_bin, y_test_bin = train_test_split(X, y_binary, test_size=0.2, random_state=42)

log_model = LogisticRegression()
log_model.fit(X_train_bin, y_train_bin)

# Predict and evaluate
y_pred_log = log_model.predict(X_test_bin)
accuracy = accuracy_score(y_test_bin, y_pred_log)
print(f'Logistic Regression Accuracy: {accuracy}')


Logistic Regression Accuracy: 0.69


#c) Train Decision Tree Model

In [32]:
dt_model = DecisionTreeClassifier()
dt_model.fit(X_train, y_train)

# Predict and evaluate
y_pred_dt = dt_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred_dt)
print(f'Decision Tree Accuracy: {accuracy}')


Decision Tree Accuracy: 0.22


#(d) Train Support Vector Machine (SVM) Model

In [34]:
svm_model = SVC()
svm_model.fit(X_train_scaled, y_train)

# Predict and evaluate
y_pred_svm = svm_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred_svm)
print(f'SVM Accuracy: {accuracy}')


SVM Accuracy: 0.21


#Define a Prediction Function

In [36]:
def predict_experience(user_id, destination_id, visit_year, visit_month, visit_day):
    # Create a DataFrame for user input
    user_input = pd.DataFrame([[user_id, destination_id, visit_year, visit_month, visit_day]],
                              columns=['UserID', 'DestinationID', 'VisitYear', 'VisitMonth', 'VisitDay'])

    # Scale the input (for models that require scaling)
    user_input_scaled = scaler.transform(user_input)

    # Linear Regression Prediction
    lr_prediction = lr_model.predict(user_input)[0]

    # Logistic Regression Prediction (Convert to Good/Bad)
    log_prediction = log_model.predict(user_input)[0]
    log_result = "Good (4-5)" if log_prediction == 1 else "Bad (1-3)"

    # Decision Tree Prediction
    dt_prediction = dt_model.predict(user_input)[0]

    # SVM Prediction
    svm_prediction = svm_model.predict(user_input_scaled)[0]

    # Print Predictions
    print(f"Linear Regression Prediction (Experience Rating): {lr_prediction:.2f}")
    print(f"Logistic Regression Prediction (Good/Bad): {log_result}")
    print(f"Decision Tree Prediction (Experience Rating): {dt_prediction}")
    print(f"SVM Prediction (Experience Rating): {svm_prediction}")

    return {
        "Linear Regression": round(lr_prediction, 2),
        "Logistic Regression": log_result,
        "Decision Tree": dt_prediction,
        "SVM": svm_prediction
    }


#Get User Input and Predict

In [None]:
user_id = int(input("Enter UserID: "))
destination_id = int(input("Enter DestinationID: "))
visit_year = int(input("Enter Visit Year: "))
visit_month = int(input("Enter Visit Month: "))
visit_day = int(input("Enter Visit Day: "))

# Get predictions
predictions = predict_experience(user_id, destination_id, visit_year, visit_month, visit_day)


#save the models after training

In [None]:
import pickle

# Assuming you've already trained these models
# Replace these with your trained models
lr_model = ...  # Linear Regression model
log_model = ...  # Logistic Regression model
dt_model = ...  # Decision Tree model
svm_model = ...  # SVM model
scaler = ...  # Your trained scaler (if applicable)

# Save models
pickle.dump(lr_model, open("linear_regression.pkl", "wb"))
pickle.dump(log_model, open("logistic_regression.pkl", "wb"))
pickle.dump(dt_model, open("decision_tree.pkl", "wb"))
pickle.dump(svm_model, open("svm.pkl", "wb"))
pickle.dump(scaler, open("scaler.pkl", "wb"))

print("All models have been saved successfully!")


In [None]:
import pickle

# Save Linear Regression Model
with open("linear_regression.pkl", "wb") as model_file:
    pickle.dump(lr_model, model_file)  # Replace lr_model with your trained model

print("Model saved successfully!")


In [None]:
#After saving the model, run this command to download it
from google.colab import files
files.download("linear_regression.pkl")


In [None]:
import pickle
from google.colab import files

# Save the trained model
with open("linear_regression.pkl", "wb") as model_file:
    pickle.dump(lr_model, model_file)  # Ensure `lr_model` is your trained model

# Download the file
files.download("linear_regression.pkl")
