In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.neural_network import MLPRegressor

In [None]:
# Mount Google Drive at /content/drive so the dataset CSVs stored under
# /content/drive/MyDrive/... can be read by the cells below.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime; running this notebook elsewhere would need a different data
# location -- confirm.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# All three CSVs are read from the same Drive folder. Keeping that folder in
# a single constant means relocating the datasets needs one edit instead of
# three; the resulting paths are identical to the previously hard-coded ones.
DATA_DIR = "/content/drive/MyDrive/BAP Project/Datasets/Final Datasets"

# NOTE(review): judging by the file names, Hotel_Cleaned.csv is the full
# cleaned dataset and train_set.csv / test_set.csv are a pre-made split of
# it -- confirm.
HotelReview = pd.read_csv(f"{DATA_DIR}/Hotel_Cleaned.csv")
HotelTrain = pd.read_csv(f"{DATA_DIR}/train_set.csv")
HotelTest = pd.read_csv(f"{DATA_DIR}/test_set.csv")

In [None]:
# For code testing: a small, reproducible (fixed seed) sample of the full
# dataset for quick dry runs.
# n is capped at the frame length because DataFrame.sample() without
# replacement raises ValueError when n exceeds the number of rows.
CodeTest = HotelReview.sample(n=min(1000, len(HotelReview)), random_state=42)

In [None]:
# Preprocess data for NN
def data_preprocessing(df):
    """Build the feature frame fed to the neural network.

    Adds a natural-log word-count column, drops the raw text / metadata /
    component-sentiment columns, and one-hot encodes ``Travel_Type`` and
    ``Hotel_Class``. The input frame is NOT modified; a new frame is
    returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Reviews_WordCount``, ``Travel_Type``, ``Hotel_Class``
        and every column named in the drop list below.

    Returns
    -------
    pandas.DataFrame
        Columns that were neither dropped nor encoded (passed through
        unchanged), plus ``Log_WordCount`` and the dummy columns. The first
        level of each encoded column is omitted (``drop_first=True``).

    Raises
    ------
    KeyError
        If a required column is missing from ``df``.
    """
    # assign() returns a new frame, so the caller's DataFrame does not
    # silently gain a 'Log_WordCount' column as a side effect of this call.
    # NOTE(review): np.log gives -inf for a word count of 0; presumably the
    # cleaned data has no empty reviews -- confirm.
    features = df.assign(Log_WordCount=np.log(df['Reviews_WordCount']))

    # Raw word count is replaced by its log; the remaining columns are text,
    # identifiers, dates or other rating/sentiment fields not used as inputs.
    features = features.drop(
        ['Hotel', 'Titles', 'Reviews', 'Dates', 'No_of_Reviews',
         'Reviews_WordCount', 'Rating',
         'Address', 'Sentiment_Pos', 'Sentiment_Neg', 'Sentiment_Neu',
         'Sentiment', 'Year'],
        axis=1,
    )

    # One-hot encode the categoricals. The dummy columns depend on the levels
    # present in *this* frame, so frames encoded separately can end up with
    # different columns.
    features = pd.get_dummies(features, columns=['Travel_Type', 'Hotel_Class'],
                              drop_first=True)
    return features

In [None]:
def neural_network(X_train, X_test, y_train, y_test):
    """Tune an MLPRegressor by cross-validated grid search and print test metrics.

    A 5-fold ``GridSearchCV`` (scored by negative mean squared error, using
    all available cores) is run over the architecture, activation, L2 penalty
    and iteration-cap grid defined below. The best estimator is then
    evaluated on the held-out test data.

    Parameters
    ----------
    X_train, y_train
        Training features and target, used for the grid search and for
        fitting the selected model.
    X_test, y_test
        Held-out features and target, used only for the reported metrics.

    Returns
    -------
    None
        The best hyperparameters, the estimator's ``score`` on the test set,
        R2, RMSE and MAE are printed.
    """
    # define parameter grid for hyperparameter tuning
    param_grid = {
        # Each architecture appears once: a repeated entry is re-fitted for
        # every activation/alpha/max_iter combination and CV fold but can
        # never change which parameters are selected.
        'hidden_layer_sizes': [(50,50),
                               (100,80,70), (90,60,40), (50,30,20),
                               (50,50,30,30), (50,30,30,10)],
        'activation': ['relu', 'tanh'],
        'alpha': [0.0001, 0.001, 0.01],
        'max_iter': [20, 30, 40]
    }

    model = MLPRegressor(random_state=42)

    # create grid search object
    grid_search = GridSearchCV(model, param_grid=param_grid,
                               cv=5,
                               scoring='neg_mean_squared_error',
                               n_jobs=-1)

    # fit grid search on training data
    grid_search.fit(X_train, y_train)

    # get best parameters and model
    best_params = grid_search.best_params_
    # GridSearchCV defaults to refit=True, so best_estimator_ has already
    # been retrained on the whole of X_train/y_train with the best
    # parameters; fitting it again here would only repeat that work.
    best_model = grid_search.best_estimator_
    print('Best Hyperparameters:', best_params)

    # For a regressor, score() is the coefficient of determination (R^2),
    # not a classification accuracy; the printed label is kept unchanged so
    # the output matches earlier runs.
    test_score = best_model.score(X_test, y_test)
    print("Accuracy Score:", test_score)

    # predict on test data using best model (done once, reused by all metrics)
    y_pred = best_model.predict(X_test)

    # calculate performance metrics
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # print results
    print('R2:', r2)
    print('RMSE:', rmse)
    print('MAE:', mae)

In [None]:
# This is the actual one. It can take pretty long to run.
HotelTrain_Processed = data_preprocessing(HotelTrain)
HotelTest_Processed = data_preprocessing(HotelTest)

# Sentiment_Compound is the regression target; everything else is a feature.
y_train = HotelTrain_Processed['Sentiment_Compound']
x_train = HotelTrain_Processed.drop(['Sentiment_Compound'], axis = 1)
y_test = HotelTest_Processed['Sentiment_Compound']
x_test = HotelTest_Processed.drop(['Sentiment_Compound'], axis = 1)

# The train and test frames are one-hot encoded independently, so a category
# level present in only one of them yields different feature columns. Check
# before the long grid search rather than discovering the mismatch when
# predicting on the test set afterwards.
if list(x_train.columns) != list(x_test.columns):
    raise ValueError(
        "Train and test feature columns differ (names or order); "
        "columns present in only one set: "
        f"{sorted(set(x_train.columns) ^ set(x_test.columns))}"
    )

neural_network(x_train, x_test, y_train, y_test)



Best Hyperparameters: {'activation': 'relu', 'alpha': 0.01, 'hidden_layer_sizes': (50, 50, 30, 30), 'max_iter': 30}




Accuracy Score: 0.4323090712616796
R2: 0.4323090712616796
RMSE: 0.33999879051839405
MAE: 0.19586726571007837
