### Initialization

In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import chi2

from sklearn.ensemble import AdaBoostRegressor, \
    GradientBoostingRegressor, RandomForestRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, \
    r2_score

import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
dataset = pd.read_csv("dataset/psqi_memory_update.csv")

Removing outlier

In [3]:
dataset = dataset[dataset["Scenery_score"] != 0]
dataset = dataset[dataset["Reading_score"] != 0]
dataset = dataset[dataset["Digit_score"] != 0]

In [103]:
# Function to remove outliers using Mahalanobis distance
def remove_outliers_bivariate(df, columns, threshold=3.0):
    # Calculate mean and covariance matrix
    mean = df[columns].mean()
    cov_matrix = df[columns].cov()
    
    # Calculate inverse covariance matrix
    inv_cov_matrix = np.linalg.inv(cov_matrix)
    
    # Calculate Mahalanobis distance for each data point
    delta = df[columns] - mean
    mahalanobis_sq = np.sum(np.dot(delta, inv_cov_matrix) * delta, axis=1)
    
    # Define the threshold for Mahalanobis distance
    chi2_threshold = chi2.ppf(0.95, df=len(columns))
    
    # Keep only the data points with Mahalanobis distance below the threshold
    df_filtered = df[mahalanobis_sq <= chi2_threshold]
    
    return df_filtered

# Specify columns for outlier removal: Using columns with lowest VIF
columns_to_check = ['Consumption_of_Sleeping_Pills','Scenery_score']

# Remove outliers
dataset_bi = remove_outliers_bivariate(dataset, columns_to_check)
print(len(dataset_bi))

237


In [4]:
# Function to remove outliers using Mahalanobis distance
def remove_outliers_multivariate(df, threshold=3.0):
    # Calculate mean and covariance matrix
    mean = df.mean()
    cov_matrix = df.cov()
    
    # Calculate inverse covariance matrix
    inv_cov_matrix = np.linalg.inv(cov_matrix)
    
    # Calculate Mahalanobis distance for each data point
    delta = df - mean
    mahalanobis_sq = np.sum(np.dot(delta, inv_cov_matrix) * delta, axis=1)
    
    # Define the threshold for Mahalanobis distance
    chi2_threshold = chi2.ppf(0.95, df=len(df.columns))
    
    # Keep only the data points with Mahalanobis distance below the threshold
    df_filtered = df[mahalanobis_sq <= chi2_threshold]
    
    return df_filtered

# Remove outliers
dataset_multi_out = remove_outliers_multivariate(dataset)
print(len(dataset_multi_out))

249


In [5]:
dataset = dataset_multi_out
X = dataset.iloc[:, 1:7].values
X = np.delete(X, 3, axis=1)
y_scene = dataset.iloc[:, 7].values
y_read = dataset.iloc[:, 8].values
y_digit = dataset.iloc[:, 9].values

In [6]:
X_scene_train, X_scene_test, y_scene_train, y_scene_test = train_test_split(X, y_scene, test_size=0.2, random_state=42)
X_read_train, X_read_test, y_read_train, y_read_test = train_test_split(X, y_read, test_size=0.2, random_state=42)
X_digit_train, X_digit_test, y_digit_train, y_digit_test = train_test_split(X, y_digit, test_size=0.2, random_state=42)

### Define Function

In [7]:
def mape_calc(y_true, y_pred):
    data_len = len(y_true)
    mape = 0
    calc_zero = 0
    for i in range(data_len):
        if y_true[i] == 0:
            calc_zero += 1
            continue
        mape += np.mean(np.abs((y_true[i], y_pred[i]) / y_true[i]))
    return mape, calc_zero

### SVR

In [14]:
# Create SVR model
svr = SVR(kernel='rbf', C=10)  # You can specify other kernel functions as well

# Train the SVR model
svr.fit(X_scene_train, y_scene_train)

# Make predictions on the test set
y_scene_pred = svr.predict(X_scene_test)

# Evaluate the model
mse = mean_squared_error(y_scene_test, y_scene_pred)
r2 = r2_score(y_scene_test, y_scene_pred)
mae = mean_absolute_error(y_scene_test, y_scene_pred)
mape, calc_zero = mape_calc(y_scene_test, y_scene_pred)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 13.001385359435094
Mean Absolute Error: 2.677990050355957
R squared: 0.06354365154318076
MAPE: 49.62612236416027 with skip data: 0


In [15]:
# Create SVR model
svr = SVR(kernel='linear', C=1e-5)  # You can specify other kernel functions as well

# Train the SVR model
svr.fit(X_read_train, y_read_train)

# Make predictions on the test set
y_read_pred = svr.predict(X_read_test)

# Evaluate the model
mse = mean_squared_error(y_read_test, y_read_pred)
r2 = r2_score(y_read_test, y_read_pred)
mae = mean_absolute_error(y_read_test, y_read_pred)
mape, calc_zero = mape_calc(y_read_test, y_read_pred)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 0.4898601155209992
Mean Absolute Error: 0.563961
R squared: -0.22465028880249793
MAPE: 48.84938050000001 with skip data: 0


In [16]:
# Create SVR model
svr = SVR(kernel='linear', C=1)  # You can specify other kernel functions as well

# Train the SVR model
svr.fit(X_digit_train, y_digit_train)

# Make predictions on the test set
y_digit_pred = svr.predict(X_digit_test)

# Evaluate the model
mse = mean_squared_error(y_digit_test, y_digit_pred)
r2 = r2_score(y_digit_test, y_digit_pred)
mae = mean_absolute_error(y_digit_test, y_digit_pred)
mape, calc_zero = mape_calc(y_digit_test, y_digit_pred)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 3.2044202244223827
Mean Absolute Error: 1.4291777001906985
R squared: -0.292939083449961
MAPE: 54.83025240218771 with skip data: 0


### Decision Tree

In [17]:
# Create a decision tree regressor
regressor = DecisionTreeRegressor()

# Train the regressor on the training data
regressor.fit(X_scene_train, y_scene_train)

# Predict the target values for the testing set
y_scene_pred = regressor.predict(X_scene_test)

# Evaluate the model
mse = mean_squared_error(y_scene_test, y_scene_pred)
r2 = r2_score(y_scene_test, y_scene_pred)
mae = mean_absolute_error(y_scene_test, y_scene_pred)
mape, calc_zero = mape_calc(y_scene_test, y_scene_pred)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 17.903429721160865
Mean Absolute Error: 3.1552482811306337
R squared: -0.28953799599245644
MAPE: 51.03601371270751 with skip data: 0


In [18]:
# Create a decision tree regressor
regressor = DecisionTreeRegressor()

# Train the regressor on the training data
regressor.fit(X_read_train, y_read_train)

# Predict the target values for the testing set
y_read_pred = regressor.predict(X_read_test)

# Evaluate the model
mse = mean_squared_error(y_read_test, y_read_pred)
r2 = r2_score(y_read_test, y_read_pred)
mae = mean_absolute_error(y_read_test, y_read_pred)
mape, calc_zero = mape_calc(y_read_test, y_read_pred)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 0.5987006883907346
Mean Absolute Error: 0.5726249045072574
R squared: -0.4967517209768366
MAPE: 50.46398480604363 with skip data: 0


In [21]:
# Create a decision tree regressor
regressor = DecisionTreeRegressor()

# Train the regressor on the training data
regressor.fit(X_digit_train, y_digit_train)

# Predict the target values for the testing set
y_digit_pred = regressor.predict(X_digit_test)

# Evaluate the model
mse = mean_squared_error(y_digit_test, y_digit_pred)
r2 = r2_score(y_digit_test, y_digit_pred)
mae = mean_absolute_error(y_digit_test, y_digit_pred)
mape, calc_zero = mape_calc(y_digit_test, y_digit_pred)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 4.359030851254778
Mean Absolute Error: 1.7397410236822004
R squared: -0.7588084454707791
MAPE: 54.688619083997246 with skip data: 0


### Random Forest

In [20]:
# Create a decision tree regressor
regressor = RandomForestRegressor()

# Train the regressor on the training data
regressor.fit(X_scene_train, y_scene_train)

# Predict the target values for the testing set
y_scene_pred = regressor.predict(X_scene_test)

# Evaluate the model
mse = mean_squared_error(y_scene_test, y_scene_pred)
r2 = r2_score(y_scene_test, y_scene_pred)
mae = mean_absolute_error(y_scene_test, y_scene_pred)
mape, calc_zero = mape_calc(y_scene_test, y_scene_pred)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 14.759687957254002
Mean Absolute Error: 2.9356115422289677
R squared: -0.06310236230185273
MAPE: 51.127014864716614 with skip data: 0


In [44]:
# Create a decision tree regressor
regressor = RandomForestRegressor()

# Train the regressor on the training data
regressor.fit(X_read_train, y_read_train)

# Predict the target values for the testing set
y_read_pred = regressor.predict(X_read_test)

# Evaluate the model
mse = mean_squared_error(y_read_test, y_read_pred)
r2 = r2_score(y_read_test, y_read_pred)
mae = mean_absolute_error(y_read_test, y_read_pred)
mape, calc_zero = mape_calc(y_read_test, y_read_pred)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 0.5144606261153553
Mean Absolute Error: 0.5792828756384678
R squared: -0.28615156528838837
MAPE: 50.0002699652735 with skip data: 0


In [23]:
# Create a decision tree regressor
regressor = RandomForestRegressor()

# Train the regressor on the training data
regressor.fit(X_digit_train, y_digit_train)

# Predict the target values for the testing set
y_digit_pred = regressor.predict(X_digit_test)

# Evaluate the model
mse = mean_squared_error(y_digit_test, y_digit_pred)
r2 = r2_score(y_digit_test, y_digit_pred)
mape, calc_zero = mape_calc(y_digit_test, y_digit_pred)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 3.6520649191079815
Mean Absolute Error: 0.5721815686343615
R squared: -0.47355750448191647
MAPE: 53.98433902859105 with skip data: 0


### Artificial Neural Network

In [31]:
regressor = tf.keras.models.Sequential([
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.Dense(1, activation="relu")    
])

regressor.compile(optimizer="adam",
                  loss="mse",
                  metrics=["mae"])

early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)

regressor.fit(X_scene_train, y_scene_train,
              validation_data=(X_scene_test, y_scene_test),
              epochs=1000, verbose=0,
              callbacks=[early_stopping])

mse, mae = regressor.evaluate(X_scene_test, y_scene_test)
y_scene_pred = regressor.predict(X_scene_test)
r2 = r2_score(y_scene_test, y_scene_pred)
mape, calc_zero = mape_calc(y_scene_test, np.squeeze(y_scene_pred))
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 13.717702865600586
Mean Absolute Error: 2.811037063598633
R squared: 0.011949208087265406
MAPE: 52.68199015760552 with skip data: 0


In [34]:
regressor = tf.keras.models.Sequential([
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.Dense(100, activation="relu"),
    # tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.Dense(1, activation="relu")    
])

regressor.compile(optimizer="adam",
                  loss="mse",
                  metrics=["mae"])

early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)

regressor.fit(X_read_train, y_read_train,
              validation_data=(X_read_test, y_read_test),
              epochs=1000, verbose=0,
              callbacks=[early_stopping])

mse, mae = regressor.evaluate(X_read_test, y_read_test)
y_read_pred = regressor.predict(X_read_test)
r2 = r2_score(y_read_test, y_read_pred)
mape, calc_zero = mape_calc(y_read_test, np.squeeze(y_read_pred))
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 0.4024452865123749
Mean Absolute Error: 0.5066123008728027
R squared: -0.006113310536142169
MAPE: 50.420398098230365 with skip data: 0


In [41]:
regressor = tf.keras.models.Sequential([
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.Dense(1, activation="relu")    
])

regressor.compile(optimizer="sgd",
                  loss="mse",
                  metrics=["mae"])

early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)

regressor.fit(X_digit_train, y_digit_train,
              validation_data=(X_digit_test, y_digit_test),
              epochs=1000, verbose=0,
              callbacks=[early_stopping])

mse, mae = regressor.evaluate(X_digit_test, y_digit_test)
y_digit_pred = regressor.predict(X_digit_test)
r2 = r2_score(y_digit_test, y_digit_pred)
mape, calc_zero = mape_calc(y_digit_test, np.squeeze(y_digit_pred))
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 2.979013681411743
Mean Absolute Error: 1.414380669593811
R squared: -0.20199069042150564
MAPE: 53.876656774112156 with skip data: 0


### XGBoost

In [42]:
# Create an AdaBoostRegressor
regressor = AdaBoostRegressor(n_estimators=100, random_state=42)

# Train the regressor on the training data
regressor.fit(X_scene_train, y_scene_train)

# Predict the target values for the testing set
y_scene_pred = regressor.predict(X_scene_test)

# Evaluate the model
mse = mean_squared_error(y_scene_test, y_scene_pred)
r2 = r2_score(y_scene_test, y_scene_pred)
mae = mean_absolute_error(y_scene_test, y_scene_pred)
mape, calc_zero = mape_calc(y_scene_test, y_scene_pred)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)


Mean Squared Error: 13.717712945775524
Mean Absolute Error: 3.039947174426148
R squared: 0.01194841786168388
MAPE: 52.21713152302676 with skip data: 0


In [45]:
# Create an AdaBoostRegressor
regressor = AdaBoostRegressor(n_estimators=100, random_state=42)

# Train the regressor on the training data
regressor.fit(X_read_train, y_read_train)

# Predict the target values for the testing set
y_read_pred = regressor.predict(X_read_test)

# Evaluate the model
mse = mean_squared_error(y_read_test, y_read_pred)
r2 = r2_score(y_read_test, y_read_pred)
mae = mean_absolute_error(y_read_test, y_read_pred)
mape, calc_zero = mape_calc(y_read_test, y_read_pred)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 0.3935534199019265
Mean Absolute Error: 0.5532461343498268
R squared: 0.016116450245183822
MAPE: 50.46032133569779 with skip data: 0


In [49]:
# Create an AdaBoostRegressor
regressor = AdaBoostRegressor(n_estimators=100, random_state=42)

# Train the regressor on the training data
regressor.fit(X_digit_train, y_digit_train)

# Predict the target values for the testing set
y_digit_pred = regressor.predict(X_digit_test)

# Evaluate the model
mse = mean_squared_error(y_digit_test, y_digit_pred)
r2 = r2_score(y_digit_test, y_digit_pred)
mae = mean_absolute_error(y_digit_test, y_digit_pred)
mape, calc_zero = mape_calc(y_digit_test, y_digit_pred)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 2.8570379449235697
Mean Absolute Error: 1.357624297729561
R squared: -0.1527751553113177
MAPE: 54.511101890312396 with skip data: 0
