### Initialization

In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import chi2

from sklearn.ensemble import AdaBoostRegressor, \
    GradientBoostingRegressor, RandomForestRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, \
    r2_score


import tensorflow as tf

In [17]:
dataset = pd.read_csv("dataset/psqi_memory_update.csv")

Removing outlier

In [102]:
dataset = dataset[dataset["Scenery_score"] != 0]
dataset = dataset[dataset["Reading_score"] != 0]
dataset = dataset[dataset["Digit_score"] != 0]

In [103]:
# Function to remove outliers using Mahalanobis distance
def remove_outliers_bivariate(df, columns, threshold=3.0):
    # Calculate mean and covariance matrix
    mean = df[columns].mean()
    cov_matrix = df[columns].cov()
    
    # Calculate inverse covariance matrix
    inv_cov_matrix = np.linalg.inv(cov_matrix)
    
    # Calculate Mahalanobis distance for each data point
    delta = df[columns] - mean
    mahalanobis_sq = np.sum(np.dot(delta, inv_cov_matrix) * delta, axis=1)
    
    # Define the threshold for Mahalanobis distance
    chi2_threshold = chi2.ppf(0.95, df=len(columns))
    
    # Keep only the data points with Mahalanobis distance below the threshold
    df_filtered = df[mahalanobis_sq <= chi2_threshold]
    
    return df_filtered

# Specify columns for outlier removal: Using columns with lowest VIF
columns_to_check = ['Consumption_of_Sleeping_Pills','Scenery_score']

# Remove outliers
dataset_bi = remove_outliers_bivariate(dataset, columns_to_check)
print(len(dataset_bi))

237


In [104]:
# Function to remove outliers using Mahalanobis distance
def remove_outliers_multivariate(df, threshold=3.0):
    # Calculate mean and covariance matrix
    mean = df.mean()
    cov_matrix = df.cov()
    
    # Calculate inverse covariance matrix
    inv_cov_matrix = np.linalg.inv(cov_matrix)
    
    # Calculate Mahalanobis distance for each data point
    delta = df - mean
    mahalanobis_sq = np.sum(np.dot(delta, inv_cov_matrix) * delta, axis=1)
    
    # Define the threshold for Mahalanobis distance
    chi2_threshold = chi2.ppf(0.95, df=len(df.columns))
    
    # Keep only the data points with Mahalanobis distance below the threshold
    df_filtered = df[mahalanobis_sq <= chi2_threshold]
    
    return df_filtered

# Remove outliers
dataset_multi_out = remove_outliers_multivariate(dataset)
print(len(dataset_multi_out))

226


In [105]:
dataset = dataset_multi_out
X = dataset.iloc[:, 1:7].values
X = np.delete(X, 3, axis=1)
y_scene = dataset.iloc[:, 7].values
y_read = dataset.iloc[:, 8].values
y_digit = dataset.iloc[:, 9].values

In [106]:
X_scene_train, X_scene_test, y_scene_train, y_scene_test = train_test_split(X, y_scene, test_size=0.2, random_state=42)
X_read_train, X_read_test, y_read_train, y_read_test = train_test_split(X, y_read, test_size=0.2, random_state=42)
X_digit_train, X_digit_test, y_digit_train, y_digit_test = train_test_split(X, y_digit, test_size=0.2, random_state=42)

### Define Function

In [107]:
def mape_calc(y_true, y_pred):
    data_len = len(y_true)
    mape = 0
    calc_zero = 0
    for i in range(data_len):
        if y_true[i] == 0:
            calc_zero += 1
            continue
        mape += np.mean(np.abs((y_true[i], y_pred[i]) / y_true[i]))
    return mape, calc_zero

### SVR

In [119]:
# Create SVR model
svr = SVR(kernel='rbf', C=10)  # You can specify other kernel functions as well

# Train the SVR model
svr.fit(X_scene_train, y_scene_train)

# Make predictions on the test set
y_scene_pred = svr.predict(X_scene_test)

# Evaluate the model
mse = mean_squared_error(y_scene_test, y_scene_pred)
r2 = r2_score(y_scene_test, y_scene_pred)
mape, calc_zero = mape_calc(y_scene_test, y_scene_pred)
print("Mean Squared Error:", mse)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 12.05189575745114
R squared: 0.07789226848544217
MAPE: 47.787170016705716 with skip data: 0


In [123]:
# Create SVR model
svr = SVR(kernel='linear', C=1e-5)  # You can specify other kernel functions as well

# Train the SVR model
svr.fit(X_read_train, y_read_train)

# Make predictions on the test set
y_read_pred = svr.predict(X_read_test)

# Evaluate the model
mse = mean_squared_error(y_read_test, y_read_pred)
r2 = r2_score(y_read_test, y_read_pred)
mape, calc_zero = mape_calc(y_read_test, y_read_pred)
print("Mean Squared Error:", mse)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 0.3227091592347831
R squared: -0.11395200806003447
MAPE: 45.383318499999994 with skip data: 0


In [124]:
# Create SVR model
svr = SVR(kernel='linear', C=1)  # You can specify other kernel functions as well

# Train the SVR model
svr.fit(X_digit_train, y_digit_train)

# Make predictions on the test set
y_digit_pred = svr.predict(X_digit_test)

# Evaluate the model
mse = mean_squared_error(y_digit_test, y_digit_pred)
r2 = r2_score(y_digit_test, y_digit_pred)
mape, calc_zero = mape_calc(y_digit_test, y_digit_pred)
print("Mean Squared Error:", mse)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 3.1412964960156624
R squared: -0.1356540894531253
MAPE: 52.13546274363049 with skip data: 0


### Decision Tree

In [125]:
# Create a decision tree regressor
regressor = DecisionTreeRegressor()

# Train the regressor on the training data
regressor.fit(X_scene_train, y_scene_train)

# Predict the target values for the testing set
y_scene_pred = regressor.predict(X_scene_test)

# Evaluate the model
mse = mean_squared_error(y_scene_test, y_scene_pred)
r2 = r2_score(y_scene_test, y_scene_pred)
mape, calc_zero = mape_calc(y_scene_test, y_scene_pred)
print("Mean Squared Error:", mse)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 14.409311463835074
R squared: -0.10247696910164206
MAPE: 47.15369714994329 with skip data: 0


In [126]:
# Create a decision tree regressor
regressor = DecisionTreeRegressor()

# Train the regressor on the training data
regressor.fit(X_read_train, y_read_train)

# Predict the target values for the testing set
y_read_pred = regressor.predict(X_read_test)

# Evaluate the model
mse = mean_squared_error(y_read_test, y_read_pred)
r2 = r2_score(y_read_test, y_read_pred)
mape, calc_zero = mape_calc(y_read_test, y_read_pred)
print("Mean Squared Error:", mse)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 0.4014549347357119
R squared: -0.3857726621545945
MAPE: 46.82290293040294 with skip data: 0


In [127]:
# Create a decision tree regressor
regressor = DecisionTreeRegressor()

# Train the regressor on the training data
regressor.fit(X_digit_train, y_digit_train)

# Predict the target values for the testing set
y_digit_pred = regressor.predict(X_digit_test)

# Evaluate the model
mse = mean_squared_error(y_digit_test, y_digit_pred)
r2 = r2_score(y_digit_test, y_digit_pred)
mape, calc_zero = mape_calc(y_digit_test, y_digit_pred)
print("Mean Squared Error:", mse)
print("R squared:", r2)
print("MAPE:", mape, "with skip data:", calc_zero)

Mean Squared Error: 4.139726316695579
R squared: -0.4966104367209714
MAPE: 51.20188855456711 with skip data: 0
