In [12]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
import tensorflow.compat.v1 as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from ctypes import *
import matplotlib.pyplot as plt
import joblib

from math import pi, sin, cos, asin, pow, radians, sqrt, fabs
from datetime import datetime

from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVR
from lightgbm import LGBMRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Other machine learning tools
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, r2_score
from sklearn.metrics import roc_auc_score
from sklearn.feature_selection import RFECV
from sklearn.preprocessing import StandardScaler
import scipy.stats as sps 
from sklearn.decomposition import PCA

import warnings
warnings.filterwarnings('ignore')

In [13]:
eeg_df = pd.read_excel('eegdata.xlsx')
ques_df = pd.read_excel('questionare.xlsx')
ques_df = ques_df.loc[:,['sub','group','gender','age','外向性','宜人性','尽责性','神经质','开放性']]

In [14]:
df_initial = pd.merge(eeg_df,ques_df,on=['sub'])
#将分类变量转为0与1
df_initial.replace('male',1,inplace=True)
df_initial.replace('female',0,inplace=True)
df_initial['group'].replace(1,0,inplace=True)  #group为0的是被社会拒斥的     
df_initial['group'].replace(2,1,inplace=True)  #group为1的是未被社会拒斥的
df_initial['con'].replace('neg',0,inplace=True)
df_initial['con'].replace('pos',1,inplace=True)


df_initial.drop(columns=['sub','group'],inplace=True)
x = df_initial.drop(columns=['外向性','宜人性','尽责性','神经质','开放性'])
x = StandardScaler().fit_transform(x)
y = df_initial.loc[:,['外向性','宜人性','尽责性','神经质','开放性']]

In [15]:
models = {
    'Decision Tree': DecisionTreeRegressor(),
    'K Nearest Neighbors': KNeighborsRegressor(n_neighbors=1),
    'Random Forest': RandomForestRegressor(n_estimators=2,max_depth=3),
    'Gaussian Naive Bayes': GaussianNB(var_smoothing=1),
    'Light GBM': LGBMRegressor(learning_rate=0.01, max_depth=3, n_estimators=2500),
    'XGBoost': XGBRegressor(n_estimators=2500,max_depth=3),
    'Gradient Boosting': GradientBoostingRegressor(n_estimators=100,max_depth=5),
    "Neural Network": MLPRegressor(hidden_layer_sizes=(50,50)),
    #'Support Vector Machine': SVR()
}

In [16]:
x = pd.DataFrame(x)

In [20]:
for i in ['外向性','宜人性','尽责性','神经质','开放性']:
    y_temp = y.loc[:,[i]]
    holdout_summary = pd.DataFrame(columns = ['mse'])
    for name, model in models.items():
        scores = cross_val_score(model,x,y_temp,cv=5,scoring='r2')
        holdout_summary.loc[name,:] = [scores.mean()]
        print(f"{name} processed")
    print(holdout_summary)


Decision Tree processed
K Nearest Neighbors processed
Random Forest processed
Gaussian Naive Bayes processed
Light GBM processed
XGBoost processed
Gradient Boosting processed
Neural Network processed
                           mse
Decision Tree        -3.069889
K Nearest Neighbors  -5.081464
Random Forest        -1.232696
Gaussian Naive Bayes -3.060711
Light GBM            -0.848961
XGBoost              -1.046759
Gradient Boosting    -1.113528
Neural Network       -1.115467
Decision Tree processed
K Nearest Neighbors processed
Random Forest processed
Gaussian Naive Bayes processed
Light GBM processed
XGBoost processed
Gradient Boosting processed
Neural Network processed
                           mse
Decision Tree        -1.293848
K Nearest Neighbors   -1.83079
Random Forest         -0.73194
Gaussian Naive Bayes -1.419298
Light GBM            -0.697939
XGBoost               -0.90065
Gradient Boosting    -0.800727
Neural Network       -1.372931
Decision Tree processed
K Nearest Neighbor