In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score,confusion_matrix
from sklearn.model_selection import cross_val_predict
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE

In [2]:
df = pd.read_csv("processed.csv")
df.drop("Unnamed: 0",axis=1,inplace=True)
df.head()

Unnamed: 0,ACC_x,ACC_y,ACC_z,EDA,BVP,TEMP,label
0,1.074802,0.773372,0.288853,0.226233,-1.72813,1.983414,0
1,0.861713,-1.121982,1.05125,0.22182,1.451251,1.983414,0
2,0.73386,-0.648144,0.587183,0.208582,2.54345,1.983414,0
3,0.92564,-0.34661,0.520887,0.169752,-1.99002,1.983414,0
4,0.563389,-0.734296,0.918659,0.153867,-1.054558,1.983414,0


In [4]:
df.drop(["BVP"],axis=1).head()

Unnamed: 0,ACC_x,ACC_y,ACC_z,EDA,TEMP,label
0,1.074802,0.773372,0.288853,0.226233,1.983414,0
1,0.861713,-1.121982,1.05125,0.22182,1.983414,0
2,0.73386,-0.648144,0.587183,0.208582,1.983414,0
3,0.92564,-0.34661,0.520887,0.169752,1.983414,0
4,0.563389,-0.734296,0.918659,0.153867,1.983414,0


In [14]:
df.describe()

Unnamed: 0,ACC_x,ACC_y,ACC_z,EDA,BVP,TEMP,label
count,142271.0,142271.0,142271.0,142271.0,142271.0,142271.0,142271.0
mean,7.031961000000001e-17,-1.4383560000000002e-17,5.2739710000000004e-17,-1.550228e-16,-3.7557070000000004e-17,2.887899e-15,1.350711
std,1.000004,1.000004,1.000004,1.000004,1.000004,1.000004,1.29063
min,-3.037811,-2.543497,-2.727586,-0.8523047,-2.813386,-2.290009,0.0
25%,-1.16263,-0.5619913,-0.6724293,-0.6920327,-0.5679787,-0.7235052,0.0
50%,0.4568444,0.1272282,0.08996735,-0.5237478,0.05797745,0.1662691,1.0
75%,0.8830218,0.6010666,0.7860687,0.3022151,0.5837409,0.7051466,3.0
max,2.395952,2.625649,2.741782,3.085749,2.763179,2.08367,3.0


In [3]:
df.label.value_counts()

0    57551
3    44742
1    22015
2    17963
Name: label, dtype: int64

In [4]:
def train (model,X,y,flag,sample=False):
    if sample ==True:
        X_train,X_test,y_train,y_test = train_test_split(X,y,train_size=0.4)
    else:
        X_train,X_test,y_train,y_test = train_test_split(X,y,train_size=0.7)
    model.fit(X_train,y_train)
    if flag=="test":
        y_pred = cross_val_predict(model, X_test, y_test, cv=5)
        
        # Calculate metrics
        accuracy = accuracy_score(y_test,y_pred)
        precision = precision_score(y_test, y_pred,average='weighted')
        recall = recall_score(y_test, y_pred,average='weighted')
        f1 = f1_score(y_test, y_pred,average='weighted')
        conf_matrix = confusion_matrix(y_test, y_pred)
    else:
        y_pred = cross_val_predict(model, X_train, y_train, cv=5)
        # Calculate metrics
        accuracy = accuracy_score(y_train,y_pred)
        precision = precision_score(y_train, y_pred,average='weighted')
        recall = recall_score(y_train, y_pred,average='weighted')
        f1 = f1_score(y_train, y_pred,average='weighted')
        conf_matrix = confusion_matrix(y_train, y_pred)
    
    return accuracy, precision, recall, f1, conf_matrix

## Stress vs Non Stress

In [5]:
dfs = df.copy()
label_mapping = {0: 0, 1: 1, 2: 0, 3: 0}
dfs['label'] = dfs['label'].replace(label_mapping)
dfs.label.value_counts()

0    120256
1     22015
Name: label, dtype: int64

In [6]:
Xs = dfs.drop(['label'],axis=1)
ys = dfs['label']

In [7]:

smote = SMOTE(random_state=42)
X_smote, y_smote = smote.fit_resample(dfs[['ACC_x', 'ACC_y', 'ACC_z', 'EDA', 'BVP', 'TEMP']], dfs['label'])

In [8]:
y_smote.value_counts()

0    120256
1    120256
Name: label, dtype: int64

In [13]:
rfa,rfp,rfr,rff1,rfc = train(LogisticRegression(),X_smote,y_smote,"test")
rfa,rfp,rfr,rff1

(0.667974055492419, 0.6687675868882941, 0.667974055492419, 0.667622974001774)

In [None]:
rfa,rfp,rfr,rff1,rfc = train(RandomForestClassifier(),X_smote,y_smote,"test")

In [None]:
rfa,rfp,rfr,rff1

(0.9990852897968234, 0.999085365249107, 0.9990852897968234, 0.9990852899099718)

In [54]:
rfc

array([[36067,    40],
       [   26, 36021]], dtype=int64)

In [55]:
rfa,rfp,rfr,rff1,rfc = train(RandomForestClassifier(),X_smote,y_smote,"train")

In [56]:
rfa,rfp,rfr,rff1,rfc

(0.9995545207236959,
 0.9995545718452375,
 0.9995545207236959,
 0.999554520576446,
 array([[83968,    51],
        [   24, 84315]], dtype=int64))

## Stress vs Amusment vs Non Stress

In [5]:
df3 = df.copy()
label_mapping = {0: 0, 1: 1, 2: 2, 3: 0}
df3['label'] = df3['label'].replace(label_mapping)
df3.label.value_counts()

0    102293
1     22015
2     17963
Name: label, dtype: int64

In [8]:
X3 = df3.drop(['label','BVP'],axis=1)
y3 = df3['label']

smote = SMOTE(random_state=42)
X3_smote, y3_smote = smote.fit_resample(df3[['ACC_x', 'ACC_y', 'ACC_z', 'EDA', 'TEMP']], df3['label'])

In [10]:
rfmodel = RandomForestClassifier()
X_train,X_test,y_train,y_test = train_test_split(X3_smote,y3_smote,train_size=0.7)
rfmodel.fit(X_train,y_train)

RandomForestClassifier()

In [12]:
preds = rfmodel.predict(X_test)
accuracy_score(preds,y_test)

0.9996089676746611

In [13]:
import pickle

# Save the trained RF model to a file using pickle
with open('rf_model.pkl', 'wb') as model_file:
    pickle.dump(rfmodel, model_file)


In [9]:
rfa,rfp,rfr,rff1,rfc = train(RandomForestClassifier(),X3_smote,y3_smote,"test")
rfa,rfp,rfr,rff1,rfc

(0.9991636253041363,
 0.999163914479958,
 0.9991636253041363,
 0.9991636419877408,
 array([[30859,    19,     8],
        [   13, 30553,     0],
        [   19,    18, 30575]], dtype=int64))

In [60]:
rfa,rfp,rfr,rff1,rfc = train(RandomForestClassifier(),X3_smote,y3_smote,"train")
rfa,rfp,rfr,rff1,rfc

(0.9993948281079068,
 0.9993949938039443,
 0.9993948281079068,
 0.9993948289781769,
 array([[71611,    36,    11],
        [   14, 71640,     1],
        [   48,    20, 71434]], dtype=int64))

In [61]:
rfa,rfp,rfr,rff1,rfc = train(LogisticRegression(),X3_smote,y3_smote,"test")
rfa,rfp,rfr,rff1,rfc

(0.4399113660062565,
 0.42789106825044254,
 0.4399113660062565,
 0.4304166034473496,
 array([[ 9565, 11013, 10244],
        [ 3764, 20023,  6902],
        [12440,  7201, 10912]], dtype=int64))