In [None]:
from lib.utils import *
def train_test_confusion_matrices(X_train,X_test,y_train,y_test,clf,title):
    """Plot row-normalized confusion matrices for the train and test splits side by side.

    Parameters
    ----------
    X_train, X_test : features passed to ``clf.predict`` for each panel.
    y_train, y_test : true labels matching ``X_train`` / ``X_test``.
    clf : fitted classifier exposing ``predict``. When it also exposes
        ``classes_``, that label order is used for both panels and for the
        tick labels; otherwise the labels are inferred per panel.
    title : figure super-title, also used as the stem of the output file name.

    Side effects
    ------------
    Creates the ``figures`` directory if needed and writes
    ``figures/{title}_train_test.jpg``. The figure is left open so the
    notebook can still render it inline. Returns ``None``.
    """
    import os  # local import: the notebook's star-import is not guaranteed to expose os

    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(7.2, 4.45), dpi=500, sharex=True, sharey=True)

    # Pin one label order for both panels: the axes are shared, so the two
    # matrices must have the same shape and the same class on each row/column.
    class_labels = getattr(clf, 'classes_', None)
    tick_labels = 'auto' if class_labels is None else list(class_labels)

    panels = (
        ('Training Data', X_train, y_train),
        ('Testing Data', X_test, y_test),
    )
    for ax, (panel_title, X_part, y_part) in zip(axes, panels):
        y_pred = clf.predict(X_part)
        # normalize='true' divides each row by the number of true samples of that class.
        cm = confusion_matrix(y_part, y_pred, labels=class_labels, normalize='true')
        sns.heatmap(ax=ax, data=cm, annot=True, xticklabels=tick_labels, yticklabels=tick_labels)
        ax.set_title(panel_title)

    fig.supxlabel('Predicted Label')
    fig.supylabel('True Label')
    fig.suptitle(f'{title}', fontweight='heavy')

    # savefig does not create missing directories.
    os.makedirs('figures', exist_ok=True)
    fig.savefig(f'figures/{title}_train_test.jpg', dpi=200, bbox_inches='tight')
def test_confusion_matrix(X_test,y_test,clf,title):
    """Plot the row-normalized confusion matrix of ``clf`` on the test split.

    Parameters
    ----------
    X_test : features passed to ``clf.predict``.
    y_test : true labels matching ``X_test``.
    clf : fitted classifier exposing ``predict``. When it also exposes
        ``classes_``, that label order is used for the matrix and the tick
        labels; otherwise the labels are inferred from the data.
    title : figure title, also used as the stem of the output file name.

    Side effects
    ------------
    Creates the ``figures`` directory if needed and writes
    ``figures/{title}_test.jpg``. The figure is left open so the notebook
    can still render it inline. Returns ``None``.
    """
    import os  # local import: the notebook's star-import is not guaranteed to expose os

    fig, ax = plt.subplots(figsize=(5, 5), dpi=500)

    # Use the classifier's own label order so ticks show class labels, not positions.
    class_labels = getattr(clf, 'classes_', None)
    tick_labels = 'auto' if class_labels is None else list(class_labels)

    y_pred = clf.predict(X_test)
    # normalize='true' divides each row by the number of true samples of that class.
    cm = confusion_matrix(y_test, y_pred, labels=class_labels, normalize='true')
    sns.heatmap(ax=ax, data=cm, annot=True, xticklabels=tick_labels, yticklabels=tick_labels)

    ax.set_xlabel('Predicted Label')
    ax.set_ylabel('True Label')
    ax.set_title(f'{title}', fontweight='heavy')

    # savefig does not create missing directories.
    os.makedirs('figures', exist_ok=True)
    fig.savefig(f'figures/{title}_test.jpg', dpi=200, bbox_inches='tight')

In [None]:
# Load the dataset via load_all_psd() and separate the target column from the features.
df = load_all_psd()
y = df['label']               # target: the 'label' column
X = df.drop(columns='label')  # features: every remaining column

In [None]:
# Hold out 20% of the samples for testing. The split is shuffled, stratified on y,
# and seeded so that it is identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    stratify=y,
    random_state=0,
)

# Imbalanced

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Baseline decision tree fitted on the original (imbalanced) training split.
# random_state is pinned, like the train/test split, so the saved figures are
# reproducible across kernel restarts.
clf = DecisionTreeClassifier(random_state=0)
clf.fit(X_train, y_train)
train_test_confusion_matrices(X_train, X_test, y_train, y_test, clf, 'DT_all_80_20_imbalanced')
test_confusion_matrix(X_test, y_test, clf, 'DT_all_80_20_imbalanced')

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Baseline random forest fitted on the original (imbalanced) training split.
# random_state is pinned, like the train/test split, because the forest's
# bootstrapping and feature sampling are otherwise different on every run.
clf = RandomForestClassifier(random_state=0)
clf.fit(X_train, y_train)
train_test_confusion_matrices(X_train, X_test, y_train, y_test, clf, 'RF_all_80_20_imbalanced')
test_confusion_matrix(X_test, y_test, clf, 'RF_all_80_20_imbalanced')

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Baseline k-nearest-neighbours model (default hyperparameters) fitted on the
# original (imbalanced) training split; fit() returns the estimator itself.
clf = KNeighborsClassifier().fit(X_train, y_train)
train_test_confusion_matrices(
    X_train, X_test, y_train, y_test, clf, 'KNN_all_80_20_imbalanced'
)
test_confusion_matrix(X_test, y_test, clf, 'KNN_all_80_20_imbalanced')

# Balance by Class Weight

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree with class_weight='balanced', fitted on the original training split.
# random_state is pinned, like the train/test split, so the saved figures are
# reproducible across kernel restarts.
clf = DecisionTreeClassifier(class_weight='balanced', random_state=0)
clf.fit(X_train, y_train)
train_test_confusion_matrices(X_train, X_test, y_train, y_test, clf, 'DT_all_80_20_class_weight')
test_confusion_matrix(X_test, y_test, clf, 'DT_all_80_20_class_weight')

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with class_weight='balanced', fitted on the original training split.
# random_state is pinned, like the train/test split, because the forest's
# bootstrapping and feature sampling are otherwise different on every run.
clf = RandomForestClassifier(class_weight='balanced', random_state=0)
clf.fit(X_train, y_train)
train_test_confusion_matrices(X_train, X_test, y_train, y_test, clf, 'RF_all_80_20_class_weight')
test_confusion_matrix(X_test, y_test, clf, 'RF_all_80_20_class_weight')

# SMOTE

In [None]:
from imblearn.over_sampling import SMOTE

In [None]:
# Apply SMOTE oversampling to the training split only; X_test / y_test are not touched.
# The sampler is seeded so the resampled set is identical on every run.
sm = SMOTE(random_state=0)
X_train_res, y_train_res = sm.fit_resample(
    X_train,
    y_train,
)

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree fitted on the SMOTE-resampled training set. Fitting on
# X_train / y_train here would just reproduce the imbalanced baseline.
# random_state is pinned, like the split and the SMOTE sampler, for reproducibility.
clf = DecisionTreeClassifier(random_state=0)
clf.fit(X_train_res, y_train_res)
# The plots still evaluate on the original train/test splits, so the figures
# stay directly comparable with the other sections.
train_test_confusion_matrices(X_train, X_test, y_train, y_test, clf, 'DT_all_80_20_smote')
test_confusion_matrix(X_test, y_test, clf, 'DT_all_80_20_smote')

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest fitted on the SMOTE-resampled training set. Fitting on
# X_train / y_train here would just reproduce the imbalanced baseline.
# random_state is pinned, like the split and the SMOTE sampler, for reproducibility.
clf = RandomForestClassifier(random_state=0)
clf.fit(X_train_res, y_train_res)
# The plots still evaluate on the original train/test splits, so the figures
# stay directly comparable with the other sections.
train_test_confusion_matrices(X_train, X_test, y_train, y_test, clf, 'RF_all_80_20_smote')
test_confusion_matrix(X_test, y_test, clf, 'RF_all_80_20_smote')

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours model fitted on the SMOTE-resampled training set. Fitting
# on X_train / y_train here would just reproduce the imbalanced baseline.
clf = KNeighborsClassifier()
clf.fit(X_train_res, y_train_res)
# The plots still evaluate on the original train/test splits, so the figures
# stay directly comparable with the other sections.
train_test_confusion_matrices(X_train, X_test, y_train, y_test, clf, 'KNN_all_80_20_smote')
test_confusion_matrix(X_test, y_test, clf, 'KNN_all_80_20_smote')