In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import f1_score
from sklearn import metrics
from sklearn.metrics import precision_recall_fscore_support
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

def run_GBM(Train_Path, Test_Path, Max_Depth):
    Train = pd.read_csv(Train_Path)
    Test  = pd.read_csv(Test_Path)

    Train = Train.dropna()
    Test  = Test.dropna()
    
    x_train = Train.iloc[:,0:-1]
    y_train = Train.iloc[:,-1]
    
    x_test = Test.iloc[:,0:-1]
    y_test = Test.iloc[:,-1]
    
    Model = GradientBoostingClassifier(learning_rate=0.05, max_depth=Max_Depth, random_state=1234)
    Model.fit(x_train, y_train)

    #train
    y_train_pred = Model.predict(x_train)
    precision_train,recall_train,f1Score_train,_ = precision_recall_fscore_support(y_train, y_train_pred,average='binary')
    fpr, tpr, thresholds = metrics.roc_curve(y_train, y_train_pred)
    AUC_train = metrics.auc(fpr, tpr)
    #test
    y_pred = Model.predict(x_test)
    precision_test,recall_test,f1Score_test,_ = precision_recall_fscore_support(y_test, y_pred,average='binary')
    fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred)
    AUC_test = metrics.auc(fpr, tpr)
    
    return  precision_train,recall_train,f1Score_train,AUC_train, precision_test,recall_test,f1Score_test, AUC_test
    
    

In [2]:
data_path= 'C:\\data\\PeakConcentration-Idea2-2-Clasification\\FullDay_Peak_Values\\'


Depth_List = [5,10,15,30,50,70,100]
for i in Depth_List:
    print("Max_Depth: ",i)
    precision_train,recall_train,f1Score_train,AUC_train, precision_test,recall_test,f1Score_test, AUC_test = \
                                            run_GBM(data_path+'NO2_Train.csv',data_path+'NO2_Test.csv', i)
    print("Train: ")
    print("precision: ",precision_train)
    print("recall: ",recall_train)
    print("F score: ",f1Score_train)
    print("AUC score: ",AUC_train)

    print("Test: ")
    print("precision: ",precision_test)
    print("recall: ",recall_test)
    print("F score: ",f1Score_test)
    print("AUC score: ",AUC_test)

Max_Depth:  5
Train: 
precision:  0.8350833848054354
recall:  0.9324137931034483
F score:  0.8810687520364939
AUC score:  0.8741379310344828
Test: 
precision:  0.7204819277108434
recall:  0.8191780821917808
F score:  0.7666666666666668
AUC score:  0.7506849315068493
Max_Depth:  10
Train: 
precision:  0.9972489683631361
recall:  1.0
F score:  0.9986225895316804
AUC score:  0.9986206896551725
Test: 
precision:  0.7371134020618557
recall:  0.7835616438356164
F score:  0.7596281540504647
AUC score:  0.752054794520548
Max_Depth:  15
Train: 
precision:  1.0
recall:  1.0
F score:  1.0
AUC score:  1.0
Test: 
precision:  0.7376623376623377
recall:  0.7780821917808219
F score:  0.7573333333333334
AUC score:  0.7506849315068492
Max_Depth:  30
Train: 
precision:  1.0
recall:  1.0
F score:  1.0
AUC score:  1.0
Test: 
precision:  0.6885245901639344
recall:  0.5753424657534246
F score:  0.626865671641791
AUC score:  0.6575342465753425
Max_Depth:  50
Train: 
precision:  1.0
recall:  1.0
F score:  1.0


In [3]:
data_path= 'C:\\data\\PeakConcentration-Idea2-2-Clasification\\FullDay_Peak_Values\\'


Depth_List = [5,10,15,30,50,70,100]
for i in Depth_List:
    print("Max_Depth: ",i)
    precision_train,recall_train,f1Score_train,AUC_train, precision_test,recall_test,f1Score_test, AUC_test = \
                                            run_GBM(data_path+'CO_Train.csv',data_path+'CO_Test.csv', i)
    print("Train: ")
    print("precision: ",precision_train)
    print("recall: ",recall_train)
    print("F score: ",f1Score_train)
    print("AUC score: ",AUC_train)

    print("Test: ")
    print("precision: ",precision_test)
    print("recall: ",recall_test)
    print("F score: ",f1Score_test)
    print("AUC score: ",AUC_test)

Max_Depth:  5
Train: 
precision:  0.8951456310679612
recall:  0.9518238128011012
F score:  0.9226150767178118
AUC score:  0.9201651754989677
Test: 
precision:  0.7797101449275362
recall:  0.736986301369863
F score:  0.7577464788732394
AUC score:  0.7643835616438356
Max_Depth:  10
Train: 
precision:  0.9986254295532646
recall:  1.0
F score:  0.9993122420907841
AUC score:  0.9993117687543014
Test: 
precision:  0.762532981530343
recall:  0.7917808219178082
F score:  0.7768817204301075
AUC score:  0.7726027397260274
Max_Depth:  15
Train: 
precision:  1.0
recall:  1.0
F score:  1.0
AUC score:  1.0
Test: 
precision:  0.7119113573407202
recall:  0.7041095890410959
F score:  0.7079889807162534
AUC score:  0.7095890410958904
Max_Depth:  30
Train: 
precision:  1.0
recall:  1.0
F score:  1.0
AUC score:  1.0
Test: 
precision:  0.6332288401253918
recall:  0.5534246575342465
F score:  0.5906432748538011
AUC score:  0.6164383561643836
Max_Depth:  50
Train: 
precision:  1.0
recall:  1.0
F score:  1.0


In [4]:
data_path= 'C:\\data\\PeakConcentration-Idea2-2-Clasification\\FullDay_Peak_Values\\'


Depth_List = [5,10,15,30,50,70,100]
for i in Depth_List:
    print("Max_Depth: ",i)
    precision_train,recall_train,f1Score_train,AUC_train, precision_test,recall_test,f1Score_test, AUC_test = \
                                            run_GBM(data_path+'O3_Train.csv',data_path+'O3_Test.csv', i)
    print("Train: ")
    print("precision: ",precision_train)
    print("recall: ",recall_train)
    print("F score: ",f1Score_train)
    print("AUC score: ",AUC_train)

    print("Test: ")
    print("precision: ",precision_test)
    print("recall: ",recall_test)
    print("F score: ",f1Score_test)
    print("AUC score: ",AUC_test)

Max_Depth:  5
Train: 
precision:  0.893698281349459
recall:  0.9656121045392022
F score:  0.9282644628099173
AUC score:  0.9253782668500689
Test: 
precision:  0.8366834170854272
recall:  0.9123287671232877
F score:  0.8728702490170379
AUC score:  0.8671232876712327
Max_Depth:  10
Train: 
precision:  0.9979409746053535
recall:  1.0
F score:  0.9989694263139814
AUC score:  0.998968363136176
Test: 
precision:  0.8041237113402062
recall:  0.8547945205479452
F score:  0.8286852589641434
AUC score:  0.8232876712328767
Max_Depth:  15
Train: 
precision:  1.0
recall:  1.0
F score:  1.0
AUC score:  1.0
Test: 
precision:  0.7836538461538461
recall:  0.8931506849315068
F score:  0.8348271446862996
AUC score:  0.8232876712328767
Max_Depth:  30
Train: 
precision:  1.0
recall:  1.0
F score:  1.0
AUC score:  1.0
Test: 
precision:  0.8084507042253521
recall:  0.7863013698630137
F score:  0.7972222222222223
AUC score:  0.7999999999999999
Max_Depth:  50
Train: 
precision:  1.0
recall:  1.0
F score:  1.0


In [5]:
data_path= 'C:\\data\\PeakConcentration-Idea2-2-Clasification\\FullDay_Peak_Values\\'


Depth_List = [5,10,15,30,50,70,100]
for i in Depth_List:
    print("Max_Depth: ",i)
    precision_train,recall_train,f1Score_train,AUC_train, precision_test,recall_test,f1Score_test, AUC_test = \
                                            run_GBM(data_path+'PM2_5_Train.csv',data_path+'PM2_5_Test.csv', i)
    print("Train: ")
    print("precision: ",precision_train)
    print("recall: ",recall_train)
    print("F score: ",f1Score_train)
    print("AUC score: ",AUC_train)

    print("Test: ")
    print("precision: ",precision_test)
    print("recall: ",recall_test)
    print("F score: ",f1Score_test)
    print("AUC score: ",AUC_test)

Max_Depth:  5
Train: 
precision:  0.8843094721619668
recall:  0.8434482758620689
F score:  0.8633956936110131
AUC score:  0.8665136479379357
Test: 
precision:  0.6345733041575492
recall:  0.7967032967032966
F score:  0.7064555420219244
AUC score:  0.6689560439560439
Max_Depth:  10
Train: 
precision:  0.9993108201240524
recall:  1.0
F score:  0.9996552912788693
AUC score:  0.999654934437543
Test: 
precision:  0.648854961832061
recall:  0.7005494505494505
F score:  0.6737120211360634
AUC score:  0.6607142857142857
Max_Depth:  15
Train: 
precision:  1.0
recall:  1.0
F score:  1.0
AUC score:  1.0
Test: 
precision:  0.6532258064516129
recall:  0.6675824175824175
F score:  0.6603260869565217
AUC score:  0.6565934065934065
Max_Depth:  30
Train: 
precision:  1.0
recall:  1.0
F score:  1.0
AUC score:  1.0
Test: 
precision:  0.61198738170347
recall:  0.532967032967033
F score:  0.5697503671071953
AUC score:  0.5975274725274724
Max_Depth:  50
Train: 
precision:  1.0
recall:  1.0
F score:  1.0
AUC

In [6]:
data_path= 'C:\\data\\PeakConcentration-Idea2-2-Clasification\\FullDay_Peak_Values\\'


Depth_List = [5,10,15,30,50,70,100]
for i in Depth_List:
    print("Max_Depth: ",i)
    precision_train,recall_train,f1Score_train,AUC_train, precision_test,recall_test,f1Score_test, AUC_test = \
                                            run_GBM(data_path+'SO2_Train.csv',data_path+'SO2_Test.csv', i)
    print("Train: ")
    print("precision: ",precision_train)
    print("recall: ",recall_train)
    print("F score: ",f1Score_train)
    print("AUC score: ",AUC_train)

    print("Test: ")
    print("precision: ",precision_test)
    print("recall: ",recall_test)
    print("F score: ",f1Score_test)
    print("AUC score: ",AUC_test)

Max_Depth:  5
Train: 
precision:  0.8505825908156271
recall:  0.8618055555555556
F score:  0.856157295619179
AUC score:  0.8552083333333335
Test: 
precision:  0.6711956521739131
recall:  0.6785714285714286
F score:  0.6748633879781422
AUC score:  0.6730769230769231
Max_Depth:  10
Train: 
precision:  1.0
recall:  1.0
F score:  1.0
AUC score:  1.0
Test: 
precision:  0.6422535211267606
recall:  0.6263736263736264
F score:  0.6342141863699583
AUC score:  0.6387362637362637
Max_Depth:  15
Train: 
precision:  1.0
recall:  1.0
F score:  1.0
AUC score:  1.0
Test: 
precision:  0.5767195767195767
recall:  0.5989010989010989
F score:  0.5876010781671158
AUC score:  0.5796703296703296
Max_Depth:  30
Train: 
precision:  1.0
recall:  1.0
F score:  1.0
AUC score:  1.0
Test: 
precision:  0.5776081424936387
recall:  0.6236263736263736
F score:  0.5997357992073976
AUC score:  0.5837912087912088
Max_Depth:  50
Train: 
precision:  1.0
recall:  1.0
F score:  1.0
AUC score:  1.0
Test: 
precision:  0.5602094