# Performance

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold,StratifiedKFold,RepeatedStratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score

# READING PROCESSED DATA

In [2]:
# Load the processed feature table; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer="./Datasets/DataSet_54F_mfcc.csv")

# Feature-number binding: F[k] is the dataframe column name of feature f<k>.
# Numbers start at 1, so the list below is enumerated from 1.
F = dict(enumerate(
    [
        'stay_duration',                # f1
        'mfcc0', 'mfcc1', 'mfcc2',      # f2, f3, f4
        'mfcc3', 'mfcc4',               # f5, f6
        'wifi_count',                   # f7
        'edge_wifi_count',              # f8
        'RSI',                          # f9
        'human_made',                   # f10
        'natural_land',                 # f11
        'road_exist_percent',           # f12
        'highly_populated_poi_exist',   # f13
    ],
    start=1,
))

# Feature-selection filter: for every POI label column, the feature numbers
# to keep (in this order), resolved to column names through F.
selected_feat = {
    poi_label: [F[feature_no] for feature_no in feature_numbers]
    for poi_label, feature_numbers in (
        ('Is_Bus_stop', (10, 12, 11, 1, 8)),
        ('Is_Turn', (10, 12, 11, 1, 8, 9)),
        ('Is_Signal', (12, 11, 10, 8, 9)),
        # Original note: "1 replace with 5" -- presumably feature 1 was
        # swapped for feature 5 in this list; confirm with the author.
        ('Is_Congestion', (2, 11, 10, 9, 12, 8, 6, 5)),
        ('Is_Adhoc', (10, 11, 1, 12, 2)),
    )
}

In [3]:
def get_feature_selected_data(poi_column):
    """Return the feature matrix and label vector for one POI label column.

    The features are the columns listed under ``selected_feat[poi_column]``
    and the labels are the ``poi_column`` column itself, both read from the
    module-level ``df``.

    Returns:
        ``(data, labels)`` as the ``.values`` arrays of those selections.
    """
    feature_columns = selected_feat[poi_column]
    feature_frame = df[feature_columns]
    label_series = df[poi_column]
    return feature_frame.values, label_series.values

In [4]:
def get_data(data,labels,train_index,test_index):
    """Split ``data`` and ``labels`` into train and test parts by index.

    ``data`` is indexed along its first axis (all columns kept) and
    ``labels`` is indexed directly, using the same index arrays.

    Returns:
        ``(train, labels_train, test, labels_test)``.
    """
    train_rows = data[train_index, :]
    train_labels = labels[train_index]

    test_rows = data[test_index, :]
    test_labels = labels[test_index]

    return train_rows, train_labels, test_rows, test_labels


def _weighted_scores(prefix, y_true, y_pred):
    """Score one split; keys are ``prefix`` + ``_acc``/``_precision``/``_recall``/``_f1-score``."""
    return {
        prefix + '_acc': accuracy_score(y_true, y_pred),
        prefix + '_precision': precision_score(y_true, y_pred, average='weighted'),
        prefix + '_recall': recall_score(y_true, y_pred, average='weighted'),
        prefix + '_f1-score': f1_score(y_true, y_pred, average='weighted'),
    }


def get_metrics_from_the_model(train,labels_train,test,labels_test,
                               n_estimators=100,max_depth=8,random_state=None):
    """Fit a random forest on the training split and score it on both splits.

    Parameters:
        train, labels_train: features and labels the forest is fitted on.
        test, labels_test: held-out features and labels.
        n_estimators, max_depth: forwarded to ``RandomForestClassifier``;
            the defaults are the values that were previously hard-coded.
        random_state: forwarded to ``RandomForestClassifier``. The default
            ``None`` keeps the previous unseeded behaviour; pass an int to
            make the fitted forest (and therefore the scores) repeatable.

    Returns:
        dict with keys ``tr_acc``, ``tr_precision``, ``tr_recall``,
        ``tr_f1-score`` (scores on the training split) followed by the same
        four ``te_*`` keys (scores on the test split). Precision, recall and
        F1 use ``average='weighted'``.
    """
    rf = RandomForestClassifier(n_estimators=n_estimators,
                                max_depth=max_depth,
                                random_state=random_state)
    rf.fit(train, labels_train)

    pred_train = rf.predict(train)
    pred_test = rf.predict(test)

    # Training-split scores first, then test-split scores, so the key order
    # of the returned dict is unchanged.
    performance = _weighted_scores('tr', labels_train, pred_train)
    performance.update(_weighted_scores('te', labels_test, pred_test))
    return performance

In [5]:
def get_performance_of_poi_column(poi_column=None,fold=5,fold_repeat=10,random_state=None):
    """Cross-validate the model for one POI label column and summarise the scores.

    The data for ``poi_column`` comes from ``get_feature_selected_data``. It is
    split with ``RepeatedStratifiedKFold`` (stratified K-fold repeated
    ``fold_repeat`` times), and every split is scored with
    ``get_metrics_from_the_model``.

    Parameters:
        poi_column: label column to evaluate; passed unchanged to
            ``get_feature_selected_data``.
        fold: ``n_splits`` of the splitter.
        fold_repeat: ``n_repeats`` of the splitter.
        random_state: forwarded to ``RepeatedStratifiedKFold``. The default
            ``None`` keeps the previous unseeded behaviour; pass an int to get
            the same fold assignment on every run. This seeds the splitter
            only, not the model fitted inside ``get_metrics_from_the_model``.

    Returns:
        dict with
        * ``'poi_column'``: the argument as given;
        * ``'Size'``: ``"<n>>>{0: <n - sum>, 1: <sum>}"`` where ``sum`` is
          ``labels.sum()`` (this reads as class counts only if the labels are
          0/1 -- TODO confirm);
        * one entry per metric key (``tr_acc`` ... ``te_f1-score``) holding
          ``{'mean': ..., 'std': ...}`` over all evaluated splits.
    """
    data,labels=get_feature_selected_data(poi_column)

    metric_keys=('tr_acc','tr_precision','tr_recall','tr_f1-score',
                 'te_acc','te_precision','te_recall','te_f1-score')
    # One list of per-split scores for every metric key.
    scores_by_metric={key:[] for key in metric_keys}

    # Stratified splitting keeps the class proportions in every fold; the
    # repetition re-draws the folds fold_repeat times.
    kf=RepeatedStratifiedKFold(n_splits=fold,n_repeats=fold_repeat,
                               random_state=random_state)

    for train_index, test_index in kf.split(data,labels):
        train,labels_train,test,labels_test=get_data(data,labels,train_index,test_index)
        perf=get_metrics_from_the_model(train,labels_train,test,labels_test)
        for key in metric_keys:
            scores_by_metric[key].append(perf[key])

    label_sum=labels.sum()
    performance={'poi_column':poi_column,
                 'Size':str(len(labels))+">>"+str({0:len(labels)-label_sum,
                                                   1:label_sum})}
    # Metric entries are added in metric_keys order, so the resulting column
    # order matches the previous hand-written dict.
    for key in metric_keys:
        performance[key]={'mean':np.mean(scores_by_metric[key]),
                          'std':np.std(scores_by_metric[key])}
    return performance

In [6]:
# Cross-validation settings: number of folds and number of repetitions.
fold = 5
fold_repeat = 10

# Collect one performance summary per POI label column, in this order.
l = []
for column in ('Is_Bus_stop', 'Is_Turn', 'Is_Signal', 'Is_Congestion', 'Is_Adhoc'):
    l.append(
        get_performance_of_poi_column(column, fold=fold, fold_repeat=fold_repeat)
    )

In [32]:
# Tabulate the collected summaries: one row per POI label column.
perf = pd.DataFrame(data=l)
#perf.to_csv('./logs/rf_performance_kfold_54F_mfcc_data.csv',index=False)
perf

Unnamed: 0,poi_column,Size,tr_acc,tr_precision,tr_recall,tr_f1-score,te_acc,te_precision,te_recall,te_f1-score
0,Is_Bus_stop,"3840>>{0: 2334, 1: 1506}","{'mean': 0.9318294270833334, 'std': 0.00409902...","{'mean': 0.9318997465960104, 'std': 0.00415143...","{'mean': 0.9318294270833334, 'std': 0.00409902...","{'mean': 0.9318499943164782, 'std': 0.00411888...","{'mean': 0.8853645833333332, 'std': 0.01173020...","{'mean': 0.8853154686276865, 'std': 0.01184708...","{'mean': 0.8853645833333332, 'std': 0.01173020...","{'mean': 0.8850989862367207, 'std': 0.01182192..."
1,Is_Turn,"3840>>{0: 2819, 1: 1021}","{'mean': 0.9336848958333335, 'std': 0.00390789...","{'mean': 0.9342566517805355, 'std': 0.00382537...","{'mean': 0.9336848958333335, 'std': 0.00390789...","{'mean': 0.9318005987448263, 'std': 0.00418122...","{'mean': 0.878828125, 'std': 0.011527401919282...","{'mean': 0.8770188788376594, 'std': 0.01269194...","{'mean': 0.878828125, 'std': 0.011527401919282...","{'mean': 0.8734227684717637, 'std': 0.01225364..."
2,Is_Signal,"3840>>{0: 3576, 1: 264}","{'mean': 0.98697265625, 'std': 0.0013460827670...","{'mean': 0.9869166164966395, 'std': 0.00135196...","{'mean': 0.98697265625, 'std': 0.0013460827670...","{'mean': 0.9864429715459531, 'std': 0.00147085...","{'mean': 0.9673697916666667, 'std': 0.00556347...","{'mean': 0.96575949741463, 'std': 0.0064375830...","{'mean': 0.9673697916666667, 'std': 0.00556347...","{'mean': 0.9641505167479022, 'std': 0.00670494..."
3,Is_Congestion,"3840>>{0: 2975, 1: 865}","{'mean': 0.8700846354166667, 'std': 0.00388462...","{'mean': 0.8835955219440831, 'std': 0.00321138...","{'mean': 0.8700846354166667, 'std': 0.00388462...","{'mean': 0.8503198199511276, 'std': 0.00545671...","{'mean': 0.8202604166666667, 'std': 0.00801795...","{'mean': 0.8122174357205277, 'std': 0.01269720...","{'mean': 0.8202604166666667, 'std': 0.00801795...","{'mean': 0.7871819036319588, 'std': 0.01141536..."
4,Is_Adhoc,"3840>>{0: 2526, 1: 1314}","{'mean': 0.8513020833333332, 'std': 0.00592447...","{'mean': 0.8604016602930661, 'std': 0.00584751...","{'mean': 0.8513020833333332, 'std': 0.00592447...","{'mean': 0.8428889784143355, 'std': 0.00678011...","{'mean': 0.7702604166666667, 'std': 0.01311983...","{'mean': 0.7688819001622604, 'std': 0.01570501...","{'mean': 0.7702604166666667, 'std': 0.01311983...","{'mean': 0.7544762698283632, 'std': 0.01463966..."


In [8]:
# Tabulate the collected summaries: one row per POI label column.
# NOTE(review): this cell repeats the previous one. The two stored outputs
# report different dataset sizes (3840 vs 4396 rows) and the execution counts
# are out of order, so they appear to come from different runs -- confirm
# which output is current.
perf = pd.DataFrame(data=l)
#perf.to_csv('./logs/rf_performance_kfold_54F_mfcc_data.csv',index=False)
perf

Unnamed: 0,poi_column,Size,tr_acc,tr_precision,tr_recall,tr_f1-score,te_acc,te_precision,te_recall,te_f1-score
0,Is_Bus_stop,"4396>>{0: 2809, 1: 1587}","{'mean': 0.9345825978353798, 'std': 0.00381725...","{'mean': 0.9344701105652459, 'std': 0.00383978...","{'mean': 0.9345825978353798, 'std': 0.00381725...","{'mean': 0.9342270827414707, 'std': 0.00385969...","{'mean': 0.8855316475333541, 'std': 0.01012423...","{'mean': 0.8851189970871531, 'std': 0.01035313...","{'mean': 0.8855316475333541, 'std': 0.01012423...","{'mean': 0.8844333206372507, 'std': 0.01020207..."
1,Is_Turn,"4396>>{0: 3276, 1: 1120}","{'mean': 0.9054254097520156, 'std': 0.00385659...","{'mean': 0.9122830671932525, 'std': 0.00343763...","{'mean': 0.9054254097520156, 'std': 0.00385659...","{'mean': 0.8984805593719746, 'std': 0.00454508...","{'mean': 0.8603953356086461, 'std': 0.01025196...","{'mean': 0.8622010751878573, 'std': 0.01165626...","{'mean': 0.8603953356086461, 'std': 0.01025196...","{'mean': 0.8475005306720808, 'std': 0.01236886..."
2,Is_Signal,"4396>>{0: 4110, 1: 286}","{'mean': 0.9862261005620999, 'std': 0.00140952...","{'mean': 0.9861465925159372, 'std': 0.00142655...","{'mean': 0.9862261005620999, 'std': 0.00140952...","{'mean': 0.985587608111844, 'std': 0.001553355...","{'mean': 0.9651273916640811, 'std': 0.00454167...","{'mean': 0.9627394867624929, 'std': 0.00561857...","{'mean': 0.9651273916640811, 'std': 0.00454167...","{'mean': 0.9616624471439849, 'std': 0.00553894..."
3,Is_Congestion,"4396>>{0: 3398, 1: 998}","{'mean': 0.8597305368399158, 'std': 0.00328343...","{'mean': 0.8677653525619768, 'std': 0.00309382...","{'mean': 0.8597305368399158, 'std': 0.00328343...","{'mean': 0.8387670808455404, 'std': 0.00491358...","{'mean': 0.8195412917571621, 'std': 0.00824823...","{'mean': 0.8096523315401446, 'std': 0.01335417...","{'mean': 0.8195412917571621, 'std': 0.00824823...","{'mean': 0.7889141713696888, 'std': 0.01058180..."
4,Is_Adhoc,"4396>>{0: 2787, 1: 1609}","{'mean': 0.8419015715314822, 'std': 0.00594957...","{'mean': 0.8491577909725458, 'std': 0.00557918...","{'mean': 0.8419015715314822, 'std': 0.00594957...","{'mean': 0.8349556849949231, 'std': 0.00668327...","{'mean': 0.7524587340986659, 'std': 0.01244904...","{'mean': 0.7503864657914028, 'std': 0.01438477...","{'mean': 0.7524587340986659, 'std': 0.01244904...","{'mean': 0.7393927354622656, 'std': 0.01349121..."


In [11]:
#NICE