# Imports

In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import *
from imblearn.over_sampling import SMOTE
import pandas as pd
import os

## Constants

In [2]:
# Location of the data files and the name of the training CSV inside it.
parent_dir = r"../data/54ft/"
train_df_name = r"54ft_train.csv"

# Feature number -> column name binding (f1 .. f13), numbered from 1.
F = dict(enumerate(
    [
        'stay_duration',                                    # f1
        'mfcc0', 'mfcc1', 'mfcc2', 'mfcc3', 'mfcc4',        # f2 .. f6
        'wifi_count', 'edge_wifi_count',                    # f7, f8
        'RSI',                                              # f9
        'human_made', 'natural_land',                       # f10, f11
        'road_exist_percent', 'highly_populated_poi_exist', # f12, f13
    ],
    start=1,
))

# Feature columns used for each POI label, listed by feature number and
# resolved to column names through F (order of the numbers is preserved).
selected_feat = {
    poi: [F[number] for number in feature_numbers]
    for poi, feature_numbers in (
        ('Is_Bus_stop', (10, 12, 11, 1, 8)),
        ('Is_Turn', (10, 12, 11, 1, 8, 9)),
        ('Is_Signal', (12, 11, 10, 8, 9)),
        ('Is_Congestion', (2, 11, 10, 9, 12, 8, 6, 1)),
        ('Is_Adhoc', (10, 11, 1, 12, 2)),
    )
}


# Test file names: one split file per zone, plus the 7-day test file.
zone_names = [
    f"{zone}_test_split.csv"
    for zone in ("station", "dvc_more", "54ft_road", "junction_mall", "prantika_bus_stand")
]

_7day_test = ["54ft_test.csv"]

In [3]:
#Modeling function
def get_metrics_from_the_model(train,labels_train,test,labels_test,poi,zone,
                               n_estimators=100,max_depth=8,random_state=None):
    """Fit a random forest and report weighted metrics on train and test data.

    Parameters
    ----------
    train, labels_train : features and labels the classifier is fitted on.
    test, labels_test : features and labels the fitted classifier is scored on.
    poi, zone : identifiers copied unchanged into the result under the
        'poi' and 'zone' keys.
    n_estimators, max_depth : forwarded to RandomForestClassifier; the
        defaults (100, 8) are the values that used to be hard-coded here.
    random_state : forwarded to RandomForestClassifier. The default None
        keeps the previous unseeded behaviour; pass an int to make the fit
        repeatable.

    Returns
    -------
    (performance, pred_test)
        performance is a dict with 'poi', 'zone' and, for each of the 'tr'
        (train) and 'te' (test) prefixes, the keys '<prefix>_acc',
        '<prefix>_precision', '<prefix>_recall' and '<prefix>_f1-score'.
        pred_test holds the predictions for `test`.
    """
    rf=RandomForestClassifier(n_estimators=n_estimators,max_depth=max_depth,
                              random_state=random_state)
    rf.fit(train, labels_train)

    pred_train= rf.predict(train)
    pred_test= rf.predict(test)

    performance={'poi':poi,'zone':zone}
    # Same four metrics for both splits; train entries are inserted first so
    # the key (and resulting column) order matches the original layout.
    for prefix,truth,predicted in (('tr',labels_train,pred_train),
                                   ('te',labels_test,pred_test)):
        performance[f'{prefix}_acc']=accuracy_score(truth,predicted)
        performance[f'{prefix}_precision']=precision_score(truth,predicted,average='weighted')
        performance[f'{prefix}_recall']=recall_score(truth,predicted,average='weighted')
        performance[f'{prefix}_f1-score']=f1_score(truth,predicted,average='weighted')
    return performance,pred_test
    

## Modeling

In [4]:
perf=[] #performance list

# The training CSV is the same for every POI and zone, so it is read once
# here instead of once per loop iteration.
train_csv_df = pd.read_csv(os.path.join(parent_dir, train_df_name))

for poi_column in ['Is_Bus_stop','Is_Turn','Is_Signal','Is_Congestion','Is_Adhoc']:
    feature_names=selected_feat[poi_column] #feature selection

    # Training features/labels depend only on the POI, not on the zone.
    # Plain arrays are used so that the data the model is fitted on and the
    # test data it predicts on (also `.values`) have the same type.
    train_df = train_csv_df[feature_names+[poi_column]]
    X = train_df[feature_names].values
    y = train_df[poi_column].values

    for test_df_name in (zone_names+_7day_test):#iterator name

        # "<zone>_test_split.csv" -> "<zone>"; "54ft_test.csv" -> "54ft_test"
        zoneName=test_df_name.split('_test_split.csv')[0].split('.')[0]
        output_result_filename = f"{poi_column}_test_result_{zoneName}_zone.csv"

        #test data processing
        test_csv_df = pd.read_csv(os.path.join(parent_dir, test_df_name))
        test_df = test_csv_df[feature_names+[poi_column]]

        #SMOTE on training data (a fresh oversampling for every run).
        # fit_resample replaces fit_sample, which imbalanced-learn deprecated
        # and later removed.
        smote = SMOTE()
        X_resampled, y_resampled = smote.fit_resample(X, y)
        train_data = X_resampled
        train_labels = y_resampled

        #Test set get features & labels
        test_data = test_df[feature_names].values
        test_labels = test_df[poi_column].values

        #Training with Random Forest
        performance,pred_test=get_metrics_from_the_model(train_data,train_labels,test_data,test_labels,poi_column,zoneName)

        #adding to performance list
        perf.append(performance)

        #creating prediction file
        test_csv_df[f'Prediction {poi_column}']=pred_test
        result_df=test_csv_df[['start_date','start_time','end_time',f'Prediction {poi_column}']].copy()
        result_df.columns=['instance_date', 'instance_start_time', 'instance_end_time',f'Prediction {poi_column}']
        result_df.to_csv(os.path.join(parent_dir, output_result_filename), index=False)

        print(f'Completed for POI:{poi_column} and Zone:{zoneName} get results of length:{result_df.shape[0]}')

Completed for POI:Is_Bus_stop and Zone:station get results of length:72
Completed for POI:Is_Bus_stop and Zone:dvc_more get results of length:68
Completed for POI:Is_Bus_stop and Zone:54ft_road get results of length:98
Completed for POI:Is_Bus_stop and Zone:junction_mall get results of length:54
Completed for POI:Is_Bus_stop and Zone:prantika_bus_stand get results of length:61
Completed for POI:Is_Bus_stop and Zone:54ft_test get results of length:1360
Completed for POI:Is_Turn and Zone:station get results of length:72
Completed for POI:Is_Turn and Zone:dvc_more get results of length:68
Completed for POI:Is_Turn and Zone:54ft_road get results of length:98
Completed for POI:Is_Turn and Zone:junction_mall get results of length:54
Completed for POI:Is_Turn and Zone:prantika_bus_stand get results of length:61
Completed for POI:Is_Turn and Zone:54ft_test get results of length:1360
Completed for POI:Is_Signal and Zone:station get results of length:72
Completed for POI:Is_Signal and Zone:dvc_m

# Saving Performance for All Modeling Runs

In [5]:
# One row per entry in `perf`; written as CSV into the data directory.
performance_csv_path = os.path.join(parent_dir, 'modeling_performance.csv')
df = pd.DataFrame(perf)
df.to_csv(performance_csv_path, index=False)
# Bare last expression so the notebook renders the table.
df

Unnamed: 0,poi,zone,tr_acc,tr_precision,tr_recall,tr_f1-score,te_acc,te_precision,te_recall,te_f1-score
0,Is_Bus_stop,station,0.923254,0.924235,0.923254,0.92321,0.847222,0.84842,0.847222,0.83685
1,Is_Bus_stop,dvc_more,0.921278,0.922481,0.921278,0.921222,0.926471,0.933407,0.926471,0.923039
2,Is_Bus_stop,54ft_road,0.921937,0.923202,0.921937,0.921878,0.826531,0.827956,0.826531,0.827153
3,Is_Bus_stop,junction_mall,0.924242,0.925146,0.924242,0.924202,0.833333,0.848655,0.833333,0.823434
4,Is_Bus_stop,prantika_bus_stand,0.924242,0.925545,0.924242,0.924184,0.934426,1.0,0.934426,0.966102
5,Is_Bus_stop,54ft_test,0.917984,0.918953,0.917984,0.917937,0.865441,0.866807,0.865441,0.865872
6,Is_Turn,station,0.92257,0.923848,0.92257,0.922512,0.916667,0.933036,0.916667,0.921182
7,Is_Turn,dvc_more,0.916804,0.91809,0.916804,0.91674,0.75,0.746201,0.75,0.743725
8,Is_Turn,54ft_road,0.914607,0.917793,0.914607,0.914444,1.0,1.0,1.0,1.0
9,Is_Turn,junction_mall,0.921746,0.923377,0.921746,0.921671,0.851852,0.874644,0.851852,0.808642


In [6]:
#NICE