In [24]:
import os
import pickle
import pandas as pd
import numpy as np
from calculate_auc import calculate_auc

# ---- run configuration ----
save_results = True  # when True, the collected results are pickled to 'auc_fsq.dat'

n_boot = 100  # number of bootstrap repetitions
split = 0.7   # share of the feature files left out of each evaluation draw
np.random.seed(seed=0)

ft_dir = 'features_long/'

# list feature files
# os.listdir() returns names in arbitrary order; sorting keeps the seeded,
# position-based sampling of files reproducible across machines and runs.
files = sorted(os.listdir(ft_dir))

# reading top locations
# (the with-statement closes the file, so no explicit close() is needed)
with open('top_locations.dat', 'rb') as f:
    location_top = pickle.load(f)
# add an extra 'Unknown' category after the loaded locations
location_top = np.append(location_top, 'Unknown')

# Integer code of every location name = its position in location_top.
# Built once up front (first occurrence wins) so rows are encoded by lookup
# instead of scanning the array for every single row.
location_code = {}
for code, name in enumerate(location_top):
    location_code.setdefault(name, code)

target_all = []
for filename in files:
    # the file only needs to stay open while it is being unpickled
    with open(ft_dir+filename, 'rb') as f:
        _, target = pickle.load(f)

    # only keeping locations in location_top and encoding them
    # (boolean mask: independent of how the frame's index is labelled)
    keep = target['location'].isin(location_top)
    target = target.loc[keep].reset_index(drop=True)

    # encoded columns are stored as int; an 'fsq' value that is missing from
    # location_top maps to NaN and makes astype(int) raise instead of
    # silently producing a bogus code
    target['elocation'] = target['location'].map(location_code).astype(int)
    target['efsq'] = target['fsq'].map(location_code).astype(int)

    target_all.append(target)

# One entry per bootstrap repetition is appended to each of these lists.
confs, aucs, labels = [], [], []
inds = np.arange(len(target_all))

# Number of files evaluated per repetition: what remains after removing
# the `split` share of all files.
n_test = inds.size - int(np.floor(inds.size * split))

for rep in range(n_boot):

    print('------------------')
    print(rep)

    # evaluating on the test data: draw the files without replacement
    picked = np.random.choice(inds, size=n_test, replace=False)
    test_targets = [target_all[k] for k in picked]

    y_report = pd.concat([t['elocation'] for t in test_targets], axis=0)
    y_fsq = pd.concat([t['efsq'] for t in test_targets], axis=0)

    # foursquare performance
    conf, roc_auc = calculate_auc(y_fsq, y_report, location_top.size)

    # distinct reported codes present in this draw
    seen = np.unique(y_report)
    labels.append(seen)
    confs.append(conf)
    aucs.append(roc_auc)

    print(seen)
    print(roc_auc, np.nanmean(roc_auc))
    
# Pickle the collected aucs, confs and labels lists as a single object.
# The with-statement already closes the file, so no explicit close() follows.
if save_results:
    with open('auc_fsq.dat', 'wb') as f:
        pickle.dump([aucs, confs, labels], f)


------------------
0
[ 0  1  2  3  4  5  6  7  8  9 10 11]
[ 0.52655386  0.56155261  0.5         0.59477784  0.63709213  0.5
  0.69428085  0.5         0.65162455  0.52301372  0.74446999  0.57748863] 0.584237848887
------------------
1
[ 0  1  2  3  4  5  6  7  8  9 10]
[ 0.55040095  0.62832621  0.5         0.60812247  0.60591769  0.5
  0.64582665  0.5         0.64889278  0.51235699  0.77188792         nan] 0.58833924123
------------------
2
[ 0  1  2  3  4  5  6  7  8  9 10]
[ 0.531475    0.61537878  0.5         0.5781907   0.64836352  0.5
  0.68645114  0.5         0.6347881   0.52151278  0.74476835         nan] 0.587357123166
------------------
3
[ 0  1  2  3  4  5  6  7  8  9 10 11]
[ 0.50187105  0.57726089  0.5         0.54669488  0.69322301  0.5
  0.65898415  0.5         0.62289651  0.52758344  0.76256064  0.55791757] 0.579082678672
------------------
4
[ 0  1  2  3  4  5  6  7  8  9 10 11]
[ 0.53550418  0.60473883  0.5         0.59656688  0.68957207  0.5
  0.6658286   0.5         

[ 0  1  2  3  4  5  6  7  8  9 10]
[ 0.54950073  0.52245488  0.5         0.57821429  0.65041738  0.5
  0.62963497  0.5         0.63617054  0.53067532  0.72773452         nan] 0.574982057533
------------------
41
[ 0  1  2  3  4  5  6  7  8  9 10 11]
[ 0.55668349  0.674967    0.5         0.55505715  0.68536065  0.5
  0.60857109  0.5         0.62427897  0.51308272  0.72882948  0.57550234] 0.585194407755
------------------
42
[ 0  1  2  3  4  5  6  7  8  9 10]
[ 0.56845109  0.66451645  0.5         0.56072733  0.73605188  0.5
  0.58751094  0.5         0.62875028  0.52918742  0.74529575         nan] 0.592771921619
------------------
43
[ 0  1  2  3  4  5  6  7  8  9 10 11]
[ 0.54082412  0.54302684  0.5         0.55156541  0.72164397  0.5
  0.66582202  0.5         0.65461504  0.53434723  0.74901981  0.53590738] 0.583064318213
------------------
44
[ 0  1  2  3  4  5  6  7  8  9 10]
[ 0.51631744  0.59546254  0.5         0.60876623  0.64997457  0.5
  0.68862472  0.5         0.66041315  0.52661

[ 0  1  2  3  4  5  6  7  8  9 10 11]
[ 0.50484808  0.58585033  0.5         0.58774997  0.60701216  0.5
  0.62471443  0.5         0.60152183  0.50190648  0.73337343  0.56169123] 0.567388994345
------------------
82
[ 0  1  2  3  4  5  6  7  8  9 10 11]
[ 0.54167769  0.53981283  0.5         0.58140912  0.66831026  0.5
  0.62416909  0.5         0.61696677  0.52272374  0.7422674   0.53994436] 0.573106771064
------------------
83
[ 0  1  2  3  4  5  6  7  8  9 10 11]
[ 0.50323557  0.58478139  0.5         0.58248332  0.65110342  0.5
  0.63192745  0.5         0.6170878   0.53872784  0.74456431  0.56929662] 0.576933977453
------------------
84
[ 0  1  2  3  4  5  6  7  8  9 10]
[ 0.54851976  0.65157065  0.5         0.64200331  0.69799942  0.5
  0.69108833  0.5         0.63144135  0.53581665  0.7914468          nan] 0.608171478693
------------------
85
[ 0  1  2  3  4  5  6  7  8  9 10]
[ 0.51598988  0.74890039  0.5         0.65160826  0.6623039   0.5
  0.6886174   0.5         0.63213358  0.50