In [7]:
import os
import pickle
import pandas as pd
import numpy as np
import xgboost as xgb
from calculate_confusion_matrix import calculate_confusion_matrix
import time
from copy import deepcopy
from utils import one_hot_encoder
# from sklearn.preprocessing import OneHotEncoder
# from sklearn import preprocessing

# Run configuration.
# save_results: when True, the collected results are pickled at the end.
save_results = True

# n_boot: number of resampling iterations.
# split: fraction used to derive how many feature files are drawn per iteration.
n_boot = 100
split = 0.7
np.random.seed(seed=0)

ft_dir = 'features_long/'

# list feature files
# os.listdir returns names in arbitrary order; sort them so that the seeded
# random draws pick the same files on every machine / run.
files = sorted(os.listdir(ft_dir))

# reading top locations
# The `with` block closes the file on exit, so no explicit close() is needed.
with open('top_locations.dat', 'rb') as f:
    location_top = pickle.load(f)

# One target table per feature file, restricted to rows whose 'location'
# value is contained in location_top.
target_all = []
for filename in files:
    # Each pickle holds a pair; only its second element is used here.
    with open(os.path.join(ft_dir, filename), 'rb') as f:
        _, target = pickle.load(f)

    # only keeping top locations
    # A boolean mask selects rows by position, so the filter is correct
    # whatever index labels the table carries (the previous positional
    # integers passed to .loc were only right for a default 0..n-1 index),
    # and it avoids re-allocating an index array on every match.
    keep = np.array([loc in location_top for loc in target['location']],
                    dtype=bool)
    target = target.loc[keep].reset_index(drop=True)

    target_all.append(target)

# Result accumulators: each receives one entry per iteration of the loop below.
confs = []
aucs = []
labels = []
inds = np.arange(len(target_all))

# Number of tables drawn per iteration: the total count minus
# floor(total * split). It does not change between iterations.
n_draw = inds.size - int(np.floor(inds.size * split))

for i in range(n_boot):

    print('------------------')
    print(i)

    # Draw table indices without replacement.
    # (alternative kept for reference:
    #  np.random.choice(inds, size=inds.size, replace=True))
    ind_boot = np.random.choice(inds, size=n_draw, replace=False)
    sampled = [target_all[j] for j in ind_boot]

    # Stack the 'location' and 'fsq' columns of the drawn tables.
    y_report = pd.concat([tbl['location'] for tbl in sampled], axis=0)
    y_fsq = pd.concat([tbl['fsq'] for tbl in sampled], axis=0)

    # foursquare performance
    conf, roc_auc = calculate_confusion_matrix(y_fsq, y_report)

    # Distinct 'location' values present in this draw.
    present = np.unique(y_report)
    labels.append(present)
    confs.append(conf)
    aucs.append(roc_auc)

    print(present)
    print(roc_auc, np.nanmean(roc_auc))
    
# Persist the per-iteration results as a single pickled list
# [aucs, confs, labels] when saving is enabled.
if save_results:
    # The `with` block closes the file on exit; no explicit close() needed.
    with open('auc_location_new_10fold_fsq3.dat', 'wb') as f:
        pickle.dump([aucs, confs, labels], f)


------------------
0
["Another's Home" 'Arts & Entertainment' 'Food' 'Home' 'Nightlife Spot'
 'Outdoors & Recreation' 'Professional Or Medical Office' 'Shop Or Store'
 'Spiritual' 'Travel Or Transport' 'Work']
[        nan  0.55988768  0.63424298  0.72349414  0.54240931  0.50543778
  0.57175766  0.73528812         nan  0.50644515         nan] 0.597370353203
------------------
1
["Another's Home" 'Arts & Entertainment' 'Food' 'Home' 'Nightlife Spot'
 'Outdoors & Recreation' 'Professional Or Medical Office' 'Shop Or Store'
 'Spiritual' 'Travel Or Transport' 'Work']
[        nan  0.61685297  0.66502325  0.63558651  0.64432614  0.50819295
  0.53772039  0.73320579         nan  0.527384           nan] 0.608536499739
------------------
2




["Another's Home" 'Arts & Entertainment' 'Food' 'Home' 'Nightlife Spot'
 'Outdoors & Recreation' 'Professional Or Medical Office' 'Shop Or Store'
 'Spiritual' 'Travel Or Transport' 'Work']
[        nan  0.61252782  0.6584663   0.68719191  0.54680955  0.50690401
  0.55350519  0.71497639         nan  0.51170961         nan] 0.599011347341
------------------
3
["Another's Home" 'Arts & Entertainment' 'Food' 'Home' 'Nightlife Spot'
 'Outdoors & Recreation' 'Professional Or Medical Office' 'Shop Or Store'
 'Spiritual' 'Travel Or Transport' 'Work']
[        nan  0.58392128  0.65003152  0.70879014  0.57777131  0.50299869
  0.54102939  0.72781031         nan  0.50077654         nan] 0.599141149269
------------------
4
["Another's Home" 'Arts & Entertainment' 'Food' 'Home' 'Nightlife Spot'
 'Outdoors & Recreation' 'Professional Or Medical Office' 'Shop Or Store'
 'Spiritual' 'Travel Or Transport' 'Work']
[        nan  0.58914634  0.69489328  0.74276841  0.55014554  0.50964869
  0.56142322  0.70

["Another's Home" 'Arts & Entertainment' 'Food' 'Home' 'Nightlife Spot'
 'Outdoors & Recreation' 'Professional Or Medical Office' 'Shop Or Store'
 'Spiritual' 'Travel Or Transport' 'Work']
[        nan  0.60325505  0.66044326  0.78082288  0.5463867   0.50462957
  0.56102262  0.75835082         nan  0.5088509          nan] 0.615470225124
------------------
27
["Another's Home" 'Arts & Entertainment' 'Food' 'Home' 'Nightlife Spot'
 'Outdoors & Recreation' 'Professional Or Medical Office' 'Shop Or Store'
 'Spiritual' 'Travel Or Transport' 'Work']
[        nan  0.59249     0.67187429  0.6657173   0.51789663  0.50500324
  0.52226172  0.74156011         nan  0.49915979         nan] 0.58949538732
------------------
28
["Another's Home" 'Arts & Entertainment' 'Food' 'Home' 'Nightlife Spot'
 'Outdoors & Recreation' 'Professional Or Medical Office' 'Shop Or Store'
 'Spiritual' 'Travel Or Transport' 'Work']
[        nan  0.61013781  0.6251764   0.74973894  0.604415    0.50613882
  0.55000729  0.6

["Another's Home" 'Arts & Entertainment' 'Food' 'Home' 'Nightlife Spot'
 'Outdoors & Recreation' 'Professional Or Medical Office' 'Shop Or Store'
 'Spiritual' 'Travel Or Transport' 'Work']
[        nan  0.60001607  0.59809427  0.72709057  0.54087526  0.50269914
  0.53530606  0.71590545         nan  0.5200804          nan] 0.592508403212
------------------
52
["Another's Home" 'Arts & Entertainment' 'Food' 'Home' 'Nightlife Spot'
 'Outdoors & Recreation' 'Professional Or Medical Office' 'Shop Or Store'
 'Spiritual' 'Travel Or Transport' 'Work']
[        nan  0.57055131  0.69135196  0.75507736  0.51732771  0.51193108
  0.53933734  0.69955624         nan  0.52333523         nan] 0.601058529074
------------------
53
["Another's Home" 'Arts & Entertainment' 'Food' 'Home' 'Nightlife Spot'
 'Outdoors & Recreation' 'Professional Or Medical Office' 'Shop Or Store'
 'Spiritual' 'Travel Or Transport' 'Work']
[        nan  0.58052975  0.65561124  0.71862917  0.64258544  0.50421412
  0.53178391  0.

["Another's Home" 'Arts & Entertainment' 'Food' 'Home' 'Nightlife Spot'
 'Outdoors & Recreation' 'Professional Or Medical Office' 'Shop Or Store'
 'Spiritual' 'Travel Or Transport' 'Work']
[        nan  0.62041751  0.65121659  0.75097134  0.58897917  0.50875324
  0.55275472  0.69565306         nan  0.50040538         nan] 0.608643876341
------------------
76
["Another's Home" 'Arts & Entertainment' 'Food' 'Home' 'Nightlife Spot'
 'Outdoors & Recreation' 'Professional Or Medical Office' 'Shop Or Store'
 'Spiritual' 'Travel Or Transport' 'Work']
[        nan  0.5938993   0.63549605  0.69461625  0.50821784  0.50777775
  0.55031169  0.74769034         nan  0.50630484         nan] 0.593039255743
------------------
77
["Another's Home" 'Arts & Entertainment' 'Food' 'Home' 'Nightlife Spot'
 'Outdoors & Recreation' 'Professional Or Medical Office' 'Shop Or Store'
 'Spiritual' 'Travel Or Transport' 'Work']
[        nan  0.575756    0.67784228  0.77343053  0.5258332   0.51711062
  0.55648461  0.