In [5]:
import os
import pickle
import pandas as pd
import numpy as np
import xgboost as xgb
from calculate_confusion_matrix import calculate_confusion_matrix
import time
from copy import deepcopy
from utils import one_hot_encoder
# from sklearn.preprocessing import OneHotEncoder
# from sklearn import preprocessing

# --- Run configuration ---
# Write the collected results to disk once the resampling loop has finished.
save_results = True

# n_boot: number of resampling rounds.
# split: fraction of the feature files that is set aside in every round; the
# remaining files form that round's random sample.
n_boot = 100
split = 0.7
# Fixed seed so the random draws repeat from run to run.
np.random.seed(seed=0)

ft_dir = 'features_long/'

# list feature files
# os.listdir() returns entries in arbitrary order; sorting makes a sampled
# index always refer to the same file, so seeded runs are reproducible.
files = sorted(os.listdir(ft_dir))

# reading top locations
# NOTE(review): pickle.load can execute arbitrary code from the file -- only
# load files that were produced by this project.
with open('top_locations.dat', 'rb') as f:
    location_top = pickle.load(f)
# The with-block already closed the file; no explicit close() is needed.

# Load the target table stored in every feature file, keeping only the rows
# whose 'location' value is one of the top locations.
target_all = []
for filename in files:
    # NOTE(review): pickle.load can execute arbitrary code; only read trusted
    # files here.
    with open(os.path.join(ft_dir, filename), 'rb') as f:
        # Each file holds a pair; only the second element (the targets) is used.
        _, target = pickle.load(f)

    # only keeping top locations
    # Gather row *positions* in one pass and select them with .iloc. The
    # earlier .loc lookup treated these positions as index labels, which is
    # only right when the table carries a default 0..n-1 index.
    ind = np.array(
        [i for i, loc in enumerate(target['location']) if loc in location_top],
        int,
    )
    target = target.iloc[ind].reset_index(drop=True)

    target_all.append(target)

# Per-round outputs: confusion matrix, ROC AUC values and the class labels.
confs, aucs, labels = [], [], []
inds = np.arange(len(target_all))

# Number of files drawn per round: what is left after setting aside the
# `split` fraction. It does not change between rounds, so compute it once.
n_draw = inds.size - int(np.floor(inds.size * split))

for i in range(n_boot):

    print('------------------')
    print(i)

    # Random subset of the files, drawn without replacement.
    ind_boot = np.random.choice(inds, size=n_draw, replace=False)

    # Stack the 'location' and 'fsq' columns of the drawn files.
    drawn = [target_all[j] for j in ind_boot]
    y_report = pd.concat([tbl['location'] for tbl in drawn], axis=0)
    y_fsq = pd.concat([tbl['fsq'] for tbl in drawn], axis=0)

    # foursquare performance
    conf, roc_auc = calculate_confusion_matrix(y_fsq, y_report)

    # Distinct 'location' values present in this round's sample.
    classes = np.unique(y_report)
    labels.append(classes)
    confs.append(conf)
    aucs.append(roc_auc)

    print(classes)
    # nanmean skips the classes whose AUC came back as NaN.
    print(roc_auc, np.nanmean(roc_auc))
   
# saving the results
# One pickle holding [aucs, confs, labels]; each is a list with one entry per
# resampling round.
if save_results:
    with open('auc_location_new_10fold_fsq3.dat', 'wb') as f:
        pickle.dump([aucs, confs, labels], f)
    # The with-block closes the file on exit, so no explicit close() follows.


------------------
0
["Another's Home" 'Arts & Entertainment (Theater, Music Venue, Etc.)'
 'Food (Restaurant, Cafe)' 'Home' 'Nightlife Spot (Bar, Club)'
 'Outdoors & Recreation' 'Spiritual (Church, Temple, Etc.)' 'Work']
[        nan  0.5688691   0.64662282  0.68290318  0.54787589  0.50456511
         nan         nan] 0.59016722057
------------------
1
["Another's Home" 'Arts & Entertainment (Theater, Music Venue, Etc.)'
 'Food (Restaurant, Cafe)' 'Home' 'Nightlife Spot (Bar, Club)'
 'Outdoors & Recreation' 'Spiritual (Church, Temple, Etc.)' 'Work']
[        nan  0.62021445  0.72236881  0.63204111  0.64187591  0.50729542
         nan         nan] 0.624759141353
------------------
2
["Another's Home" 'Arts & Entertainment (Theater, Music Venue, Etc.)'
 'Food (Restaurant, Cafe)' 'Home' 'Nightlife Spot (Bar, Club)'
 'Outdoors & Recreation' 'Spiritual (Church, Temple, Etc.)' 'Work']
[        nan  0.63249394  0.69217637  0.65750351  0.56376617  0.50610753
         nan         nan] 0.610409



["Another's Home" 'Arts & Entertainment (Theater, Music Venue, Etc.)'
 'Food (Restaurant, Cafe)' 'Home' 'Nightlife Spot (Bar, Club)'
 'Outdoors & Recreation' 'Spiritual (Church, Temple, Etc.)' 'Work']
[        nan  0.58916075  0.68710157  0.6833867   0.57930845  0.50192235
         nan         nan] 0.608175966413
------------------
4
["Another's Home" 'Arts & Entertainment (Theater, Music Venue, Etc.)'
 'Food (Restaurant, Cafe)' 'Home' 'Nightlife Spot (Bar, Club)'
 'Outdoors & Recreation' 'Spiritual (Church, Temple, Etc.)' 'Work']
[        nan  0.5833968   0.76566741  0.70905404  0.55632295  0.50863267
         nan         nan] 0.624614776831
------------------
5
["Another's Home" 'Arts & Entertainment (Theater, Music Venue, Etc.)'
 'Food (Restaurant, Cafe)' 'Home' 'Nightlife Spot (Bar, Club)'
 'Outdoors & Recreation' 'Spiritual (Church, Temple, Etc.)' 'Work']
[        nan  0.59297885  0.69691583  0.70547079  0.59283816  0.50035594
         nan         nan] 0.617711913395
-------------

["Another's Home" 'Arts & Entertainment (Theater, Music Venue, Etc.)'
 'Food (Restaurant, Cafe)' 'Home' 'Nightlife Spot (Bar, Club)'
 'Outdoors & Recreation' 'Spiritual (Church, Temple, Etc.)' 'Work']
[        nan  0.58269096  0.70352707  0.72312423  0.62744755  0.50586992
         nan         nan] 0.628531948195
------------------
32
["Another's Home" 'Arts & Entertainment (Theater, Music Venue, Etc.)'
 'Food (Restaurant, Cafe)' 'Home' 'Nightlife Spot (Bar, Club)'
 'Outdoors & Recreation' 'Spiritual (Church, Temple, Etc.)' 'Work']
[        nan  0.6192767   0.62997773  0.71507129  0.71111399  0.51299439
         nan         nan] 0.63768682203
------------------
33
["Another's Home" 'Arts & Entertainment (Theater, Music Venue, Etc.)'
 'Food (Restaurant, Cafe)' 'Home' 'Nightlife Spot (Bar, Club)'
 'Outdoors & Recreation' 'Spiritual (Church, Temple, Etc.)' 'Work']
[        nan  0.64724032  0.69546806  0.67109529  0.55547364  0.51006245
         nan         nan] 0.615867952231
------------

["Another's Home" 'Arts & Entertainment (Theater, Music Venue, Etc.)'
 'Food (Restaurant, Cafe)' 'Home' 'Nightlife Spot (Bar, Club)'
 'Outdoors & Recreation' 'Spiritual (Church, Temple, Etc.)' 'Work']
[        nan  0.6027635   0.68191113  0.65569934  0.58829235  0.50256737
         nan         nan] 0.606246738671
------------------
60
["Another's Home" 'Arts & Entertainment (Theater, Music Venue, Etc.)'
 'Food (Restaurant, Cafe)' 'Home' 'Nightlife Spot (Bar, Club)'
 'Outdoors & Recreation' 'Spiritual (Church, Temple, Etc.)' 'Work']
[        nan  0.66005458  0.68944779  0.68539247  0.58946846  0.50683358
         nan         nan] 0.626239376766
------------------
61
["Another's Home" 'Arts & Entertainment (Theater, Music Venue, Etc.)'
 'Food (Restaurant, Cafe)' 'Home' 'Nightlife Spot (Bar, Club)'
 'Outdoors & Recreation' 'Spiritual (Church, Temple, Etc.)' 'Work']
[        nan  0.59681352  0.62900909  0.70400356  0.69117265  0.50627278
         nan         nan] 0.625454320783
-----------

["Another's Home" 'Arts & Entertainment (Theater, Music Venue, Etc.)'
 'Food (Restaurant, Cafe)' 'Home' 'Nightlife Spot (Bar, Club)'
 'Outdoors & Recreation' 'Spiritual (Church, Temple, Etc.)' 'Work']
[        nan  0.57466616  0.69963176  0.73480355  0.555474    0.50888552
         nan         nan] 0.614692197043
------------------
88
["Another's Home" 'Arts & Entertainment (Theater, Music Venue, Etc.)'
 'Food (Restaurant, Cafe)' 'Home' 'Nightlife Spot (Bar, Club)'
 'Outdoors & Recreation' 'Spiritual (Church, Temple, Etc.)' 'Work']
[        nan  0.74364663  0.67294837  0.68235774  0.64441497  0.50969473
         nan         nan] 0.65061248861
------------------
89
["Another's Home" 'Arts & Entertainment (Theater, Music Venue, Etc.)'
 'Food (Restaurant, Cafe)' 'Home' 'Nightlife Spot (Bar, Club)'
 'Outdoors & Recreation' 'Spiritual (Church, Temple, Etc.)' 'Work']
[        nan  0.56819832  0.62014858  0.66671636  0.51795945  0.50897767
         nan         nan] 0.576400075504
------------