In [None]:
import trueskill as ts

In [None]:
import argparse
import json
import geopandas as gpd
import matplotlib.pyplot as plt
import os
import osmnx as ox
import pandas as pd
import numpy as np
import requests
from tqdm import tqdm, trange
import glob
from sklearn.svm import NuSVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from os import path

# spectral clustering
from sklearn.datasets import make_classification
from sklearn.cluster import SpectralClustering
from sklearn.cluster import KMeans
from sklearn.cluster import MiniBatchKMeans
from sklearn.mixture import GaussianMixture
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import Birch
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA

import numpy as np
import os
import pandas as pd
import pickle

import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.utils import class_weight
from sklearn.metrics import confusion_matrix

In [None]:
# Seed passed as `random_state` when splitting the comparisons into train/test.
_RANDOM_STATE = 2

# Data

## Deep Features

In [None]:
# Location of the CSV read into `data_deep` (relative to the working directory).
deep_features_file = os.path.join('data', 'berlin_inceptionv3.csv')

In [None]:
# Load the deep-feature table; later cells read its `image` and `features` columns.
data_deep = pd.read_csv(deep_features_file)

In [None]:
# Unpack features: each CSV cell holds the text of a nested list "[[...]]".
import ast  # local import so this cell does not depend on an edit elsewhere


def _parse_feature_cell(raw):
    """Turn the text "[[f0, f1, ...]]" into the 1-D array [f0, f1, ...].

    `ast.literal_eval` only accepts Python literals, so (unlike `eval`) a
    malformed or malicious CSV cell cannot execute code. Values that are not
    strings are returned unchanged, which makes re-running this cell harmless
    once the column has already been unpacked.
    """
    if not isinstance(raw, str):
        return raw
    # The outer list wraps a single row; keep only that row.
    return np.array(ast.literal_eval(raw))[0]


data_deep['features'] = data_deep['features'].apply(_parse_feature_cell)

In [None]:
# Derive the integer image id from the stored path by stripping the directory
# prefix and the extension. `regex=False` makes both replacements literal: on
# pandas < 2.0 the default is regex matching, where the "." in ".jpg" would
# match any character.
data_deep['image_i'] = (
    data_deep.image
    .str.replace('/mnt/datasets/mapillary/berlin/', '', regex=False)
    .str.replace('.jpg', '', regex=False)
    .astype(int)
)

In [None]:
# Work on a copy and index the feature table by the integer image id.
data_deep = data_deep.copy().set_index('image_i')

## Scores data

In [None]:
# Load the pairwise comparisons. The context manager closes the file handle
# (the bare `open(...)` left it open until garbage collection).
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open('data/comparisons_berlin.p', 'rb') as comparisons_file:
    comparisons = pickle.load(comparisons_file)
print(comparisons.shape)

In [None]:
# Hold out 15% of the comparisons; the seed keeps the split repeatable.
X_train, X_test = train_test_split(
    comparisons,
    test_size=0.15,
    random_state=_RANDOM_STATE,
)
for split_label, split_frame in (('Train:', X_train), ('Test:', X_test)):
    print(split_label, split_frame.shape)

In [None]:
# Every distinct image id that appears on either side of a comparison.
images_list = pd.unique(
    comparisons.loc[:, ['image_l', 'image_r']].to_numpy().ravel(order='K')
)

In [None]:
# Distinct image ids from both comparison columns; these become the rating keys.
both_sides = comparisons[['image_l', 'image_r']].values
unique_images = pd.unique(both_sides.ravel('K'))

# TrueSkill

## Initialize TrueSkill scores

In [None]:
# Start every image with its own default TrueSkill rating.
scores = {image: ts.Rating() for image in unique_images}

## Compute scores based on comparisons

In [None]:
# TrueSkill ranks for (left image, right image) per comparison outcome.
# A lower rank is the better place and equal ranks are a draw, so with this
# table -1 ranks the left image first, 0 is a draw and 1 ranks the right first.
_RANKS_BY_OUTCOME = {
    -1: [0, 1],
    0: [0, 0],
    1: [1, 0],
}

for i, row in X_train.iterrows():
    # Define the players in this round
    player1 = scores[row['image_l']]
    player2 = scores[row['image_r']]

    # Process match. An unknown outcome (e.g. NaN) used to fall through the
    # if/elif chain and silently reuse the ranks of the previous comparison
    # (or raise NameError on the first row); fail loudly instead.
    score = _RANKS_BY_OUTCOME.get(row['score'])
    if score is None:
        raise ValueError(
            'Unexpected comparison score {!r} at row {!r}; expected -1, 0 or 1'.format(row['score'], i)
        )

    [player1], [player2] = ts.rate([[player1], [player2]], ranks=score)

    # Update scores
    scores[row['image_l']] = player1
    scores[row['image_r']] = player2

## Organize score data

In [None]:
# One row per image after transposing; the two value columns are named
# `score` and `sigma`.
scores_df = pd.DataFrame(scores).transpose().set_axis(['score', 'sigma'], axis='columns')

In [None]:
# Cast the image-id index to integers (the feature table is also indexed by an
# integer id, which the later index intersection relies on).
scores_df.index = scores_df.index.astype(int)

In [None]:
# Load a previously saved score table. This REPLACES the `scores_df` built in
# the cells above, so everything downstream uses the stored scores.
# The context manager closes the file handle, which the bare `open(...)` leaked.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open('output/trueskill_scores_SEED1.p', 'rb') as scores_file:
    scores_df = pickle.load(scores_file)
scores_df.index = scores_df.index.astype(int)

In [None]:
# Keep only the images whose sigma is at most 5.2, then show the remaining size.
scores_df = scores_df.loc[scores_df['sigma'] <= 5.2]
scores_df.shape

## Intersect deep-feature data and scores

In [None]:
# Image ids present in both the feature table and the score table.
intersection_deep = data_deep.index.intersection(other=scores_df.index)

In [None]:
# Restrict both tables to the shared ids so their rows line up one-to-one.
scores_df_deep = scores_df.loc[intersection_deep]
data_deep = data_deep.loc[intersection_deep]

print(scores_df_deep.shape, data_deep.shape)

In [None]:
# Feature matrix: stack the per-image feature vectors into one 2-D array.
X = np.stack(data_deep['features'].tolist())
# Standardized copy of the features.
X_std = StandardScaler().fit_transform(X)
# Target: scores rescaled to [0, 1] and flattened to a 1-D vector.
y = MinMaxScaler().fit_transform(scores_df_deep[['score']]).ravel()

In [None]:
# Split the targets into three bands around the mean:
#   mask_1: more than one std below, mask_3: more than one std above,
#   mask_2: everything in between.
mask_1 = y < y.mean() - y.std()
mask_3 = y > y.mean() + y.std()
mask_2 = ~(mask_1 | mask_3)


In [None]:
# Overwrite the targets in place with the band label (1 = low, 2 = mid, 3 = high).
y[:] = np.select([mask_1, mask_2, mask_3], [1, 2, 3])
y

In [None]:
# Distinct label values in `y` and how many samples carry each one.
unique, counts = np.unique(y, return_counts=True)

In [None]:
# Display the number of samples per label value.
counts

In [None]:
%%time
results = {}

# Hold out a test split. It is seeded so the reported numbers are repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    random_state=_RANDOM_STATE,
                                                   )

X_subset = X_train
y_subset = y_train

# Gradient-boosted regressor tuned by randomized search.
# (The `svm_random` name is kept because later cells refer to it.)
print('Estimating model.')
reg = xgb.XGBRegressor()
svm_random = RandomizedSearchCV(estimator=reg,
                                param_distributions={'n_estimators':  [int(x) for x in np.linspace(start=10, stop=200, num=11)],
                                                     'max_depth':     [int(x) for x in np.linspace(start=1, stop=5, num=5)],
                                                     'learning_rate': [0.1, 0.05, 0.01],
                                                     'subsample': [0.5, 0.3, .2, 0.1],
                                                     'colsample_bytree': [0.5, 0.3, .2, 0.1],
                                                    },
                                n_iter=100,
                                cv=7,
                                random_state=_RANDOM_STATE,  # reproducible sampling of candidates
                                verbose=2, n_jobs=-1)

# Fit on the training split only. Fitting on all of X, y (as before) left the
# hold-out split unused and made the second printed score a training score.
svm_random.fit(X_train, y_train)

# First number: mean cross-validated score of the best candidate (training split).
# Second number: score of the refitted best model on the held-out test split.
print('Model estimated. R^2: {:.3f} / {:.3f}'.format(svm_random.best_score_, svm_random.score(X_test, y_test)))
print('Hyperparameters:', svm_random.best_params_)
print()


In [None]:
# Report the best cross-validated score next to the score on the full X, y,
# followed by the selected hyperparameters.
print(
    'Model estimated. R^2: {:.3f} / {:.3f}'.format(
        svm_random.best_score_,
        svm_random.score(X, y),
    )
)
print('Hyperparameters:', svm_random.best_params_)

In [None]:

# Predicted vs. actual target for every sample, with a dashed identity line;
# both axes are clipped to the range of the actual targets.
y_lo, y_hi = y.min(), y.max()

axes = plt.gca()
axes.scatter(y, svm_random.predict(X), c='r')
axes.set_xlim(y_lo, y_hi)
axes.set_ylim(y_lo, y_hi)
axes.plot([y_lo, y_hi], [y_lo, y_hi], 'r.', linestyle="--")

plt.show()

# Classification

In [None]:
# Rebuild the inputs from scratch for the classification experiment.
feature_rows = data_deep['features'].to_numpy()
X = np.stack(feature_rows)
X_std = StandardScaler().fit_transform(X)

# Scores rescaled to [0, 1], as a flat vector.
score_column = scores_df_deep[['score']]
y = MinMaxScaler().fit_transform(score_column)[:, 0]

In [None]:
# Two-class setup: keep only the tails of the score distribution.
# `delta` is the number of standard deviations from the mean a sample must lie
# to be kept.
delta = 1.

mask_2 = y > y.mean() + delta * y.std()    # upper tail
mask_1 = y <= y.mean() - delta * y.std()   # lower tail

# Drop everything between the tails; lower tail becomes 0, upper tail becomes 1.
kept = mask_1 | mask_2
y = np.where(mask_1[kept], 0., 1.).astype(y.dtype)

X = X[kept]




In [None]:
# Accumulator for per-run result records (reset again at the top of the experiment cell).
results = []

In [None]:
%%time

results = []
# Repeat the experiment several times. A named counter is used instead of `_`,
# which IPython reserves for the last cell output.
for repetition in range(5):
    for alpha in [1.5]: # [x for x in np.linspace(start=0, stop=3, num=21)]:
        # Data (rebuilt every run because the labels are overwritten in place below)
        X = np.stack(data_deep['features'].to_numpy())
        y = MinMaxScaler().fit_transform(scores_df_deep[['score']])[:, 0]

        # Keep only the two tails of the score distribution:
        # lower tail (mask_1) -> label 0, upper tail (mask_2) -> label 1.
        mask_2 = y > y.mean() + alpha * y.std()
        mask_1 = y <= y.mean() - alpha * y.std()
        y[mask_2] = 2
        y[mask_1] = 1
        y = y[mask_1 | mask_2]
        y = y - 1

        X = X[mask_1 | mask_2]

        # Split into training and testing. The seed changes per repetition so
        # the runs differ from each other but are reproducible.
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            test_size=0.15,
                                                            random_state=_RANDOM_STATE + repetition,
                                                           )

        # Too few samples for 5-fold CV: stop before doing any further work.
        if y_train.shape[0] < 5:
            break

        # Per-sample weights that balance the two classes
        classes_weights = class_weight.compute_sample_weight(
            class_weight='balanced',
            y=y_train
        )

        # Estimate model and tune hyperparameters via random search
        print('Estimating model.')
        xgb_cl = xgb.XGBClassifier()
        grid_search = {'n_estimators':  [int(x) for x in np.linspace(start=10, stop=200, num=11)],
                       'max_depth':     [int(x) for x in np.linspace(start=1, stop=5, num=5)],
                       'learning_rate': [0.1, 0.05, 0.01],
                       'subsample': [0.5, 0.3, .2, 0.1],
                       'colsample_bytree': [0.5, 0.3, .2, 0.1],
                      }
        print("Total amount of possibilities: {}".format(np.prod([len(grid_search[key]) for key in grid_search])))

        random = RandomizedSearchCV(estimator=xgb_cl,
                                    param_distributions=grid_search,
                                    n_iter=2000,
                                    cv=5,
                                    random_state=_RANDOM_STATE + repetition,
                                    verbose=0, n_jobs=-1)

        # Results
        random.fit(X_train, y_train, sample_weight=classes_weights)
        test_accuracy = random.score(X_test, y_test)
        print('Model estimated. Score over CV: {:.3f} '.format(random.best_score_))
        print('Score train:', random.score(X_train, y_train), 'Score test:', test_accuracy)

        # Rows -> true class, columns -> predicted class. Passing `labels` pins
        # the matrix to 2x2 even when the small test split holds a single
        # class; without it `cm[1, 1]` raised IndexError in that case.
        cm = confusion_matrix(y_test.astype(int),
                              random.predict(X_test).astype(int),
                              labels=[0, 1])
        print('Hyperparameters:', random.best_params_)
        print()

        # Per-class accuracy = correct predictions / true members of the class
        # (NaN when the test split has no member of that class).
        # NOTE(review): row 0 is the LOWER score tail, yet it is stored under
        # 'acc_safe' -- confirm this matches the intended meaning of the scores.
        results.append({'acc_safe': cm[0, 0] / cm[0, :].sum(),
                        'acc_unsafe': cm[1, 1] / cm[1, :].sum(),
                        'acc_overall': test_accuracy,
                        'alpha': alpha,
                       })

# Last top-level expression, so the collected records are displayed by the cell.
results

In [None]:
# One row per collected result record, one column per record key.
results_df = pd.DataFrame(results)

In [None]:
# Only keep the runs whose alpha is at most 1.7.
results_df = results_df.loc[results_df['alpha'] <= 1.7]

In [None]:
import plotly.graph_objects as go

# Accuracy as a function of alpha: one line per metric, added in legend order.
fig = go.Figure()
for metric_column, trace_name in (
    ('acc_overall', 'Overall'),
    ('acc_unsafe', 'Perceived Unsafe'),
    ('acc_safe', 'Perceived Safe'),
):
    fig.add_trace(
        go.Scatter(
            x=results_df['alpha'],
            y=results_df[metric_column],
            mode='lines',
            name=trace_name,
        )
    )

# Transparent background, fixed size, serif font, legend in the upper-left.
fig.update_layout(
    margin={'l': 20, 'r': 20, 't': 20, 'b': 20},
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    width=800,
    height=500,
    xaxis_title="α",
    yaxis_title="Accuracy",
    font={'family': "Times New Roman", 'size': 18, 'color': "Black"},
    legend={'yanchor': "top", 'y': 0.9, 'xanchor': "left", 'x': 0.01},
)

# Black axis lines on both axes; grey horizontal grid on the accuracy axis.
axis_line = dict(showline=True, linewidth=2, linecolor='black')
fig.update_xaxes(tick0=0.5, dtick=0.5, **axis_line)
fig.update_yaxes(tick0=0.5, dtick=0.1,
                 showgrid=True, gridwidth=1, gridcolor='grey', **axis_line)

fig.show()