In [8]:
import os
import sys
import pandas as pd
import numpy as np

from helpers import (
    peform_mds
)

path = os.path.abspath("../spatial-awareness/geocoding/")
sys.path.append(path)
from geocoding import GeoCoding

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# Load the city-pair table and geocode every distinct origin city (`a_name`).
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling;
# only load 'cities_llama2.pkl' from a trusted source.
cities = pd.read_pickle('cities_llama2.pkl')

# De-duplicate in first-appearance order. list(set(...)) ordered the names by
# string hash, which changes between kernel sessions (hash randomization), so
# the row/column order of every matrix built from this list was not reproducible.
cities_list = cities.a_name.drop_duplicates().to_list()

# NOTE(review): the name `gc` shadows the stdlib `gc` module if that is ever imported.
gc = GeoCoding()

coods_dic = {}        # city name -> [lat, lng]
lng_lat_pairs = []    # one [lng, lat] pair per city, in cities_list order
for city_name in cities_list:
    # get_lat_lng returns five values; only the first two are used here.
    lat, lng, _, _, _ = gc.get_lat_lng(city_name)
    lng_lat_pairs.append([lng, lat])
    coods_dic[city_name] = [lat, lng]

# Note the differing orders: `coods` rows are [lng, lat], `coods_dic` values are [lat, lng].
coods = np.array(lng_lat_pairs)

## Distance

In [10]:
# Build the square city-by-city matrix from the `distance` column of `cities`
# and pass it to peform_mds. The diagonal is left at 0.0.
dis_disparity_df = pd.DataFrame(0.0, index=cities_list, columns=cities_list)

for city_a in cities_list:
    # Rows whose origin is city_a; filtered once per origin instead of once per pair.
    rows_from_a = cities.loc[cities.a_name == city_a]
    for city_b in cities_list:
        if city_a == city_b:
            continue
        pair_values = rows_from_a.loc[rows_from_a.b_name == city_b, 'distance']
        # Write through the frame itself. Assigning into the row yielded by
        # iterrows() is not guaranteed to reach the frame (the row may be a
        # copy, and always is under pandas Copy-on-Write), which would
        # silently leave the matrix all zeros.
        # .iloc[0] takes the first matching row and raises IndexError if the pair is missing.
        dis_disparity_df.at[city_a, city_b] = pair_values.iloc[0]

# peform_mds comes from the local `helpers` module; its result is turned into a
# frame below and must expose an `err` column.
vals = peform_mds(
    cities_list=cities_list,
    disparity_df=dis_disparity_df,
    coods_dic=coods_dic,
    metric=True,
    asymmetric=False,
    similarity_measure_used=False
)
df = pd.DataFrame(vals)
# Mean of the `err` column; last expression so the notebook displays it.
df.err.mean()

190.40960552991757

## Co-occurrence Count

In [11]:
# Build the square city-by-city matrix from the `co_occ_count` column of
# `cities` and pass it to peform_mds. The diagonal is left at 0.0.
disparity_df = pd.DataFrame(0.0, index=cities_list, columns=cities_list)

for city_a in cities_list:
    # Rows whose origin is city_a; filtered once per origin instead of once per pair.
    rows_from_a = cities.loc[cities.a_name == city_a]
    for city_b in cities_list:
        if city_a == city_b:
            continue
        pair_values = rows_from_a.loc[rows_from_a.b_name == city_b, 'co_occ_count']
        # Write through the frame itself. Assigning into the row yielded by
        # iterrows() is not guaranteed to reach the frame (the row may be a
        # copy, and always is under pandas Copy-on-Write), which would
        # silently leave the matrix all zeros.
        # .iloc[0] takes the first matching row and raises IndexError if the pair is missing.
        disparity_df.at[city_a, city_b] = pair_values.iloc[0]

# peform_mds comes from the local `helpers` module; its result is turned into a
# frame below and must expose an `err` column.
vals = peform_mds(
    cities_list=cities_list,
    disparity_df=disparity_df,
    coods_dic=coods_dic,
    metric=True,
    asymmetric=True,
    similarity_measure_used=True
)
df = pd.DataFrame(vals)
# Mean of the `err` column; last expression so the notebook displays it.
df.err.mean()

1237.0059654125573

## Random

In [12]:
# Random baseline: run peform_mds on uniformly random matrices and average the
# mean `err` over several draws.
N_RANDOM_RUNS = 10
RANDOM_SEED = 0  # fixed so the baseline is reproducible under Restart & Run All

# Size the matrix from the actual city list; a hard-coded 93 makes the
# DataFrame constructor fail as soon as the number of cities changes.
n_cities = len(cities_list)
# One seeded generator for all draws instead of a fresh unseeded one per iteration.
# NOTE(review): any randomness inside peform_mds itself is not controlled here.
rng = np.random.default_rng(RANDOM_SEED)

errs = []

for _ in range(N_RANDOM_RUNS):
    arr_random = rng.uniform(low=1, high=5000, size=(n_cities, n_cities))
    np.fill_diagonal(arr_random, 0)  # zero self-to-self entries, in place
    disparity_df = pd.DataFrame(arr_random, index=cities_list, columns=cities_list)
    vals = peform_mds(
        cities_list=cities_list,
        disparity_df=disparity_df,
        coods_dic=coods_dic,
        metric=True,
        asymmetric=True,
        similarity_measure_used=False
    )
    df = pd.DataFrame(vals)
    errs.append(df.err.mean())

print(errs)
# Average of the per-run mean errors; last expression so the notebook displays it.
sum(errs)/len(errs)

[1388.0560410865987, 1443.8622138446954, 1461.7821515003518, 1458.348366103717, 1444.2227972938572, 1491.8236910552205, 1420.9485555060144, 1332.349729403142, 1414.7536787694237, 1409.8539125054724]


1426.6001137068495

## Predicted Distance

In [13]:
# Spearman correlation matrix of the `distance` and `predicted_dis` columns.
cities.loc[:, ['distance', 'predicted_dis']].corr(method='spearman')

Unnamed: 0,distance,predicted_dis
distance,1.0,0.988498
predicted_dis,0.988498,1.0


In [14]:
# Build the square city-by-city matrix from the `predicted_dis` column of
# `cities` and pass it to peform_mds. The diagonal is left at 0.0.
dis_disparity_df = pd.DataFrame(0.0, index=cities_list, columns=cities_list)

for city_a in cities_list:
    # Rows whose origin is city_a; filtered once per origin instead of once per pair.
    rows_from_a = cities.loc[cities.a_name == city_a]
    for city_b in cities_list:
        if city_a == city_b:
            continue
        pair_values = rows_from_a.loc[rows_from_a.b_name == city_b, 'predicted_dis']
        # Write through the frame itself. Assigning into the row yielded by
        # iterrows() is not guaranteed to reach the frame (the row may be a
        # copy, and always is under pandas Copy-on-Write), which would
        # silently leave the matrix all zeros.
        # .iloc[0] takes the first matching row and raises IndexError if the pair is missing.
        dis_disparity_df.at[city_a, city_b] = pair_values.iloc[0]

# peform_mds comes from the local `helpers` module; its result is turned into a
# frame below and must expose an `err` column.
vals = peform_mds(
    cities_list=cities_list,
    disparity_df=dis_disparity_df,
    coods_dic=coods_dic,
    metric=True,
    asymmetric=True,
    similarity_measure_used=False
)
df = pd.DataFrame(vals)
# Mean of the `err` column; last expression so the notebook displays it.
df.err.mean()

196.28683625019195

## "And" Count

In [15]:
# Build the square city-by-city matrix from the `and_count` column of `cities`
# and pass it to peform_mds. The diagonal is left at 0.0.
disparity_df = pd.DataFrame(0.0, index=cities_list, columns=cities_list)

for city_a in cities_list:
    # Rows whose origin is city_a; filtered once per origin instead of once per pair.
    rows_from_a = cities.loc[cities.a_name == city_a]
    for city_b in cities_list:
        if city_a == city_b:
            continue
        pair_values = rows_from_a.loc[rows_from_a.b_name == city_b, 'and_count']
        # Write through the frame itself. Assigning into the row yielded by
        # iterrows() is not guaranteed to reach the frame (the row may be a
        # copy, and always is under pandas Copy-on-Write), which would
        # silently leave the matrix all zeros.
        # .iloc[0] takes the first matching row and raises IndexError if the pair is missing.
        disparity_df.at[city_a, city_b] = pair_values.iloc[0]

# peform_mds comes from the local `helpers` module; its result is turned into a
# frame below and must expose an `err` column.
vals = peform_mds(
    cities_list=cities_list,
    disparity_df=disparity_df,
    coods_dic=coods_dic,
    metric=True,
    asymmetric=True,
    similarity_measure_used=True
)
df = pd.DataFrame(vals)
# Mean of the `err` column; last expression so the notebook displays it.
df.err.mean()

1441.793372151848

## Near Count

In [16]:
# Build the square city-by-city matrix from the `near_count` column of `cities`
# and pass it to peform_mds. The diagonal is left at 0.0.
disparity_df = pd.DataFrame(0.0, index=cities_list, columns=cities_list)

for city_a in cities_list:
    # Rows whose origin is city_a; filtered once per origin instead of once per pair.
    rows_from_a = cities.loc[cities.a_name == city_a]
    for city_b in cities_list:
        if city_a == city_b:
            continue
        pair_values = rows_from_a.loc[rows_from_a.b_name == city_b, 'near_count']
        # Write through the frame itself. Assigning into the row yielded by
        # iterrows() is not guaranteed to reach the frame (the row may be a
        # copy, and always is under pandas Copy-on-Write), which would
        # silently leave the matrix all zeros.
        # .iloc[0] takes the first matching row and raises IndexError if the pair is missing.
        disparity_df.at[city_a, city_b] = pair_values.iloc[0]

# peform_mds comes from the local `helpers` module; its result is turned into a
# frame below and must expose an `err` column.
vals = peform_mds(
    cities_list=cities_list,
    disparity_df=disparity_df,
    coods_dic=coods_dic,
    metric=True,
    asymmetric=True,
    similarity_measure_used=True
)
df = pd.DataFrame(vals)
# Mean of the `err` column; last expression so the notebook displays it.
df.err.mean()

760.4634881041295

## Close to Count

In [17]:
# Build the square city-by-city matrix from the `close_count` column of
# `cities` and pass it to peform_mds. The diagonal is left at 0.0.
disparity_df = pd.DataFrame(0.0, index=cities_list, columns=cities_list)

for city_a in cities_list:
    # Rows whose origin is city_a; filtered once per origin instead of once per pair.
    rows_from_a = cities.loc[cities.a_name == city_a]
    for city_b in cities_list:
        if city_a == city_b:
            continue
        pair_values = rows_from_a.loc[rows_from_a.b_name == city_b, 'close_count']
        # Write through the frame itself. Assigning into the row yielded by
        # iterrows() is not guaranteed to reach the frame (the row may be a
        # copy, and always is under pandas Copy-on-Write), which would
        # silently leave the matrix all zeros.
        # .iloc[0] takes the first matching row and raises IndexError if the pair is missing.
        disparity_df.at[city_a, city_b] = pair_values.iloc[0]

# peform_mds comes from the local `helpers` module; its result is turned into a
# frame below and must expose an `err` column.
vals = peform_mds(
    cities_list=cities_list,
    disparity_df=disparity_df,
    coods_dic=coods_dic,
    metric=True,
    asymmetric=True,
    similarity_measure_used=True
)
df = pd.DataFrame(vals)
# Mean of the `err` column; last expression so the notebook displays it.
df.err.mean()

766.9155696789946

## Far Count

In [18]:
# Build the square city-by-city matrix from the `far_count` column of `cities`
# and pass it to peform_mds. The diagonal is left at 0.0.
disparity_df = pd.DataFrame(0.0, index=cities_list, columns=cities_list)

for city_a in cities_list:
    # Rows whose origin is city_a; filtered once per origin instead of once per pair.
    rows_from_a = cities.loc[cities.a_name == city_a]
    for city_b in cities_list:
        if city_a == city_b:
            continue
        pair_values = rows_from_a.loc[rows_from_a.b_name == city_b, 'far_count']
        # Write through the frame itself. Assigning into the row yielded by
        # iterrows() is not guaranteed to reach the frame (the row may be a
        # copy, and always is under pandas Copy-on-Write), which would
        # silently leave the matrix all zeros.
        # .iloc[0] takes the first matching row and raises IndexError if the pair is missing.
        disparity_df.at[city_a, city_b] = pair_values.iloc[0]

# peform_mds comes from the local `helpers` module; its result is turned into a
# frame below and must expose an `err` column.
# NOTE(review): this call passes metric=False and similarity_measure_used=False;
# presumably far_count is meant to be read as a dissimilarity — confirm against
# helpers.peform_mds.
vals = peform_mds(
    cities_list=cities_list,
    disparity_df=disparity_df,
    coods_dic=coods_dic,
    metric=False,
    asymmetric=True,
    similarity_measure_used=False
)
df = pd.DataFrame(vals)
# Mean of the `err` column; last expression so the notebook displays it.
df.err.mean()

1372.3061491722067

#