In [1]:
import os
import sys
import pandas as pd
import numpy as np

from helpers import (
    peform_mds_division,
)

# Make the geocoding package importable: the relative directory is resolved against
# the current working directory and appended to sys.path, which has to happen
# before the `from geocoding import ...` line below.
path = os.path.abspath("../spatial-awareness/geocoding/")
sys.path.append(path)
from geocoding import GeoCoding

%load_ext autoreload
%autoreload 2

In [2]:
# NOTE(review): unpickling can execute arbitrary code; only load
# 'cities_llama2.pkl' when it comes from a trusted source.
cities = pd.read_pickle('cities_llama2.pkl')

# De-duplicate while keeping first-appearance order. Going through set() made the
# city order (and therefore every matrix built from it) vary between kernel
# sessions, because string hashing is randomised per process.
cities_list = cities.a_name.drop_duplicates().to_list()

gc = GeoCoding()

coods = []       # one [lng, lat] row per city, in cities_list order
coods_dic = {}   # city name -> [lat, lng] (note: opposite order to `coods`)
for each in cities_list:
    # Only the first two of the five returned values are used here.
    lat, lng, _, _ , _ = gc.get_lat_lng(each)
    coods.append([lng, lat])
    coods_dic[each] = [lat, lng]

coods = np.array(coods)

## Distance

In [3]:
# Pairwise matrix of the 'distance' column, indexed by city on both axes.
# The diagonal is left at 0.0.
dis_disparity_df = pd.DataFrame(0.0, index=cities_list, columns=cities_list)

# Build the (a_name, b_name) -> value lookup once instead of filtering `cities`
# with a boolean mask for every pair. Keeping the first row per pair matches the
# previous "take the first match" behaviour; a missing pair raises KeyError.
pair_values = (
    cities.drop_duplicates(subset=['a_name', 'b_name'])
    .set_index(['a_name', 'b_name'])['distance']
)

# Write through .at on the frame itself: assigning into the row Series yielded by
# iterrows() is not guaranteed to modify the DataFrame (the row may be a copy).
for city_a in cities_list:
    for city_b in cities_list:
        if city_a == city_b:
            continue
        dis_disparity_df.at[city_a, city_b] = pair_values.loc[(city_a, city_b)]

vals = peform_mds_division(
    disparity_df=dis_disparity_df,
    coods_dic=coods_dic,
    metric=True,
    asymmetric=False,
    similarity_measure_used=False
)
df = pd.DataFrame(vals)
# Mean of the per-row 'err' values returned by the helper.
df.err.mean()

56.777679245172465

## Co-occurrence Count

In [4]:
# Pairwise matrix of the 'co_occ_count' column, indexed by city on both axes.
# The diagonal is left at 0.0.
disparity_df = pd.DataFrame(0.0, index=cities_list, columns=cities_list)

# Build the (a_name, b_name) -> value lookup once instead of filtering `cities`
# with a boolean mask for every pair. Keeping the first row per pair matches the
# previous "take the first match" behaviour; a missing pair raises KeyError.
pair_values = (
    cities.drop_duplicates(subset=['a_name', 'b_name'])
    .set_index(['a_name', 'b_name'])['co_occ_count']
)

# Write through .at on the frame itself: assigning into the row Series yielded by
# iterrows() is not guaranteed to modify the DataFrame (the row may be a copy).
for city_a in cities_list:
    for city_b in cities_list:
        if city_a == city_b:
            continue
        disparity_df.at[city_a, city_b] = pair_values.loc[(city_a, city_b)]

# The counts are handed to the helper flagged as a similarity measure.
vals = peform_mds_division(
    disparity_df=disparity_df,
    coods_dic=coods_dic,
    metric=True,
    asymmetric=True,
    similarity_measure_used=True
)
df = pd.DataFrame(vals)
# Mean of the per-row 'err' values returned by the helper.
df.err.mean()

425.6366237850284

## Random

In [5]:
# Random baseline: run the same MDS evaluation on matrices of uniform random
# values to see what error an uninformative input produces.
N_RANDOM_TRIALS = 10
RANDOM_SEED = 0  # fixed so the random matrices are the same on every run

# One seeded generator for all trials; creating an unseeded generator inside the
# loop made the baseline impossible to reproduce.
# NOTE(review): if peform_mds_division has its own randomness, that is not seeded here.
rng = np.random.default_rng(RANDOM_SEED)

# Size the matrix from the actual city list instead of a hard-coded 93, which
# fails in the DataFrame constructor as soon as the number of cities changes.
n_cities = len(cities_list)

errs = []

for _ in range(N_RANDOM_TRIALS):
    arr_random = rng.uniform(low=1, high=5000, size=(n_cities, n_cities))
    np.fill_diagonal(arr_random, 0)
    disparity_df = pd.DataFrame(arr_random, index=cities_list, columns=cities_list)
    vals = peform_mds_division(
        disparity_df=disparity_df,
        coods_dic=coods_dic,
        metric=True,
        asymmetric=True,
        similarity_measure_used=False
    )
    df = pd.DataFrame(vals)
    errs.append(df.err.mean())

print(errs)
# Average of the per-trial mean errors.
sum(errs)/len(errs)

[517.4248259043843, 508.3904384240066, 471.56441849382355, 494.280046673838, 459.487992074146, 483.9616512182175, 493.1285375513954, 504.74944283387373, 485.7481440502538, 463.1991700267782]


488.19346672507174

## Predicted Distance

In [6]:
# Spearman rank correlation between the 'distance' and 'predicted_dis' columns.
cities.loc[:, ['distance', 'predicted_dis']].corr(method='spearman')

Unnamed: 0,distance,predicted_dis
distance,1.0,0.988498
predicted_dis,0.988498,1.0


In [7]:
# Pairwise matrix of the 'predicted_dis' column, indexed by city on both axes.
# The diagonal is left at 0.0.
dis_disparity_df = pd.DataFrame(0.0, index=cities_list, columns=cities_list)

# Build the (a_name, b_name) -> value lookup once instead of filtering `cities`
# with a boolean mask for every pair. Keeping the first row per pair matches the
# previous "take the first match" behaviour; a missing pair raises KeyError.
pair_values = (
    cities.drop_duplicates(subset=['a_name', 'b_name'])
    .set_index(['a_name', 'b_name'])['predicted_dis']
)

# Write through .at on the frame itself: assigning into the row Series yielded by
# iterrows() is not guaranteed to modify the DataFrame (the row may be a copy).
for city_a in cities_list:
    for city_b in cities_list:
        if city_a == city_b:
            continue
        dis_disparity_df.at[city_a, city_b] = pair_values.loc[(city_a, city_b)]

vals = peform_mds_division(
    disparity_df=dis_disparity_df,
    coods_dic=coods_dic,
    metric=True,
    asymmetric=True,
    similarity_measure_used=False
)
df = pd.DataFrame(vals)
# Mean of the per-row 'err' values returned by the helper.
df.err.mean()

109.23611776927801

## And Counts

In [8]:
# Pairwise matrix of the 'and_count' column, indexed by city on both axes.
# The diagonal is left at 0.0.
disparity_df = pd.DataFrame(0.0, index=cities_list, columns=cities_list)

# Build the (a_name, b_name) -> value lookup once instead of filtering `cities`
# with a boolean mask for every pair. Keeping the first row per pair matches the
# previous "take the first match" behaviour; a missing pair raises KeyError.
pair_values = (
    cities.drop_duplicates(subset=['a_name', 'b_name'])
    .set_index(['a_name', 'b_name'])['and_count']
)

# Write through .at on the frame itself: assigning into the row Series yielded by
# iterrows() is not guaranteed to modify the DataFrame (the row may be a copy).
for city_a in cities_list:
    for city_b in cities_list:
        if city_a == city_b:
            continue
        disparity_df.at[city_a, city_b] = pair_values.loc[(city_a, city_b)]

# The counts are handed to the helper flagged as a similarity measure.
vals = peform_mds_division(
    disparity_df=disparity_df,
    coods_dic=coods_dic,
    metric=True,
    asymmetric=True,
    similarity_measure_used=True
)
df = pd.DataFrame(vals)
# Mean of the per-row 'err' values returned by the helper.
df.err.mean()

456.04534803999275

## Near Count

In [9]:
# Pairwise matrix of the 'near_count' column, indexed by city on both axes.
# The diagonal is left at 0.0.
disparity_df = pd.DataFrame(0.0, index=cities_list, columns=cities_list)

# Build the (a_name, b_name) -> value lookup once instead of filtering `cities`
# with a boolean mask for every pair. Keeping the first row per pair matches the
# previous "take the first match" behaviour; a missing pair raises KeyError.
pair_values = (
    cities.drop_duplicates(subset=['a_name', 'b_name'])
    .set_index(['a_name', 'b_name'])['near_count']
)

# Write through .at on the frame itself: assigning into the row Series yielded by
# iterrows() is not guaranteed to modify the DataFrame (the row may be a copy).
for city_a in cities_list:
    for city_b in cities_list:
        if city_a == city_b:
            continue
        disparity_df.at[city_a, city_b] = pair_values.loc[(city_a, city_b)]

# The counts are handed to the helper flagged as a similarity measure.
vals = peform_mds_division(
    disparity_df=disparity_df,
    coods_dic=coods_dic,
    metric=True,
    asymmetric=True,
    similarity_measure_used=True
)
df = pd.DataFrame(vals)
# Mean of the per-row 'err' values returned by the helper.
df.err.mean()

322.8193074982972

## Close to Count

In [10]:
# Pairwise matrix of the 'close_count' column, indexed by city on both axes.
# The diagonal is left at 0.0.
disparity_df = pd.DataFrame(0.0, index=cities_list, columns=cities_list)

# Build the (a_name, b_name) -> value lookup once instead of filtering `cities`
# with a boolean mask for every pair. Keeping the first row per pair matches the
# previous "take the first match" behaviour; a missing pair raises KeyError.
pair_values = (
    cities.drop_duplicates(subset=['a_name', 'b_name'])
    .set_index(['a_name', 'b_name'])['close_count']
)

# Write through .at on the frame itself: assigning into the row Series yielded by
# iterrows() is not guaranteed to modify the DataFrame (the row may be a copy).
for city_a in cities_list:
    for city_b in cities_list:
        if city_a == city_b:
            continue
        disparity_df.at[city_a, city_b] = pair_values.loc[(city_a, city_b)]

# The counts are handed to the helper flagged as a similarity measure.
vals = peform_mds_division(
    disparity_df=disparity_df,
    coods_dic=coods_dic,
    metric=True,
    asymmetric=True,
    similarity_measure_used=True
)
df = pd.DataFrame(vals)
# Mean of the per-row 'err' values returned by the helper.
df.err.mean()

351.3850410375941

## Far Count

In [11]:
# Pairwise matrix of the 'far_count' column, indexed by city on both axes.
# The diagonal is left at 0.0.
disparity_df = pd.DataFrame(0.0, index=cities_list, columns=cities_list)

# Build the (a_name, b_name) -> value lookup once instead of filtering `cities`
# with a boolean mask for every pair. Keeping the first row per pair matches the
# previous "take the first match" behaviour; a missing pair raises KeyError.
pair_values = (
    cities.drop_duplicates(subset=['a_name', 'b_name'])
    .set_index(['a_name', 'b_name'])['far_count']
)

# Write through .at on the frame itself: assigning into the row Series yielded by
# iterrows() is not guaranteed to modify the DataFrame (the row may be a copy).
for city_a in cities_list:
    for city_b in cities_list:
        if city_a == city_b:
            continue
        disparity_df.at[city_a, city_b] = pair_values.loc[(city_a, city_b)]

# Unlike the other count cells, this one passes metric=False and does not flag
# the values as a similarity measure.
vals = peform_mds_division(
    disparity_df=disparity_df,
    coods_dic=coods_dic,
    metric=False,
    asymmetric=True,
    similarity_measure_used=False
)
df = pd.DataFrame(vals)
# Mean of the per-row 'err' values returned by the helper.
df.err.mean()

455.64383241028185

#