In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import math


In [2]:
# Load the cleaned data
# NOTE(review): pickle.load can execute arbitrary code while deserializing, so this
# file must only ever come from a trusted step of this project.
with open('data/cleaned_data.pkl', 'rb') as f:
    df = pickle.load(f)

# Get the list of prefecture names
# (the keys of the loaded object, in their stored order, define the prefecture order)
prefecture_names = list(df.keys())

# Load the prefecture coordinates, indexed by the 'Prefecture' column
coordinates = pd.read_csv('data/japanese_prefectures_coordinates.csv').set_index('Prefecture')

# Sanity check (both lists are ordered)
# Enforced with an assert so that a mismatch stops the notebook instead of only
# displaying False; the value is still shown as the cell output.
names_match = coordinates.index.tolist() == prefecture_names # Always follow this order
assert names_match, "Prefecture order in the coordinates CSV does not match the cleaned data keys"
names_match

True

In [7]:
# Calculate the distance between two points using lat and long
def get_distance_km(diff_lat_degree, diff_long_degree, mean_lat_degree=None):
    """Approximate the distance in kilometers spanned by a lat/long offset.

    The offset is treated as a small displacement on a sphere: both angular
    differences are converted to radians, combined with the Pythagorean
    formula, and scaled by the earth's mean radius.

    Parameters
    ----------
    diff_lat_degree : float
        Latitude difference between the two points, in degrees.
    diff_long_degree : float
        Longitude difference between the two points, in degrees.
    mean_lat_degree : float, optional
        Mean latitude of the two points, in degrees. When given, the longitude
        difference is scaled by cos(mean latitude) (equirectangular
        approximation). When omitted (default), no scaling is applied, so a
        degree of longitude is treated as being as long as a degree of
        latitude; this overestimates east-west distances away from the equator.

    Returns
    -------
    float
        Approximate distance in kilometers (always non-negative).
    """
    diff_lat_rad = math.radians(diff_lat_degree)
    diff_long_rad = math.radians(diff_long_degree)

    if mean_lat_degree is not None:
        # Meridians converge toward the poles, so an east-west degree shrinks
        # by a factor of cos(latitude).
        diff_long_rad *= math.cos(math.radians(mean_lat_degree))

    earth_radius_km = 6371.0 # Globally-averaged earth's radius in kilometers

    distance_km = earth_radius_km * (diff_lat_rad**2 + diff_long_rad**2)**0.5
    return distance_km

# Define the matrix weight using distance
def get_weight(distance_km, sigma=100, epsilon=10e-5):
    """Gaussian-kernel edge weight for a distance, with small values cut to zero.

    Computes ``exp(-distance_km**2 / sigma**2)`` and replaces any weight
    strictly below ``epsilon`` with zero.

    Parameters
    ----------
    distance_km : float or numpy.ndarray
        Distance(s) in kilometers. Arrays are processed element-wise.
    sigma : float, optional
        Kernel width, in the same unit as ``distance_km``. Default 100.
    epsilon : float, optional
        Cut-off threshold for the weight. Note that the default ``10e-5``
        equals ``1e-4`` (not ``1e-5``).

    Returns
    -------
    int, float or numpy.ndarray
        For scalar input: ``0`` when the weight is below ``epsilon``, otherwise
        the weight itself. For array input: an array of the same shape with
        sub-threshold entries set to ``0.0``.
    """
    weight = np.exp(-distance_km**2 / sigma**2)

    if np.ndim(weight) == 0:
        # Scalar path: same comparison and return values as before.
        if weight < epsilon:
            return 0
        return weight

    # Array path: apply the threshold element-wise instead of raising on the
    # ambiguous truth value of an array comparison.
    return np.where(weight < epsilon, 0.0, weight)
    
# Create an empty adjacency matrix
n_prefectures = len(prefecture_names)
A_adj = np.zeros((n_prefectures, n_prefectures))

# Kernel settings used for every edge weight
kernel_sigma = 100      # kernel width passed to get_weight (same unit as the distance, km)
kernel_epsilon = 10e-4  # weights below this become 0; note 10e-4 == 1e-3

# Look the coordinates up once, in prefecture_names order, instead of doing four
# DataFrame lookups per (i, j) pair inside the loop.
latitudes = coordinates.loc[prefecture_names, 'Latitude'].to_numpy()
longitudes = coordinates.loc[prefecture_names, 'Longitude'].to_numpy()

for i in range(n_prefectures):
    for j in range(n_prefectures):
        # The diagonal stays 0: a prefecture has no edge to itself.
        if i != j:
            distance_km = get_distance_km(latitudes[i] - latitudes[j],
                                          longitudes[i] - longitudes[j])
            A_adj[i, j] = get_weight(distance_km, sigma=kernel_sigma, epsilon=kernel_epsilon)

# Count the number of non-zero elements in A_adj
non_zero_count = np.count_nonzero(A_adj)

# Non-zero entries vs. total number of entries in the matrix
print(non_zero_count, n_prefectures * n_prefectures)


500 2209
