In [2]:
import os
import re

import numpy as np
from scipy import stats
import pandas as pd
import geopandas as gpd
from tqdm import tqdm
tqdm.pandas()

In [3]:
from constants import CEN_YEARS

In [13]:
import geopandas as gpd
from tqdm import tqdm

# Toronto city center coordinates (latitude, longitude) in WGS 84 (EPSG:4326).
TORONTO_CENTER = (43.652, -79.383)

# CRS in which all distances are measured: WGS 84 / UTM zone 17N (metres).
# UTM zone 17 spans 84°W-78°W, which contains Toronto. Web Mercator
# (EPSG:3857) must NOT be used here: it stretches lengths by 1 / cos(latitude),
# i.e. roughly 1.38x at Toronto's latitude, which would inflate every distance.
DISTANCE_CRS = 'EPSG:32617'

# CEN_YEARS (the census years to iterate over) is imported from `constants`
# in an earlier cell.

# The city-centre point does not depend on the census year, so project it once.
# points_from_xy takes x (longitude) first, then y (latitude).
center_lat, center_lon = TORONTO_CENTER
toronto_center = gpd.points_from_xy(
    [center_lon], [center_lat], crs='EPSG:4326'
).to_crs(DISTANCE_CRS)[0]

# One entry per processed census year; each value is a weighted mean distance
# (in km) from the geometry centroids to the city centre.
dists = {
    "year": [],  # not a distance, just for pandas
    "imm": [],
    "not_imm": [],
    "pop": [],
}

for cen_year in tqdm(CEN_YEARS):
    # Years before 1961, and the 1966 and 1976 censuses, are skipped.
    if cen_year < 1961 or cen_year in (1966, 1976):
        continue

    # Load the data
    gdf_imm_stats = gpd.read_file(f"../data/immigration/{cen_year}/imm_stats_{cen_year}.gpkg")

    # Create non-immigrant count column
    gdf_imm_stats['num_not_imm_tot'] = gdf_imm_stats['num_pop_tot'] - gdf_imm_stats['num_imm_tot']

    # Create normalized weights: each row's share of the respective total,
    # so that weight * distance summed over rows is a weighted mean.
    gdf_imm_stats['pop_norm'] = gdf_imm_stats['num_pop_tot'] / gdf_imm_stats['num_pop_tot'].sum()
    gdf_imm_stats['imm_norm'] = gdf_imm_stats['num_imm_tot'] / gdf_imm_stats['num_imm_tot'].sum()
    gdf_imm_stats['not_imm_norm'] = gdf_imm_stats['num_not_imm_tot'] / gdf_imm_stats['num_not_imm_tot'].sum()

    # Reproject to the metric CRS before taking centroids and distances so
    # that both are computed in metres with negligible local distortion.
    gdf_imm_stats = gdf_imm_stats.to_crs(DISTANCE_CRS)
    gdf_imm_stats['centroid'] = gdf_imm_stats.geometry.centroid

    # Calculate distances from each centroid to Toronto center (in meters)
    gdf_imm_stats['distance'] = gdf_imm_stats['centroid'].distance(toronto_center)

    # Calculate weighted mean distances
    weighted_imm_dist = (gdf_imm_stats['distance'] * gdf_imm_stats['imm_norm']).sum()
    weighted_not_imm_dist = (gdf_imm_stats['distance'] * gdf_imm_stats['not_imm_norm']).sum()
    weighted_pop_dist = (gdf_imm_stats['distance'] * gdf_imm_stats['pop_norm']).sum()

    # Record the year and convert distances from metres to kilometres
    dists['year'].append(cen_year)
    dists['imm'].append(weighted_imm_dist / 1000)
    dists['not_imm'].append(weighted_not_imm_dist / 1000)
    dists['pop'].append(weighted_pop_dist / 1000)

# Convert to pandas DataFrame and write the same table to both output locations
df_dists = pd.DataFrame.from_dict(dists)
df_dists.to_csv('../data/immigration/stats/pop_dists.csv', index=False)
df_dists.to_csv('../static/data/immigration_analysis/pop_dists.csv', index=False)

100%|██████████| 15/15 [00:03<00:00,  4.29it/s]
