# Process Start

In [None]:
from census import Lookup, State
import numpy as np

import us
import pandas as pd

In [None]:
# Create the single Lookup instance that is passed to every State(...) built
# later in this notebook.
# NOTE(review): the original comment calls this "the library of states"; what
# Lookup actually loads is defined in the census module -- confirm there.
lib = Lookup()

In [None]:
# Names of the "mainland" states: every entry of us.STATES except Alaska and
# Hawaii.
mainland_state_names = [
    state.name
    for state in us.STATES
    if state.name not in ('Alaska', 'Hawaii')
]
# One State object per name, all sharing the same Lookup instance.
mainland = [State(name, lib) for name in mainland_state_names]

# Fetch table B19001 for each state, then stack the per-state `.data` frames
# into a single table.
b19001 = [state.get_geo_table('B19001') for state in mainland]
b19001_entire_df = pd.concat([table.data for table in b19001])

In [None]:
# Assume Earth is a sphere and convert to xyz coordinates so we can eventually
# construct a KD-tree using the 2-norm.
def centroid_xyz(arr, radius=3958.8):
    """Convert centroids given in degrees to Cartesian points on a sphere.

    Parameters
    ----------
    arr : sequence of point-like objects
        Either an object offering ``to_list()`` (e.g. a pandas Series) or any
        plain iterable.  Each element must expose ``.xy`` as a pair of
        sequences whose first entries are the longitude and the latitude,
        both in degrees.
    radius : float, optional
        Radius of the sphere.  Defaults to 3958.8 (miles); the returned
        coordinates are in the same unit as the radius.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n, 3)`` whose columns are x, y and z.  An
        empty input yields an array of shape ``(0, 3)``.
    """
    points = arr.to_list() if hasattr(arr, "to_list") else list(arr)
    if not points:
        # np.split along axis 1 would fail on an empty (1-D) array.
        return np.empty((0, 3))

    # Force a float dtype so that integer-valued coordinates can be scaled
    # to radians without a casting error.
    lon_lat = np.array(
        [[axis[0] for axis in pt.xy] for pt in points], dtype=float
    )
    lon_deg, lat_deg = np.split(lon_lat, 2, axis=1)

    deg_to_rad = np.pi / 180
    theta = lon_deg * deg_to_rad          # azimuthal angle (longitude)
    phi = (90 - lat_deg) * deg_to_rad     # polar angle measured from +z

    x = radius * np.sin(phi) * np.cos(theta)
    y = radius * np.sin(phi) * np.sin(theta)
    z = radius * np.cos(phi)
    return np.hstack([x, y, z])

In [None]:
# Convert the centroid of every row of the combined B19001 table to Cartesian
# coordinates; cxyz has one row per centroid and three columns (x, y, z).
# NOTE(review): the table is built from the mainland states only (Alaska and
# Hawaii are excluded above), so this is not the entire US.
# NOTE(review): centroid_xyz treats the coordinates as longitude/latitude in
# degrees -- confirm the table's CRS matches.
cxyz = centroid_xyz(b19001_entire_df.centroid)

In [None]:
# Store each Cartesian component of the centroids as its own column of the
# table: column 0 of cxyz -> CentroidX, 1 -> CentroidY, 2 -> CentroidZ.
for _axis, _column in enumerate(('CentroidX', 'CentroidY', 'CentroidZ')):
    b19001_entire_df[_column] = cxyz[:, _axis]

In [None]:
# Build a copy of the table without the 'geometry' column; the centroid
# columns added earlier are kept.
b19001_reduced_df = b19001_entire_df.drop('geometry', axis='columns')

In [None]:
# Save the geometry-free table to a Parquet file (path is relative to the
# current working directory).
b19001_reduced_df.to_parquet('B19001_no_geometry.parquet')