In [None]:
import json
import os

import pandas as pd

def make_categorical(df):
    """Encode every column of ``df`` as contiguous integer category codes.

    Each column is converted to a pandas categorical and replaced by its
    integer codes. Missing values (NaN/None) are treated as one additional
    category and receive the last code of the column, so every value of a
    column lies in ``range(domain[col])``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified; the function works on a copy.

    Returns
    -------
    new_df : pandas.DataFrame
        Copy of ``df`` with the same columns and index, holding integer codes.
    domain : dict
        Maps each column name to the number of distinct codes in that column
        (plain Python ``int`` values, so the dict is JSON-serialisable).
    """
    new_df = df.copy()
    domain = {}

    for col in new_df.columns:
        # Drop unused categories so that codes are contiguous even when the
        # input column was already categorical with categories that no longer
        # occur in the data.
        as_cat = new_df[col].astype('category').cat.remove_unused_categories()
        codes = as_cat.cat.codes
        n_categories = len(as_cat.cat.categories)

        # pandas encodes missing values as -1, which would fall outside
        # [0, domain[col]). Give them their own code after the real categories;
        # codes of non-missing values are left untouched.
        has_missing = bool((codes < 0).any())
        if has_missing:
            codes = codes.where(codes >= 0, n_categories)

        new_df[col] = codes
        # domain size (used for RAP, per the notebook): real categories plus
        # one slot for missing values when present
        domain[col] = n_categories + int(has_missing)

    return new_df, domain

In [None]:
# Download the ACS dataset: 2018 1-Year person-level survey, California only.
from folktables import ACSDataSource, ACSEmployment

data_source = ACSDataSource(
    survey_year='2018',
    horizon='1-Year',
    survey='person',
)
acs_data = data_source.get_data(states=["CA"], download=True)

# df_to_pandas returns three values; only the first one is used below.
acs, _, _ = ACSEmployment.df_to_pandas(acs_data)

acs

In [None]:
import json

# pre-process ACS dataset
# NOTE: this cell overwrites `acs`, so it is not idempotent. Re-running it
# would bucket the already-encoded AGEP codes again; re-run the ACS download
# cell first to restore the raw frame.

# reduce dimensionality of AGEP attribute: truncate to an integer and bucket
# into groups of ten (vectorised equivalent of int(x) // 10 per row)
acs['AGEP'] = acs['AGEP'].astype('int64') // 10

# convert all columns to integers
acs, acs_domain = make_categorical(acs)

# save pre-processed dataset and domain
acs.to_csv('acs.csv', index=False)
# open(..., 'w') does not create missing parent directories, so make sure the
# output directory exists before writing the domain file
os.makedirs('domain', exist_ok=True)
with open('domain/acs-domain.json', 'w') as f:
    json.dump(acs_domain, f)

acs

In [None]:
# Load the FIRE dataset (Fire Department Calls for Service, data.sfgov.org).
# This cell does not fetch the data: download the CSV manually from
# https://data.sfgov.org/Public-Safety/Fire-Department-Calls-for-Service/nuek-vuh3
# and save it as 'fire_raw.csv' in the working directory before running it.
fire = pd.read_csv('fire_raw.csv')
fire

In [None]:
# keep only categorical columns
cat_cols = [
    'Call Type',
    'Call Final Disposition',
    'City',
    'Zipcode of Incident',
    'Battalion',
    'Station Area',
    'Priority',
    'ALS Unit',
    'Call Type Group',
    'Number of Alarms',
]
# encode the selected columns as integer codes and compute the per-column domain
fire, fire_domain = make_categorical(fire[cat_cols])

# save pre-processed dataset and domain
fire.to_csv('fire.csv', index=False)
# open(..., 'w') does not create missing parent directories, so make sure the
# output directory exists before writing the domain file
os.makedirs('domain', exist_ok=True)
with open('domain/fire-domain.json', 'w') as f:
    json.dump(fire_domain, f)

fire