In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

import sklearn as skl

In [2]:
# Load the three source tables. low_memory=False makes pandas infer each
# column's dtype from the whole file rather than chunk by chunk.
Koob, Jovi, Arwen = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in ('Koob_Master.csv', 'Jovi_Master.csv', 'Arwen_Master.csv')
)

In [3]:
# Koob.columns

In [4]:
# Step 1: give the land-cover / demographic columns readable names.
readable_names = {
    'Total Accounted': 'Acreage',
    'State': 'STATE',
    'County': 'COUNTY',
    'CITYPOP_max': 'City_Pop',
    'SEX_2_mean': 'Female_Rate',
    'EMPSTAT_1_mean': 'Employment_Rate',
    'EMPSTAT_2_mean': 'Unemployment_Rate',
    'EMPSTAT_3_mean': 'NotLaborForce_Rate',
}
Koob = Koob.rename(columns=readable_names)

# Step 2: shorten the industry-code columns.
# For every column starting with 'IND_Cat_IND', strip the first 8 characters
# (the 'IND_Cat_' prefix) and the last 5 characters (presumably a '_mean'
# suffix -- TODO confirm every such column really ends that way).
# All other columns map to themselves.
rename_mapping = {}
for col in Koob.columns:
    is_industry_col = col.startswith('IND_Cat_IND')
    rename_mapping[col] = col[8:-5] if is_industry_col else col

Koob = Koob.rename(columns=rename_mapping)

In [5]:
# Derived Koob features: male share, land-cover rates, a min-max scaled
# development-density index and a population density.

# fix sexes: only the female share exists, so derive the male share from it
Koob['Male_Rate'] = 1 - Koob['Female_Rate']

# One guarded denominator for every per-acre feature. Rows whose Acreage is
# missing, zero or negative become NaN, so no feature ends up as +/-inf.
# Previously only Population_Density was guarded; a single inf Density_Score
# would also corrupt max() and therefore the whole min-max Density_Index.
valid_acreage = Koob['Acreage'].where(Koob['Acreage'] > 0)

# creating land cover rates (share of accounted acreage)
Koob['Cropland_Rate'] = (Koob['Major Crops'] + Koob['Minor Crops']) / valid_acreage
Koob['Pasture_Rate'] = Koob['Pasture'] / valid_acreage
Koob['Forest_Rate'] = Koob['Forest'] / valid_acreage

# creating land density index
# Weighted share of developed land; the weights grow with development
# intensity (Open < Low < Med < High).
developed_weighted = (
    0.1 * Koob['Developed Open']
    + 0.35 * Koob['Developed Low']
    + 0.65 * Koob['Developed Med']
    + 0.9 * Koob['Developed High']
)
Koob['Density_Score'] = developed_weighted / valid_acreage

# Min-max scale the score to [0, 1]; min()/max() skip the NaN rows.
min_density_score = Koob['Density_Score'].min()
range_density_score = Koob['Density_Score'].max() - min_density_score
Koob['Density_Index'] = (Koob['Density_Score'] - min_density_score) / range_density_score

# creating human density score
# Dividing by the guarded denominator keeps this a float column with NaN for
# invalid acreage; np.where(..., pd.NA) produced an object-dtype column.
# NOTE(review): np.maximum propagates NaN, so a row missing either City_Pop or
# population gets NaN here -- confirm that is intended (np.fmax would ignore it).
Koob['Population_Density'] = np.maximum(Koob['City_Pop'], Koob['population']) / valid_acreage

In [6]:
# Columns removed from Koob before merging.
koob_columns_to_drop = [
    # raw land-cover acreages (Hay is dropped without feeding any rate)
    'Major Crops', 'Minor Crops', 'Pasture', 'Hay', 'Forest',
    # remaining columns not carried into the merged table
    'FIPS_',
    'EDUC_HS_mean', 'EDUC_College_mean',
    'IND_Cat_Other_mean',
    'STATEFIPS_agg',
    'year', 'state', 'state_po', 'county_name', 'county_fips',
    'STATEFIPS_politics',
    'CBSA Code', 'cbsacode', 'metropolitandivisioncode', 'csacode',
    'cbsatitle', 'metropolitanmicropolitanstatis',
    'metropolitandivisiontitle', 'csatitle', 'countycountyequivalent',
    'statename', 'centraloutlyingcounty',
    'STATEFIPS_religion',
    'name', 'blkgrps',
    'STATEFIPS_transit',
    'State_marijuana',
    'Unnamed: 0',
]
Koob = Koob.drop(columns=koob_columns_to_drop)

In [15]:
# Sanity check after the column drop. Only the last expression of a cell is
# displayed, so the bare `Koob.columns` that used to precede this line was
# evaluated and thrown away; show the shape on its own.
Koob.shape

(105153, 64)

In [8]:
# Collapse Jovi's per-NAICS columns into two totals, then drop the detail.
#   Arts_Ent_Rec_Est : row-wise sum of every column starting with 'naics_71'
#   Food_Est         : row-wise sum of every column starting with 'naics_72'
# NOTE(review): the prefix match also catches longer codes (e.g. a
# 'naics_711...' column); if the table holds several NAICS levels at once the
# sums would double count -- confirm against the Jovi schema.
naics_71_cols = [col for col in Jovi.columns if col.startswith('naics_71')]
naics_72_cols = [col for col in Jovi.columns if col.startswith('naics_72')]

# Fail fast instead of silently writing zeros: summing an empty column
# selection yields 0 for every row, which is what happened when this cell was
# re-run after the naics_* columns had already been dropped.
if not naics_71_cols or not naics_72_cols:
    raise ValueError(
        "Jovi has no 'naics_71*' / 'naics_72*' columns to aggregate; "
        "reload Jovi_Master.csv before re-running this cell."
    )

# count 71s and 72s
Jovi['Arts_Ent_Rec_Est'] = Jovi[naics_71_cols].sum(axis=1)
Jovi['Food_Est'] = Jovi[naics_72_cols].sum(axis=1)

# no NAICS: drop every per-code column plus STATE and COUNTY in a single pass
columns_to_drop_all = [col for col in Jovi.columns if col.startswith('naics_')]
Jovi = Jovi.drop(columns=columns_to_drop_all + ['STATE', 'COUNTY'])

In [9]:
# Arwen.columns

In [10]:
# Remove the saved-index leftovers and the STATE / COUNTY columns from Arwen.
arwen_columns_to_drop = ['Unnamed: 0.1', 'Unnamed: 0', 'STATE', 'COUNTY']
Arwen = Arwen.drop(columns=arwen_columns_to_drop)
# Arwen.shape

In [13]:
# Combine the three tables on their shared 'FIPS' key.

# Cast the key to str in every frame so the join columns share one dtype.
# NOTE(review): astype(str) keeps whatever formatting the CSV parse produced
# (a float key becomes e.g. '1001.0', missing keys become 'nan') -- confirm
# the three files agree on the FIPS format before trusting the match rate.
for frame in (Koob, Jovi, Arwen):
    frame['FIPS'] = frame['FIPS'].astype(str)

# Outer joins keep keys that are present in only some of the tables.
Master = (
    Koob
    .merge(Jovi, on='FIPS', how='outer')
    .merge(Arwen, on='FIPS', how='outer')
)

In [14]:
# Display (rows, columns) of the merged frame as a quick check on the joins.
Master.shape

(105815, 129)

In [16]:
# Persist the merged table. index=False skips the auto-generated RangeIndex,
# which carries no information after the merges and would otherwise come back
# as an 'Unnamed: 0' column when the file is re-read.
Master.to_csv('City_Master.csv', index=False)