In [None]:
# Read census data and print the population of each county.

import csv
from collections import defaultdict

def read_census_data(filename):
    """Sum the population of each county found in a census CSV file.

    Column index 5 of every row is used as the county key and column index 6
    as an integer population; rows that share a key are added together.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``defaultdict(int)`` mapping each county key to its summed
        population.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If any row other than the first has a population field
            that is not an integer.
    """
    data = defaultdict(int)
    # The handle must be bound with ``as f`` for csv.reader(f) to work.
    # newline='' is what the csv module asks for so that newlines embedded
    # in quoted fields are parsed correctly.
    with open(filename, 'r', newline='') as f:
        reader = csv.reader(f)
        for row_number, row in enumerate(reader):
            if len(row) < 7:
                # Blank or truncated line: no county/population pair to read.
                continue
            try:
                population = int(row[6])
            except ValueError:
                if row_number == 0:
                    # A non-numeric first row is treated as the header line.
                    continue
                raise
            data[row[5]] += population
    return data

def main():
    """Print one ``county: population`` line for each entry read from 'census.csv'."""
    totals = read_census_data('census.csv')
    for name in totals:
        # Look the total up by key; every key iterated here already exists.
        print(f'{name}: {totals[name]}')

# Call main() only when __name__ is '__main__', i.e. when this code runs as
# the top-level program rather than being imported as a module.
if __name__ == '__main__':
    main()

In [27]:
# Convert the Stata surname table to CSV.
# pandas must already be installed in the environment (e.g. `pip install pandas`).
import pandas as pd

# Load the .dta file, relabel the columns, drop the ones that are not
# exported, upper-case the surnames and order the rows by surname.
data = (
    pd.read_stata('2024/census_surnames_lower.dta')
    .rename(columns={
        'Name': 'name',
        'pctwhite': 'White',
        'pctblack': 'Black',
        'pctapi': 'Api',
        'pctaian': 'Native',
        'pct2prace': 'Multiple',
        'pcthispanic': 'Hispanic',
    })
    .drop(columns=['rank', 'count', 'prop100k', 'cum_prop100k', 'countmiss', 'remaining'])
    .assign(name=lambda frame: frame['name'].str.upper())
    .sort_values(by='name')
)

# Show the first few rows as a quick check before exporting.
print(data.head())

data.to_csv('2024/census_surnames_lower.csv', index=False)

           Name   White    Black       Api    Native  Multiple  Hispanic
127185       AA  0.4919  0.20970  0.177400  0.008100  0.008100  0.104800
134036      AAB  0.9569  0.00000  0.010775  0.010775  0.010775  0.010775
40464    AABERG  0.9489  0.00000  0.004950  0.021600  0.019600  0.004950
82018      AABY  0.9953  0.00000  0.000000  0.000000  0.002350  0.002350
55375   AADLAND  0.9164  0.00435  0.014400  0.031700  0.028800  0.004350


In [24]:
import pandas as pd

# Load the per-ZCTA population counts from the Stata file.
data = pd.read_stata('2024/zip_over18_race_jan20.dta')

# Express each group's count as a share of the row's Total_Pop.
# NOTE(review): rows whose Total_Pop is 0 would yield NaN/inf shares here;
# confirm whether such rows exist and should be filtered out.
# The intermediate column label 'Whte' (sic) is kept unchanged in case other
# cells read it from `data`; it is exported below as 'White'.
data['Whte'] = data['NH_White_alone'] / data['Total_Pop']
data['Black'] = data['NH_Black_alone'] / data['Total_Pop']
data['Api'] = data['NH_API_alone'] / data['Total_Pop']
data['Native'] = data['NH_AIAN_alone'] / data['Total_Pop']
data['Hispanic'] = data['Hispanic_Total'] / data['Total_Pop']

# Every column contributes exactly once to the "multiple/other" total.
# The earlier expression added NH_Asian_HPI_Other twice, double-counting it.
# NOTE(review): the file also carries an NH_API_Other column that is not part
# of this sum; confirm whether it was the intended final term.
data['Multiple_Sum'] = (
    data['NH_Other_alone']
    + data['NH_Mult_Total']
    + data['NH_White_Other']
    + data['NH_Black_Other']
    + data['NH_Asian_HPI']
    + data['NH_AIAN_Other']
    + data['NH_Asian_HPI_Other']
)
data['Multiple'] = data['Multiple_Sum'] / data['Total_Pop']

# Keep only the ZCTA code and the share columns for the exported table.
zctac = pd.DataFrame({
    'Zcta5': data['ZCTA5'],
    'White': data['Whte'],
    'Black': data['Black'],
    'Api': data['Api'],
    'Native': data['Native'],
    'Multiple': data['Multiple'],
    'Hispanic': data['Hispanic'],
})

zctac.to_csv('2024/zip_over18_race_jan20.csv', index=False)

# Show the first few rows of the full frame, including the derived columns.
print(data.head())

          name      geo_id_raw  ZCTA5 State_FIPS20 County_FIPS20  Total_Pop  \
0  ZCTA5 00601  860Z200US00601  00601                                 14157   
1  ZCTA5 00602  860Z200US00602  00602                                 31500   
2  ZCTA5 00603  860Z200US00603  00603                                 41255   
3  ZCTA5 00606  860Z200US00606  00606                                  4135   
4  ZCTA5 00610  860Z200US00610  00610                                 21563   

   Hispanic_Total  Non_Hispanic_Total  NH_White_alone  NH_Black_alone  ...  \
0           14104                  53              37               1  ...   
1           31258                 242             190              10  ...   
2           40571                 684             490              68  ...   
3            4103                  32              26               1  ...   
4           21373                 190             141              14  ...   

   NH_Asian_HPI  NH_Asian_HPI_Other  NH_API_Other      W