In [1]:
import extract
import pandas as pd
import os
import geopandas as gpd

In [2]:
# Load the state FIPS lookup table (state_fips.csv in the working directory)
# and preview its first five rows as the cell output.
state_fips = pd.read_csv(filepath_or_buffer='state_fips.csv')
state_fips.head(n=5)

Unnamed: 0,state,code,abbreviation
0,Alabama,1,AL
1,Alaska,2,AK
2,Arizona,4,AZ
3,Arkansas,5,AR
4,California,6,CA


In [3]:
# Convert every raw tract shapefile folder in raw_tract_data into
# tract_polygons/<state>_tracts_<year>.csv, skipping outputs that already exist.
# Folder names are split on '_': the third token is read as the state FIPS code
# and the last token as a 'tract' / 'tractYY' suffix
# (presumably Census TIGER names such as tl_2020_50_tract -- TODO confirm).

# to_csv does not create missing directories, so make sure the target exists
os.makedirs('tract_polygons', exist_ok=True)

for folder in os.listdir('raw_tract_data'):
    # ignore stray files sitting next to the shapefile folders; their names
    # would not split into the expected tokens
    if not os.path.isdir(os.path.join('raw_tract_data', folder)):
        continue

    # split folder name into tokens by underscore
    folder_tokens = folder.split('_')

    # look up the lower-cased state name whose FIPS code equals the third token
    state_code = int(folder_tokens[2])
    state_matches = state_fips.loc[state_fips['code'] == state_code, 'state']
    if state_matches.empty:
        raise ValueError(f'no state with FIPS code {state_code} in state_fips (folder {folder!r})')
    state = state_matches.iloc[0].lower()

    # a bare 'tract' suffix means 2020; otherwise the year is '20' + the
    # last two characters of the suffix (e.g. 'tract10' -> '2010')
    year = '2020' if folder_tokens[-1] == 'tract' else '20' + folder_tokens[-1][-2:]

    # if the tract file was already written by an earlier run, skip it
    out_path = f'tract_polygons/{state}_tracts_{year}.csv'
    if os.path.exists(out_path):
        continue

    # the .shp file inside the folder has the same base name as the folder
    shp_file = os.path.join('raw_tract_data', folder, folder + '.shp')

    # extract.read_tracts is a project helper; presumably it returns a table
    # with geometry and TRACTCE columns like the one printed below -- TODO confirm
    tract_geos = extract.read_tracts(shp_file)

    # save as tract_polygons/<state>_tracts_<year>.csv without the index column
    tract_geos.to_csv(out_path, index=False)

                                              geometry TRACTCE
0    POLYGON ((-71.99900 44.59293, -71.99869 44.593...  957100
1    POLYGON ((-72.08352 44.51031, -72.08252 44.510...  957400
2    POLYGON ((-72.32168 44.46276, -72.32014 44.464...  957000
3    POLYGON ((-72.43014 44.51166, -72.42985 44.512...  957700
4    POLYGON ((-72.08324 44.51127, -72.08178 44.516...  957300
..                                                 ...     ...
188  POLYGON ((-72.47736 43.13672, -72.47728 43.137...  967000
189  POLYGON ((-72.60848 42.97069, -72.60845 42.971...  967700
190  POLYGON ((-72.58944 42.82603, -72.58942 42.826...  968500
191  POLYGON ((-72.92573 42.83640, -72.92547 42.841...  968000
192  POLYGON ((-72.93016 42.74466, -72.93015 42.744...  968100

[193 rows x 2 columns]


In [46]:
# for alaska upper (weird KML)
# Register KML as a read/write driver in fiona's supported-driver table.
# NOTE(review): this relies on geopandas exposing fiona at gpd.io.file.fiona;
# confirm it still holds for the installed geopandas version.
gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'

# build the path with os.path.join so it resolves on every OS
# (a hard-coded backslash separator only works on Windows)
districts = gpd.read_file(os.path.join('raw_geo_data', 'alaska_upper_2022.kml'), driver='KML')

# Rename Name to district, keep only district and geometry, then reduce the
# district field to the trailing letter of its 0200A-style code.
# Rows whose Name has no such code end up with a missing district.
# The chained assign works on a fresh copy, so no column is set on a slice.
districts = (
    districts
    .rename(columns={'Name': 'district'})
    .loc[:, ['district', 'geometry']]
    .assign(
        district=lambda frame: (
            frame['district']
            .str.extract(r'(\d{4}[A-Z])', expand=False)  # part in the form 0200A
            .str[-1]                                     # only the last letter
        )
    )
)

# save to geo_polygons folder as alaska_upper_districts_2022.csv
# districts.to_csv('geo_polygons/alaska_upper_districts_2022.csv', index=False)

In [4]:
# Shapefile input: load the Pierce historic council districts (2002) layer
# into `districts` and show the full table as the cell output.
districts = gpd.read_file(
    r'raw_geo_data/Pierce_Historic_Council_Districts_2002_1824713151309020194/Historic_Council_Districts_2002.shp'
)
districts

Unnamed: 0,DISTNO,MAPLABEL,COUNCIL_ME,COUNCIL_NU,geometry
0,0,,,0,"MULTIPOLYGON (((1068425.250 743086.250, 106866..."
1,1,District #1,Jan Shabro,1,"MULTIPOLYGON (((1195739.750 686405.368, 119571..."
2,2,District #2,Calvin Goings,2,"MULTIPOLYGON (((1179816.433 714411.123, 117927..."
3,3,District #3,Kevin Wimsett,3,"MULTIPOLYGON (((1202350.125 598557.828, 120234..."
4,4,District #4,Harold G. Moss,4,"POLYGON ((1145613.865 699661.390, 1145614.875 ..."
5,5,District #5,Wendell Brown,5,"MULTIPOLYGON (((1141981.106 685690.661, 114198..."
6,6,District #6,Pat O' Malley,6,"MULTIPOLYGON (((1158386.322 644176.489, 115836..."
7,7,District #7,Karen Biskey,7,"MULTIPOLYGON (((1111498.890 762102.762, 111127..."


In [5]:
# Settings for this dataset: the source column holding the district id, plus
# the description and year that the save cell uses to build the file name.
desc = 'pierce_county_council'
year = 2002
district_name = 'DISTNO'

# Keep only the id column and geometry, rename the id column to 'district',
# and cast it to int.
# This rebinds `districts`: re-running the cell without reloading the
# shapefile raises a KeyError because the source column has been renamed.
districts = (
    districts[[district_name, 'geometry']]
    .rename(columns={district_name: 'district'})
    .assign(district=lambda frame: frame['district'].astype(int))
)

In [7]:
# Save the cleaned districts to geo_polygons/<desc>_<year>.csv (index column omitted).
# NOTE(review): the coordinates displayed for this shapefile look projected
# (values around 1e6), unlike the lon/lat values in the tract output -- confirm
# whether a reprojection is needed before saving.

# to_csv does not create missing directories, so make sure the target exists
os.makedirs('geo_polygons', exist_ok=True)
districts.to_csv(f'geo_polygons/{desc}_{year}.csv', index=False)