# This notebook is used to create the xy_zipcode_2013to2019.csv

In [None]:
import pycrs
import pyproj
from functools import lru_cache

# Source coordinate system: EPSG code 102718, the x/y format used by the
# original data. pycrs resolves the code to a proj4 string that pyproj can read.
from_crs = pycrs.parse.from_epsg_code(102718).to_proj4()
fromproj = pyproj.Proj(from_crs)

# Target coordinate system: EPSG code 4326, the usual latitude/longitude format.
to_crs = pycrs.parse.from_epsg_code(4326).to_proj4()
toproj = pyproj.Proj(to_crs)

@lru_cache(maxsize=1)
def _xy_to_lonlat_transformer():
    """Build (once) the transformer from the source x/y CRS to long/lat."""
    # pyproj.transform() is deprecated and sets up the transformation on every
    # call; a Transformer is created a single time and reused.
    # always_xy=True keeps the traditional (x, y) -> (longitude, latitude)
    # axis order regardless of what the CRS definitions declare.
    return pyproj.Transformer.from_proj(fromproj, toproj, always_xy=True)


@lru_cache()
def xy_to_latlong(x, y):
    """Convert an (x, y) coordinate in the source CRS to (latitude, longitude).

    Args:
        x: x coordinate in the ``fromproj`` coordinate system.
        y: y coordinate in the ``fromproj`` coordinate system.

    Returns:
        A ``(latitude, longitude)`` tuple in the ``toproj`` coordinate system.
    """
    # The transformer yields (longitude, latitude); swap to (latitude, longitude).
    lon, lat = _xy_to_lonlat_transformer().transform(x, y)
    return (lat, lon)

In [None]:
import fiona
import shapely.geometry

def read_geojson(path):
    """Read a GeoJSON file and return its records as shapely geometries.

    Args:
        path: Path of the file to open with ``fiona.open``.

    Returns:
        A list of ``(shape, record)`` tuples, one per record in the file, where
        ``shape`` is the shapely geometry built from ``record['geometry']`` and
        ``record`` is the original fiona record (kept so callers can read its
        properties, e.g. the zip code).
    """
    with fiona.open(path) as fiona_collection:
        # shapely.geometry.shape() is the supported replacement for asShape(),
        # which was deprecated in Shapely 1.8 and removed in Shapely 2.0.
        return [
            (shapely.geometry.shape(record['geometry']), record)
            for record in fiona_collection
        ]

# Load the list of (shape, record) tuples once, at module level;
# latlong_to_zipcode() iterates over this global for every lookup.
shapes=read_geojson('zipcodes.geojson')

@lru_cache()
def latlong_to_zipcode(coord):
    """Find the zip code whose shape contains the given coordinate.

    Args:
        coord: A ``(latitude, longitude)`` tuple. It must be hashable because
            results are memoised with ``lru_cache``.

    Returns:
        The ``postalCode`` property of the first entry in ``shapes`` whose
        geometry contains the point, or ``None`` if no shape contains it.
    """
    lat, lon = coord

    # Shapely points are (x, y), i.e. (longitude, latitude) -- not (lat, long).
    # The point does not depend on the shape, so build it once, not per shape.
    point = shapely.geometry.Point(lon, lat)

    for shape, record in shapes:
        if shape.contains(point):
            return record['properties']['postalCode']

    # No shape contains the point.
    return None

In [None]:
@lru_cache()
def xy_to_zipcode(x, y):
    """Return the zip code for an (x, y) coordinate.

    The coordinate is first converted to (latitude, longitude) with
    xy_to_latlong() and then looked up with latlong_to_zipcode().
    """
    lat_long = xy_to_latlong(x, y)
    return latlong_to_zipcode(lat_long)

In [None]:
# Build the (x, y) -> zip code lookup table and save it as a csv file.
# NOTE(review): `df` and `pd` are not defined in the cells shown here;
# presumably they are created earlier in the notebook -- confirm.

# Take the x and y columns, drop rows with a missing coordinate and convert
# the coordinates to int. Duplicates are dropped *after* the int conversion:
# values that differ only in their fractional part become the same integer
# pair, and must not produce repeated (x, y) rows in the mapping.
df_xy = df.loc[:, ['x', 'y']].dropna().astype(int).drop_duplicates()

# get the zip code of each unique xy coordinate (None when no zip code matches)
zipcode = [xy_to_zipcode(x, y) for x, y in zip(df_xy['x'], df_xy['y'])]

# attach the zip codes to the coordinates; the values are in df_xy's row order,
# so no index-based merge is needed. Column order: zipcode, x, y.
df_zipcode = df_xy.assign(zipcode=zipcode)[['zipcode', 'x', 'y']]

#save the xy to zip code mapping in a csv file
df_zipcode.to_csv('data/xy_zipcode_2013to2019.csv')

df_zipcode