In [11]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('..')
    
from config import ALL_LOCATIONS
import generation
import map_utils
    
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point, Polygon, LineString
from shapely.ops import unary_union
import matplotlib.pyplot as plt
import json

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload




In [2]:
# Load the city-boundary shapefile (relative path) and preview the first rows.
# NOTE(review): the previewed coordinates look projected rather than lon/lat;
# confirm the CRS before combining with the lat/lon values from config.
geo_data_all_df = gpd.read_file(
    "../data/500Cities_City_11082016/CityBoundaries.shp"
)
geo_data_all_df.head()

Unnamed: 0,NAME,CLASS,ST,STFIPS,PLACEFIPS,POP2010,STPLFIPS,geometry
0,Pharr,city,TX,48,57200,70400,4857200,"POLYGON ((-10929531.239 3029892.718, -10929486..."
1,McAllen,city,TX,48,45384,129877,4845384,"POLYGON ((-10934477.721 3037591.957, -10934450..."
2,Edinburg,city,TX,48,22660,77100,4822660,"POLYGON ((-10924696.742 3050068.459, -10924700..."
3,Laredo,city,TX,48,41464,236091,4841464,"POLYGON ((-11090500.378 3205304.554, -11090391..."
4,Mission,city,TX,48,48768,77058,4848768,"POLYGON ((-10946610.1 3030967.743, -10946347.6..."


In [4]:
# Tabulate the configured locations (one row per ALL_LOCATIONS entry) for a quick look.
locations_df = pd.DataFrame(data=ALL_LOCATIONS)
locations_df.head()

Unnamed: 0,id,lat,lon,name,state,region,cities
0,1,32.7157,-117.1611,San Diego,CA,California,"[San Diego, El Cajon, Chula Vista, Escondido, ..."
1,2,34.0522,-118.2437,Los Angeles,CA,California,"[Los Angeles, Burbank, Santa Monica, Glendale,..."
2,3,37.7749,-122.4194,Bay Area,CA,California,"[San Francisco, Oakland, Milpitas, Mountain Vi..."
3,4,28.5384,-81.3789,Orlando,FL,Florida,[Orlando]
4,5,25.7617,-80.1918,Miami,FL,Florida,"[Miami, Miami Beach, Hialeah, Miami Gardens, H..."


In [6]:
# Flatten the config into one record per (location, city) pair, carrying the
# parent location's state, name, id and region alongside each city name.
cities = [
    {
        'city': city,
        'state': location.state,
        'location': location.name,
        'location_id': location.id,
        'region': location.region,
    }
    for location in ALL_LOCATIONS
    for city in location.cities
]

# Turn the flattened records into a table.
cities_df = pd.DataFrame(cities)
cities_df.head()

Unnamed: 0,city,state,location,location_id,region
0,San Diego,CA,San Diego,1,California
1,El Cajon,CA,San Diego,1,California
2,Chula Vista,CA,San Diego,1,California
3,Escondido,CA,San Diego,1,California
4,Oceanside,CA,San Diego,1,California


In [7]:
# Attach boundary geometry and 2010 population to every configured city.
# The inner join on (name, state) keeps only cities present in both tables,
# so configured cities with no shapefile match are dropped here.
cities_geo_df = (
    geo_data_all_df
    .merge(
        cities_df,
        how='inner',
        left_on=['NAME', 'ST'],
        right_on=['city', 'state'],
    )[['city', 'state', 'geometry', 'location', 'location_id', 'region', 'POP2010']]
    .rename(columns={"POP2010": "population"})
)
cities_geo_df.head()

Unnamed: 0,city,state,geometry,location,location_id,region,population
0,San Antonio,TX,"MULTIPOLYGON (((-10961926.119 3407333.744, -10...",San Antonio,20,Texas Triangle,1327407
1,Austin,TX,"MULTIPOLYGON (((-10861676.106 3548126.409, -10...",Austin,17,Texas Triangle,790390
2,Houston,TX,"MULTIPOLYGON (((-10601763.841 3450638.449, -10...",Houston,19,Texas Triangle,2099451
3,Orlando,FL,"MULTIPOLYGON (((-9053370.515 3298107.956, -905...",Orlando,4,Florida,238300
4,Hollywood,FL,"MULTIPOLYGON (((-8924847.554 3005004.953, -892...",Miami,5,Florida,140768


In [8]:
# Index each city's geometry and population by its (city, state) pair.
# If the same pair occurs in several rows, the last row wins.
CITIES_INFO = {
    (record.city, record.state): {
        'geometry': record.geometry,
        'population': record.population,
    }
    for record in cities_geo_df.itertuples(index=False)
}
    

In [9]:
# Merge the city geometries of each (location, region, state) group into a
# single geometry with unary_union, then turn the group keys back into columns.
local_area_df = (
    cities_geo_df
    .groupby(['location', 'region', 'state'])['geometry']
    .apply(unary_union)
    .to_frame()
    .reset_index()
)
local_area_df.head()

Unnamed: 0,location,region,state,geometry
0,Austin,Texas Triangle,TX,"MULTIPOLYGON (((-10872647.984 3564027.856, -10..."
1,Baltimore,Northeast Corridor,MD,"MULTIPOLYGON (((-8523466.151 4752515.705, -852..."
2,Bay Area,California,CA,"MULTIPOLYGON (((-13627218.684 4552503.028, -13..."
3,Boston,Northeast Corridor,MA,"MULTIPOLYGON (((-7909405.847 5202635.152, -790..."
4,Chicago,Great Lakes,IL,"POLYGON ((-9758837.384 5164420.691, -9758834.9..."


In [13]:
# Show the merged geometry of the first row whose location is 'Seattle'.
map_utils.display_geometry_on_map(
    local_area_df.loc[local_area_df['location'] == 'Seattle', 'geometry'].iloc[0]
)

In [16]:
from shapely import geometry
from dataclasses import dataclass

In [17]:
@dataclass
class City:
    """A named city together with its shapely geometry.

    Attributes:
        name: City name.
        state: State the city belongs to.
        geometry: Shapely geometry object associated with the city.
    """

    name: str
    state: str
    # Annotate with shapely's geometry base class. A bare `geometry` here is
    # the imported `shapely.geometry` module, which is not a valid type.
    geometry: geometry.base.BaseGeometry

In [23]:
# Quick check of the dataclass: placeholder name/state strings plus the
# geometry of the first 'Seattle' row.
City(
    name="123",
    state="123",
    geometry=local_area_df.loc[local_area_df['location'] == 'Seattle', 'geometry'].iloc[0],
)

City(name='123', state='123', geometry=<MULTIPOLYGON (((-13604080.658 6011242.403, -13604078.991 6011098.776, -1360...>)