# Where are the best locations for opening a new independent yarn shop?

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import folium
import geopandas as gpd

%matplotlib inline

In [None]:
# Load the shops-by-county CSV.
# The FIPS identifier columns (STATEFP, COUNTYFP, GEOID) are read as strings so their
# leading zeros are kept -- the same approach used for the population CSV further down.
fips_as_text = {'STATEFP': str, 'COUNTYFP': str, 'GEOID': str}
shops_df = pd.read_csv('../data/df_shops_by_county.csv', dtype=fips_as_text)
shops_df.head(2)


In [None]:
# Keep only the state/county identifier columns; .copy() detaches the result from shops_df.
county_id_columns = ['state', 'STATEFP', 'COUNTYFP', 'COUNTYNS', 'GEOID', 'NAME']
shop_county_count_df = shops_df.loc[:, county_id_columns].copy()
shop_county_count_df.head(2)

In [None]:
# Count the non-null entries of every column within each GEOID group.
rows_by_geoid = shop_county_count_df.groupby(by='GEOID')
shop_count = rows_by_geoid.count()
shop_count

In [None]:
# Display only: the de-duplicated rows are shown, but the result is not assigned,
# so shop_county_count_df itself is unchanged by this cell.
shop_county_count_df.drop_duplicates()

In [None]:
# Every remaining column holds a per-GEOID count; drop the redundant ones and keep NAME.
# `columns=` is used instead of a positional axis: passing axis positionally
# (drop([...], 1)) was deprecated in pandas 1.3 and raises TypeError from pandas 2.0.
shop_count = shop_count.drop(columns=['state', 'STATEFP', 'COUNTYFP', 'COUNTYNS'])
shop_count

In [None]:
# Move the current index into an ordinary column and replace it with a default integer index.
shop_count = shop_count.reset_index()

In [None]:
# Relabel the NAME column as 'count'.
count_label = {'NAME': 'count'}
shop_count = shop_count.rename(columns=count_label)
shop_count

In [None]:
# Spot check: show the row(s) for GEOID '01089'.
is_geoid_01089 = shop_count['GEOID'] == '01089'
shop_count[is_geoid_01089]

In [None]:
# Attach the per-GEOID count to each row, then collapse rows that are now identical.
with_counts = shop_county_count_df.merge(shop_count, on='GEOID')
shop_county_count_df = with_counts.drop_duplicates()
shop_county_count_df

In [None]:
# Load the county population estimates.
# The original file used a different encoding; it was converted in the hope that the
# standard encoding would stop the leading zeros in the county column from being dropped.
# STATE and COUNTY are concatenated later to match the shapefile's GEOID, so the simplest
# fix is to read both columns as strings here.

pd.set_option('display.max_columns', 60)
population_dtypes = {'STATE': str, 'COUNTY': str}
population_df = pd.read_csv('../data/us_popest_2019_utf8.csv', dtype=population_dtypes)
population_df.head(2)


In [None]:
# Narrow the population table to the identifier columns and the 2019 estimate.
population_columns = [
    'SUMLEV', 'REGION', 'DIVISION', 'STATE', 'COUNTY',
    'STNAME', 'CTYNAME', 'POPESTIMATE2019',
]
population_df = population_df.loc[:, population_columns].copy()
population_df.head(2)

In [None]:
# Preview the rows whose COUNTY code is '000'.
has_zero_county_code = population_df['COUNTY'] == '000'
population_df[has_zero_county_code]

In [None]:
# Remove the state-level population rows (COUNTY code '000'), leaving county rows only.

state_pop = population_df.index[population_df['COUNTY'] == '000']
pop_county_df = population_df.drop(index=state_pop)
pop_county_df.head()

In [None]:
# Print the column dtypes and non-null counts for the county-only table.
pop_county_df.info()

In [None]:
# Spot check: show the rows for Alabama.
in_alabama = pop_county_df['STNAME'] == 'Alabama'
pop_county_df[in_alabama]

In [None]:
# Left join: every population row is kept, and rows without a match in
# shop_county_count_df get nulls in the shop columns.
population_keys = ['STATE', 'COUNTY', 'STNAME']
shop_keys = ['STATEFP', 'COUNTYFP', 'state']
popcounty_merge_df = pop_county_df.merge(
    shop_county_count_df,
    how='left',
    left_on=population_keys,
    right_on=shop_keys,
)
popcounty_merge_df.head()

In [None]:
# Print the column dtypes and non-null counts of the merged table.
popcounty_merge_df.info()

In [None]:
# Population per existing shop: the 2019 estimate divided by the shop count.
# Rows with a null count produce a null shop_pop.
popcounty_merge_df['shop_pop'] = popcounty_merge_df['POPESTIMATE2019'].div(
    popcounty_merge_df['count']
)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of population per current shop;
# the 50% row is the median.
popcounty_merge_df['shop_pop'].describe()


Summary statistics for `shop_pop` (county population per existing shop):
- count    935
- mean     114110
- std      168890
- min      591
- 25%      25191
- 50%      54366
- 75%      134407
- max      2,253,858

The median (54,366) seems a reasonable estimate of the population needed to support one shop, although it is definitely not the whole picture. For this analysis it is a good place to start.

In [None]:
# Where shop_pop is null, fall back to the row's 2019 population estimate.
popcounty_merge_df['shop_pop'] = popcounty_merge_df['shop_pop'].fillna(
    popcounty_merge_df['POPESTIMATE2019']
)

In [None]:
# Population still available for another shop: the 2019 estimate minus
# 54,366 people (the median population per shop) for each existing shop.
pop_claimed_by_shops = popcounty_merge_df['count'].mul(54366)
popcounty_merge_df['avail_pop'] = popcounty_merge_df['POPESTIMATE2019'].sub(pop_claimed_by_shops)

In [None]:
# Where avail_pop is null, use the row's full 2019 population estimate.
popcounty_merge_df['avail_pop'] = popcounty_merge_df['avail_pop'].fillna(
    popcounty_merge_df['POPESTIMATE2019']
)

In [None]:
# Number of additional shops the available population could support,
# at the median of 54,366 people per shop.
popcounty_merge_df['shop_potential'] = popcounty_merge_df['avail_pop'].div(54366)

In [None]:
# Build a geoid for every row by joining the STATE and COUNTY code strings,
# then discard the columns that came from the shop table (including its own GEOID).

popcounty_merge_df['geoid'] = popcounty_merge_df['STATE'].str.cat(popcounty_merge_df['COUNTY'])
shop_table_columns = ['state', 'STATEFP', 'COUNTYFP', 'COUNTYNS', 'GEOID', 'NAME']
popcounty_merge_df = popcounty_merge_df.drop(columns=shop_table_columns)

In [None]:
# Preview the first rows of the merged table.
popcounty_merge_df.head()

In [None]:
# ready for the shapefile and mapping
# a choropleth is appropriate here

In [None]:
# Load the county boundaries from the GeoJSON file and print their coordinate reference system.

county_shapes_path = '../data/tl_2017_us_county.json'
counties = gpd.read_file(county_shapes_path)
print(counties.crs)
counties.head(2)

In [None]:
# FIPS codes of the territories; rows whose STATEFP is in this list are removed.
territories_list = ['60', '66', '69', '72', '78']

is_territory = counties['STATEFP'].isin(territories_list)
counties = counties[~is_territory]

# Quick visual check of the remaining shapes, coloured by COUNTYNS.
fig, ax = plt.subplots(figsize=(20, 20))
counties.plot(column='COUNTYNS', ax=ax)

# to tighten the map
ax.set_xlim(-175, -65);


In [None]:
# Keep only the identifier columns and the geometry.
shape_columns = ['STATEFP', 'COUNTYFP', 'COUNTYNS', 'GEOID', 'NAME', 'geometry']
counties = counties[shape_columns]
counties.head()

In [None]:
# Join the county metrics to the county shapes on the county identifier
# (geoid on the population side, GEOID on the shapefile side).
potential_df = pd.merge(popcounty_merge_df, counties,
                        left_on = 'geoid',
                        right_on = 'GEOID')
# Drop SUMLEV and the shapefile's identifier columns.
# `columns=` is used instead of a positional axis: drop([...], 1) was deprecated
# in pandas 1.3 and raises TypeError from pandas 2.0.
potential_df = potential_df.drop(columns = ['SUMLEV', 'STATEFP', 'COUNTYFP', 'COUNTYNS', 'GEOID', 'NAME'])
potential_df.head()

In [None]:
# Write the joined table to CSV without the index column.
potential_df.to_csv('../data/df_potential.csv', index = False)

In [None]:
# Slim copy holding just the columns wanted for the Tableau export.
tableau_columns = ['STNAME', 'CTYNAME', 'POPESTIMATE2019', 'count', 'shop_potential']
potential_tableau = potential_df.loc[:, tableau_columns].copy()


In [None]:
# Give the export columns friendlier names.
tableau_labels = {
    'STNAME': 'state',
    'CTYNAME': 'county',
    'POPESTIMATE2019': 'pop_2019',
    'count': 'current_shops',
}
potential_tableau = potential_tableau.rename(columns=tableau_labels)
potential_tableau.head()

In [None]:
# Write the Tableau table to CSV without the index column.
potential_tableau.to_csv('../data/df_potential_tableau.csv', index = False)

In [None]:
# Join again with the county shapes as the left frame (GEOID matched to geoid).
# Unlike potential_df, no columns are dropped afterwards.
potential_geo = pd.merge(
    left=counties,
    right=popcounty_merge_df,
    left_on='GEOID',
    right_on='geoid',
)
potential_geo.head()

In [None]:
# Interactive county choropleth of shop_potential, with the values shown on hover.

# Only the columns the map needs are passed on, which keeps the GeoJSON layer small.
map_columns = ['GEOID', 'STNAME', 'CTYNAME', 'shop_potential', 'geometry']
map_gdf = potential_geo[map_columns]

#construct map object
m = folium.Map([39.50, -98.35], zoom_start = 4)

#create choropleth
# folium colours a feature by looking up the value found at `key_on` in columns[0] of `data`,
# so both sides must carry the same identifier. The county GEOID is used for both:
# pairing CTYNAME with feature.id (the row index) never matched, leaving the map unshaded,
# and county names repeat across states anyway.
choropleth = folium.Choropleth(
    geo_data = map_gdf,
    name = 'County',
    data = map_gdf,
    columns = ['GEOID', 'shop_potential'],
    key_on = 'feature.properties.GEOID',
    fill_color = 'YlGn',
    fill_opacity = 0.7,
    line_opacity = 0.2,
    legend_name = 'Potential for New Yarn Shop by County'
)
choropleth.add_to(m)

#show county, state and shop_potential when hovering over a county
folium.GeoJsonTooltip(
    fields = ['CTYNAME', 'STNAME', 'shop_potential'],
    aliases = ['County', 'State', 'Shop potential']
).add_to(choropleth.geojson)

#add layer control
folium.LayerControl().add_to(m)

#save and display
# lower-case 'data' matches every other path in this notebook; '../Data' fails on
# case-sensitive filesystems
m.save('../data/potential_map.html')

m

In [None]:
# Write the geo-joined table to CSV without the index column.
potential_geo.to_csv('../data/df_potential_geo.csv', index = False)