In [1]:
%matplotlib inline
from config import API_KEY
import requests
import sample
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [2]:
# Yelp API (v3) host and endpoint paths.
API_HOST = 'https://api.yelp.com'
SEARCH_PATH = '/v3/businesses/search'
BUSINESS_PATH = '/v3/businesses'

# Input (city list) and output (final table) CSV file names.
file_name_city = 'city_list.csv'
final_file = 'gym_concept_ratio.csv'

# Load the city list and build a "city,state" search string for every row.
CITY_STATE = pd.read_csv(file_name_city)
CITY_STATE['location'] = CITY_STATE['city'].str.cat(CITY_STATE['state'], sep=',')

# CITY = ['Atlanta', 'Chicago', 'Dallas', 'Denver', 'Detroit', 'Los Angeles','Miami', 'Philadelphia', 'New York']
# STATE = ['Georgia', 'Illinois', 'Texas', 'Colorado', 'Michigan', 'California','Florida', 'Pennsylvania', 'New York']

In [3]:
# Plain Python list of the "city,state" search strings, in CITY_STATE row order.
# Converting directly avoids the append loop and does not leave a stray
# `location` loop variable behind in the kernel namespace.
locations = list(CITY_STATE['location'])
locations

['New York,NY',
 'Los Angeles,CA',
 'Chicago,IL',
 'Houston,TX',
 'Phoenix,AZ',
 'Philadelphia,PA',
 'San Antonio,TX',
 'San Diego,CA',
 'Dallas,TX',
 'San Jose,CA',
 'Austin,TX',
 'Jacksonville,FL',
 'San Francisco,CA',
 'Columbus,OH',
 'Indianapolis,IN',
 'Fort Worth,TX',
 'Charlotte,NC',
 'Seattle,WA',
 'Denver,CO',
 'El Paso,TX',
 'Washington,DC',
 'Boston,MA',
 'Detroit,MI',
 'Nashville,TN',
 'Memphis,TN',
 'Portland,OR',
 'Oklahoma City,OK',
 'Las Vegas,NV',
 'Louisville,KY',
 'Baltimore,MD',
 'Milwaukee,WI',
 'Albuquerque,NM',
 'Tucson,AZ',
 'Fresno,CA',
 'Sacramento,CA',
 'Mesa,AZ',
 'Kansas City,MO',
 'Atlanta,GA',
 'Long Beach,CA',
 'Colorado Springs,CO',
 'Raleigh,NC',
 'Miami,FL',
 'Virginia Beach,VA',
 'Omaha,NE',
 'Oakland,CA',
 'Minneapolis,MN',
 'Tulsa,OK',
 'Arlington,TX',
 'New Orleans,LA']

In [4]:
# Search parameters shared by the Yelp queries in this notebook.
#For test: locations = ["Austin,TX", "Round Rock,TX"]
term = "gyms"  # sent as the "term" query parameter
limit = 50  # NOTE(review): not passed to the query functions by any visible call
# Unix timestamp 1528961400 == 2018-06-14 07:30:00 UTC.
# NOTE(review): presumably a gym open at this hour is counted as a 24-hour gym — confirm.
open_at_time = 1528961400 # Gyms open on Thursday 1:30 a.m. CST

In [5]:
# get total gyms

def get_total_gym(term, locations, limit=50):
    """Query the Yelp search endpoint once per location and tabulate the totals.

    Parameters
    ----------
    term : str
        Search term sent as the ``term`` query parameter.
    locations : iterable of str
        Location strings, each sent as the ``location`` query parameter.
        The iterable is materialised once up front so that one-shot
        iterables (e.g. generators) are not exhausted before the result
        frame is built.
    limit : int, optional
        Value sent as the ``limit`` query parameter. Defaults to 50, the
        value that used to be hard-coded here.

    Returns
    -------
    pandas.DataFrame
        One row per location with the columns ``location`` and
        ``total_gyms`` (the ``'total'`` field of each response).

    Raises
    ------
    KeyError
        If a response does not contain a ``'total'`` key.
    """
    locations = list(locations)
    total_gym = []

    for location in locations:
        params = {
            "term": term,
            "location": location,
            "limit": limit,
        }

        response = sample.request(API_HOST, SEARCH_PATH, API_KEY, params)
        total = response['total']
        print(f"get_total_gym: {total} gyms in {location}")

        total_gym.append(total)

    return pd.DataFrame({
        "location": locations,
        "total_gyms": total_gym,
    })

In [6]:
# Standalone run of the total-gym query for every location.
# NOTE(review): test_df is not read by any later visible cell, and main() issues the same queries again.
test_df = get_total_gym(term,locations)

Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 5100 gyms in New York,NY
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 6600 gyms in Los Angeles,CA
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 2300 gyms in Chicago,IL
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 1500 gyms in Houston,TX
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 1700 gyms in Phoenix,AZ
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 1400 gyms in Philadelphia,PA
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 533 gyms in San Antonio,TX
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 2400 gyms in San Diego,CA
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 1600 gyms in Dallas,TX
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 1900 gyms in San Jose,CA
Querying https://api.yelp.com/v3/businesses/s

In [7]:
def get_total_24hr_gym(term, locations, open_at_time, limit=50):
    """Query the Yelp search endpoint per location with an ``open_at`` filter.

    Parameters
    ----------
    term : str
        Search term sent as the ``term`` query parameter.
    locations : iterable of str
        Location strings, each sent as the ``location`` query parameter.
        The iterable is materialised once up front so that one-shot
        iterables (e.g. generators) are not exhausted before the result
        frame is built.
    open_at_time : int
        Value sent as the ``open_at`` query parameter.
    limit : int, optional
        Value sent as the ``limit`` query parameter. Defaults to 50, the
        value that used to be hard-coded here.

    Returns
    -------
    pandas.DataFrame
        One row per location with the columns ``location`` and
        ``total_24hr_gyms`` (the ``'total'`` field of each response).

    Raises
    ------
    KeyError
        If a response does not contain a ``'total'`` key.
    """
    locations = list(locations)
    total_gym = []

    for location in locations:
        params = {
            "term": term,
            "location": location,
            "open_at": open_at_time,
            "limit": limit,
        }

        response = sample.request(API_HOST, SEARCH_PATH, API_KEY, params)
        total = response['total']
        print(f"get_total_24hr_gym: {total} gyms in {location}")

        total_gym.append(total)

    return pd.DataFrame({
        "location": locations,
        "total_24hr_gyms": total_gym,
    })

In [8]:
def main(term, locations, open_at_time, file_name):
    """Fetch both gym counts per location, compute their ratio and save it.

    Parameters
    ----------
    term : str
        Search term forwarded to both query functions.
    locations : iterable of str
        Location strings to query. Materialised once because both query
        functions iterate over it.
    open_at_time : int
        Forwarded to ``get_total_24hr_gym``.
    file_name : str
        Path of the CSV file the combined table is written to (no index).

    Returns
    -------
    pandas.DataFrame
        Columns ``location``, ``total_gyms``, ``total_24hr_gyms`` and
        ``ratio%`` (24-hour count as a percentage of the total count,
        rounded to two decimals).
    """
    locations = list(locations)

    total_24hr_gym = get_total_24hr_gym(term, locations, open_at_time)
    total_gym = get_total_gym(term, locations)

    # Join on the shared 'location' column only: combining on= with
    # left_index=True is rejected by current pandas with a MergeError.
    combined_gym = pd.merge(total_gym, total_24hr_gym, on='location')
    combined_gym['ratio%'] = (
        combined_gym.total_24hr_gyms / combined_gym.total_gyms * 100
    ).round(2)
    combined_gym.to_csv(file_name, index=False)
    return combined_gym

In [9]:
# Run both Yelp queries for every location, write the merged counts and
# ratio to 'gym_ratio.csv', and keep the resulting frame as `final`.
final = main(term, locations, open_at_time, 'gym_ratio.csv')

Querying https://api.yelp.com/v3/businesses/search ...
get_total_24hr_gym: 152 gyms in New York,NY
Querying https://api.yelp.com/v3/businesses/search ...
get_total_24hr_gym: 185 gyms in Los Angeles,CA
Querying https://api.yelp.com/v3/businesses/search ...
get_total_24hr_gym: 114 gyms in Chicago,IL
Querying https://api.yelp.com/v3/businesses/search ...
get_total_24hr_gym: 104 gyms in Houston,TX
Querying https://api.yelp.com/v3/businesses/search ...
get_total_24hr_gym: 90 gyms in Phoenix,AZ
Querying https://api.yelp.com/v3/businesses/search ...
get_total_24hr_gym: 86 gyms in Philadelphia,PA
Querying https://api.yelp.com/v3/businesses/search ...
get_total_24hr_gym: 55 gyms in San Antonio,TX
Querying https://api.yelp.com/v3/businesses/search ...
get_total_24hr_gym: 75 gyms in San Diego,CA
Querying https://api.yelp.com/v3/businesses/search ...
get_total_24hr_gym: 128 gyms in Dallas,TX
Querying https://api.yelp.com/v3/businesses/search ...
get_total_24hr_gym: 49 gyms in San Jose,CA
Querying 

get_total_gym: 1500 gyms in Mesa,AZ
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 467 gyms in Kansas City,MO
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 1500 gyms in Atlanta,GA
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 1500 gyms in Long Beach,CA
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 203 gyms in Colorado Springs,CO
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 574 gyms in Raleigh,NC
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 1300 gyms in Miami,FL
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 365 gyms in Virginia Beach,VA
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 205 gyms in Omaha,NE
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 2500 gyms in Oakland,CA
Querying https://api.yelp.com/v3/businesses/search ...
get_total_gym: 819 gyms in Minneapolis,MN
Queryin

In [10]:
# Split every "city,state" location string into separate city and state columns.
final["city"] = [loc.split(",")[0] for loc in final["location"]]
final["state"] = [loc.split(",")[1] for loc in final["location"]]
final

Unnamed: 0,location,total_gyms,total_24hr_gyms,ratio%,city,state
0,"New York,NY",5100,152,2.98,New York,NY
1,"Los Angeles,CA",6600,185,2.8,Los Angeles,CA
2,"Chicago,IL",2300,114,4.96,Chicago,IL
3,"Houston,TX",1500,104,6.93,Houston,TX
4,"Phoenix,AZ",1700,90,5.29,Phoenix,AZ
5,"Philadelphia,PA",1400,86,6.14,Philadelphia,PA
6,"San Antonio,TX",533,55,10.32,San Antonio,TX
7,"San Diego,CA",2400,75,3.12,San Diego,CA
8,"Dallas,TX",1600,128,8.0,Dallas,TX
9,"San Jose,CA",1900,49,2.58,San Jose,CA


In [11]:
# Load the population table; a later cell merges it onto the gym counts by city and state.
populations = pd.read_csv("city_populations.csv")

In [12]:
# Display the loaded population table.
populations

Unnamed: 0,city,state,population
0,New York,NY,8537673
1,Los Angeles,CA,3976322
2,Chicago,IL,2704958
3,Houston,TX,2303482
4,Phoenix,AZ,1615017
5,Philadelphia,PA,1567872
6,San Antonio,TX,1492510
7,San Diego,CA,1406630
8,Dallas,TX,1317929
9,San Jose,CA,1025350


In [13]:
# Attach the population figures; the inner join keeps only (city, state)
# pairs that appear in both frames.
final = final.merge(populations, how="inner", on=["city", "state"])

In [15]:
# Store population as float; it is the denominator of the per-thousand rates
# computed further down. (The former leading `final.head()` was dropped: its
# result was never displayed because it was not the last expression.)
final["population"] = final["population"].astype(float)

In [16]:
# Inspect the column dtypes of `final`.
final.dtypes

location            object
total_gyms           int64
total_24hr_gyms      int64
ratio%             float64
city                object
state               object
population         float64
dtype: object

In [63]:
# Per-capita rates: each count divided by population, scaled to 1,000 residents.
for count_col, rate_col in (
    ("total_gyms", "total_gyms_per_thousand_people"),
    ("total_24hr_gyms", "24 hour gyms per thousand people"),
):
    final[rate_col] = final[count_col] / final["population"] * 1000

# Renamed copy whose city column is called "City"; `final` itself keeps "city".
final_df = final.rename(columns={"city": "City"})

In [64]:
# Display the table with the per-thousand columns and the renamed "City" column.
final_df

Unnamed: 0,location,total_gyms,total_24hr_gyms,ratio%,City,state,population,total_gyms_per_thousand_people,24 hour gyms per thousand people
0,"New York,NY",5100,152,2.98,New York,NY,8537673.0,0.597352,0.017803
1,"Los Angeles,CA",6600,185,2.8,Los Angeles,CA,3976322.0,1.659825,0.046525
2,"Chicago,IL",2300,114,4.96,Chicago,IL,2704958.0,0.85029,0.042145
3,"Houston,TX",1500,104,6.93,Houston,TX,2303482.0,0.651188,0.045149
4,"Phoenix,AZ",1700,90,5.29,Phoenix,AZ,1615017.0,1.05262,0.055727
5,"Philadelphia,PA",1400,86,6.14,Philadelphia,PA,1567872.0,0.89293,0.054851
6,"San Antonio,TX",533,55,10.32,San Antonio,TX,1492510.0,0.357117,0.036851
7,"San Diego,CA",2400,75,3.12,San Diego,CA,1406630.0,1.706206,0.053319
8,"Dallas,TX",1600,128,8.0,Dallas,TX,1317929.0,1.214026,0.097122
9,"San Jose,CA",1900,49,2.58,San Jose,CA,1025350.0,1.853026,0.047789


In [66]:
# Reduced view for export: city, state and the 24-hour share only.
ratio_columns = ['City', 'state', 'ratio%']
output_final = final_df.loc[:, ratio_columns]

In [67]:
# Save the full `final` table (lower-case "city" column) without the index.
final.to_csv("gym_totals.csv", index=False)

In [68]:
# Save the City / state / ratio% subset without the index.
output_final.to_csv('city_gym_ratio.csv', index=False)

In [69]:
# Load the obesity table; a later cell merges it onto the gym table.
obesity = pd.read_csv("FiftyCity_ObesityRates.csv")

In [71]:
# Preview the first rows of the obesity table.
obesity.head()

Unnamed: 0,State,City,Obesity
0,NM,Albuquerque,21.70%
1,TX,Arlington,35.30%
2,GA,Atlanta,28.70%
3,TX,Austin,27.00%
4,MD,Baltimore,28.00%


In [74]:
# Left-join the obesity rates onto the gym table. Matching on city AND state
# (instead of city name alone) prevents a same-named city in another state
# from attaching the wrong rate or duplicating rows. Both the "state" and
# "State" columns are kept in the result, as before.
obesity_gyms = pd.merge(
    final_df,
    obesity,
    left_on=["City", "state"],
    right_on=["City", "State"],
    how="left",
)

In [89]:
# Display the gym table with the obesity columns attached.
obesity_gyms

Unnamed: 0,location,total_gyms,total_24hr_gyms,ratio%,City,state,population,total_gyms_per_thousand_people,24 hour gyms per thousand people,State,Obesity
0,"New York,NY",5100,152,2.98,New York,NY,8537673.0,0.597352,0.017803,NY,21.90%
1,"Los Angeles,CA",6600,185,2.8,Los Angeles,CA,3976322.0,1.659825,0.046525,CA,24.30%
2,"Chicago,IL",2300,114,4.96,Chicago,IL,2704958.0,0.85029,0.042145,IL,27.00%
3,"Houston,TX",1500,104,6.93,Houston,TX,2303482.0,0.651188,0.045149,TX,29.10%
4,"Phoenix,AZ",1700,90,5.29,Phoenix,AZ,1615017.0,1.05262,0.055727,AZ,22.80%
5,"Philadelphia,PA",1400,86,6.14,Philadelphia,PA,1567872.0,0.89293,0.054851,PA,24.40%
6,"San Antonio,TX",533,55,10.32,San Antonio,TX,1492510.0,0.357117,0.036851,TX,29.80%
7,"San Diego,CA",2400,75,3.12,San Diego,CA,1406630.0,1.706206,0.053319,CA,26.10%
8,"Dallas,TX",1600,128,8.0,Dallas,TX,1317929.0,1.214026,0.097122,TX,33.80%
9,"San Jose,CA",1900,49,2.58,San Jose,CA,1025350.0,1.853026,0.047789,CA,21.20%


In [90]:
# Write `final` to the path held in final_file ('gym_concept_ratio.csv'), without the index.
# NOTE(review): this saves `final`, not the obesity-merged `obesity_gyms` — confirm that is the intended frame.
final.to_csv(final_file, index=False)

In [94]:
# Convert strings such as "21.90%" into floats. The .str accessor passes
# missing values through as NaN, whereas calling x.rstrip on the float NaN
# that a left merge leaves for unmatched cities raises AttributeError.
# NOTE(review): the KeyError in the saved output presumably came from
# out-of-order execution — confirm with Restart & Run All.
obesity_gyms["Obesity percent"] = (
    obesity_gyms["Obesity"].str.rstrip("%").astype(float)
)
obesity_gyms

KeyError: 'Obesity'

In [86]:
# Scatter the numeric obesity rate against 24-hour gym density. The raw
# "Obesity" column holds strings such as "21.90%", which a scatter plot
# cannot place on a numeric axis, so the parsed "Obesity percent" is used.
obesity_gyms.plot(kind="scatter", x="Obesity percent", y="24 hour gyms per thousand people")
plt.tight_layout()

KeyError: 'Obesity'

# Total Gyms by City

In [None]:
# Bar chart of the total gym count per city.
# obesity_gyms is built from final_df, whose city column was renamed to
# "City", so looking up "city" would raise KeyError.
x_axis = obesity_gyms["City"]
y_axis = obesity_gyms["total_gyms"]

plt.bar(x_axis, y_axis, color="b", align="center")
plt.xticks(rotation=90)
plt.xlabel("City")
plt.ylabel("Total gyms")
plt.tight_layout()

# Bubble Map

In [None]:
# NOTE(review): `plotly.plotly` is believed to have moved to the separate
# `chart_studio` package in plotly 4 — confirm the installed version still provides it.
import plotly.plotly as py
import pandas as pd

# Bubble map of US city populations from the public plotly sample data set.
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_us_cities.csv')

# Hover text for each city: name plus population in millions.
df['text'] = df['name'] + '<br>Population ' + (df['pop']/1e6).astype(str)+' million'

# Half-open row-position ranges [start, stop), one per trace. They are
# contiguous: the earlier (0,2),(3,10),(11,20) bounds silently skipped
# rows 2, 10 and 20 because a slice excludes its stop position.
limits = [(0,3),(3,11),(11,21),(21,50),(50,3000)]
colors = ["rgb(0,116,217)","rgb(255,65,54)","rgb(133,20,75)","rgb(255,133,27)","rgb(255,220,0)"]
cities = []
scale = 50000  # divisor turning a population into a marker size

for i, lim in enumerate(limits):
    df_sub = df[lim[0]:lim[1]]
    city = dict(
        type = 'scattergeo',
        locationmode = 'USA-states',
        lon = df_sub['lon'],
        lat = df_sub['lat'],
        text = df_sub['text'],
        sizemode = 'diameter',
        marker = dict(
            size = df_sub['pop']/scale,
            color = colors[i],
            line = dict(width = 2,color = 'black')
        ),
        name = '{0} - {1}'.format(lim[0],lim[1]) )
    cities.append(city)

layout = dict(
        title = '2014 US city populations<br>(Click legend to toggle traces)',
        showlegend = True,
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showland = True,
            landcolor = 'rgb(217, 217, 217)',
            subunitwidth=1,
            countrywidth=1,
            subunitcolor="rgb(255, 255, 255)",
            countrycolor="rgb(255, 255, 255)"
        ),
    )

# Assemble the figure and hand it to py.plot, keeping the returned value in `url`.
fig = dict( data=cities, layout=layout )
url = py.plot( fig, validate=False, filename='d3-bubble-map-populations' )