In [24]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from pprint import pprint
import json
import glob, os

# Google developer API key
from config import yelpkey

In [25]:
# Output File (CSV)

stl_zip_codes = "STL_zipcodesCORRECT.csv"

stl_zip_df = pd.read_csv(stl_zip_codes)
stl_zip_df.head()

Unnamed: 0,ZIP,Code,Type,County,Population,Area,Code(s),Unnamed: 7,Unnamed: 8
0,ZIP,Code,63101,Standard,Saint,Louis,City,2620,314
1,ZIP,Code,63102,Standard,Saint,Louis,City,2316,314
2,ZIP,Code,63103,Standard,Saint,Louis,City,6900,314
3,ZIP,Code,63104,Standard,Saint,Louis,City,18656,314
4,ZIP,Code,63106,Standard,Saint,Louis,City,11883,314


In [26]:
stl_zip_df.drop(['ZIP', 'Code', 'County', 'Population', 'Area', 'Code(s)', 'Unnamed: 8'], axis=1, inplace=True)
stl_zip_df.head()

Unnamed: 0,Type,Unnamed: 7
0,63101,2620
1,63102,2316
2,63103,6900
3,63104,18656
4,63106,11883


In [27]:
stl_zip_df = stl_zip_df.rename(columns={"Type": "ZIP Code", "Unnamed: 7": "Population"})
stl_zip_df.head()

Unnamed: 0,ZIP Code,Population
0,63101,2620
1,63102,2316
2,63103,6900
3,63104,18656
4,63106,11883


In [28]:
stl_zip_df.dtypes

ZIP Code      int64
Population    int64
dtype: object

In [29]:
zip_codes = stl_zip_df['ZIP Code']
#zip_codes

In [30]:
# importing crime data (CSV) and combining into 1 df

stl_crime_df = pd.concat(map(pd.read_csv, glob.glob(os.path.join("complete_file.csv"))))
stl_crime_df.head()

Unnamed: 0,Complaint,CodedMonth,DateOccur,FlagCrime,FlagUnfounded,FlagAdministrative,Count,FlagCleanup,Crime,District,...,LocationName,LocationComment,CADAddress,CADStreet,Unnamed: 18,state,street address,Zip,XCoord,YCoord
0,19-025935,2019-06,1/1/1900 0:00,Y,,,1,,51322,4,...,,,1811,WARREN,St. Louis,MO,"1811 WARREN, St. Louis, MO",63106,898905.7,1036540.0
1,19-020007,2019-05,6/18/1994 0:01,Y,,,1,,175500,1,...,,,7300,VERMONT,St. Louis,MO,"7300 VERMONT, St. Louis, MO",63101,892793.9,1076992.0
2,19-010094,2019-03,2/7/1996 13:02,Y,,,1,,263899,4,...,,@HEADQUARTERS,1915,OLIVE,St. Louis,MO,"1915 OLIVE, St. Louis, MO",63103,0.0,0.0
3,19-007109,2019-02,10/7/1996 12:43,Y,,,1,,263899,4,...,,@HEADQUARTERS,1915,OLIVE,St. Louis,MO,"1915 OLIVE, St. Louis, MO",63103,0.0,0.0
4,19-028394,2019-06,7/5/1998 12:00,Y,,,1,,115400,6,...,,,5704,ACME,St. Louis,MO,"5704 ACME, St. Louis, MO",63101,0.0,0.0


In [31]:
stl_crime_df.drop(['FlagCrime', 'FlagUnfounded', 'FlagAdministrative', 'Count', 'FlagCleanup'], axis=1, inplace=True)
#stl_crime_df.head()

In [32]:
zip_code_df = pd.DataFrame(stl_crime_df["Zip"].value_counts())
zip_code_df.head()

Unnamed: 0,Zip
63101,15623
63116,1156
63118,989
63147,859
63103,779


In [33]:
new_zip = zip_code_df.reset_index()
clean_zip = new_zip.rename(columns={"index": "ZIP Code", "Zip": "Crimes Committed"})
clean_zip.head()

Unnamed: 0,ZIP Code,Crimes Committed
0,63101,15623
1,63116,1156
2,63118,989
3,63147,859
4,63103,779


In [34]:
crime_merge = pd.merge(clean_zip, stl_zip_df, on="ZIP Code", how="outer")
crime_merge

Unnamed: 0,ZIP Code,Crimes Committed,Population
0,63101,15623.0,2620.0
1,63116,1156.0,43540.0
2,63118,989.0,26704.0
3,63147,859.0,11373.0
4,63103,779.0,6900.0
5,63111,748.0,20313.0
6,63104,666.0,18656.0
7,63107,650.0,11912.0
8,63115,638.0,20775.0
9,63112,574.0,20368.0


In [35]:
clean_zip_df = crime_merge.dropna(subset=['Crimes Committed', 'Population'])
clean_zip_df

Unnamed: 0,ZIP Code,Crimes Committed,Population
0,63101,15623.0,2620.0
1,63116,1156.0,43540.0
2,63118,989.0,26704.0
3,63147,859.0,11373.0
4,63103,779.0,6900.0
5,63111,748.0,20313.0
6,63104,666.0,18656.0
7,63107,650.0,11912.0
8,63115,638.0,20775.0
9,63112,574.0,20368.0


In [36]:
clean_zip_df.sum()

ZIP Code            1136041.0
Crimes Committed      26317.0
Population           309233.0
dtype: float64

In [37]:
lat_long = pd.read_csv("lat_long_zip.csv")
lat_long.drop(['City', 'State', 'County'], axis=1, inplace=True)
lat_long = lat_long.dropna(subset=['Latitude','Longitude','Zip'])
lat_long['Zip']=lat_long['Zip'].astype(int)
lat_long = lat_long.rename(columns={"Zip": "ZIP Code"})
lat_long

Unnamed: 0,Latitude,Longitude,ZIP Code
0,38.631501,-90.19231,63101
1,38.632858,-90.184094,63102
2,38.639526,-90.229622,63103
3,38.606767,-90.208088,63104
4,38.640583,-90.197764,63106
5,38.674248,-90.205776,63107
6,38.649981,-90.238889,63108
7,38.600683,-90.307787,63109
8,38.638236,-90.284279,63110
9,38.563116,-90.254522,63111


In [38]:
lat_long.dtypes
final_df = pd.merge(clean_zip_df,lat_long, on="ZIP Code", how="outer")
final_df

Unnamed: 0,ZIP Code,Crimes Committed,Population,Latitude,Longitude
0,63101,15623.0,2620.0,38.631501,-90.19231
1,63116,1156.0,43540.0,38.58866,-90.245639
2,63118,989.0,26704.0,38.602458,-90.217435
3,63147,859.0,11373.0,38.724134,-90.227944
4,63103,779.0,6900.0,38.639526,-90.229622
5,63111,748.0,20313.0,38.563116,-90.254522
6,63104,666.0,18656.0,38.606767,-90.208088
7,63107,650.0,11912.0,38.674248,-90.205776
8,63115,638.0,20775.0,38.695256,-90.243966
9,63112,574.0,20368.0,38.655697,-90.281436


In [63]:



# **************************

# Brooklyn code:
#r=requests.get("https://api.yelp.com/v3/businesses/search", headers={"content-type":"Bearer"})

url = "https://api.yelp.com/v3/businesses/search"

for page_num in [1, 2]:
    purl = "https://api.yelp.com/v3/businesses/search" + str(page_num)
    print(purl)

addresses_df = pd.read_csv("addresszip.csv")



store_names = []
loc_zip_codes =[]
stl_zip_codes =[]

zips = {
    63101,
    63102,
    63103,
    63104,
    63106,
    63107,
    63108,
    63109,
    63110,
    63111,
    63112,
    63113,
    63115,
    63116,
    63118,
    63120,
    63139,
    63147,
    
}


unique_zip_codes = addresses_df["Zip"].unique()


#for zip_code in ["63101","63102","63103","63104", "63106","63107", 
#                  "63108","63109", "63110","63111", "63112","63113",
#                  "63115","63116", "63118","63120", "63139","63147"]:

for index, row in final_df.iterrows():
#for zip_code in zip_codes:
#for zip_code in zips:
    zip_code = str(int(row['ZIP Code']))
    print(zip_code)
    params = {
        "Authorization": yelpkey,
        "latitude":    row['Latitude'],
        "longitude":    row['Longitude'],
        #"location": zip_code,
        "categories": "beer_and_wine,servicestations",
        "sort_by": "distance",
        "limit": "50",
        "radius": 5000
    }

    response = requests.get(url, params=params, headers={"Authorization": f"Bearer {yelpkey}"})
    
    data = response.json()
    print(f'input zip {zip_code} length {len(data["businesses"])}')
    
    for store in data["businesses"]:
        loc_zip = store["location"]["zip_code"]
        store_name = store["name"]
#         if loc_zip == zip_code:
        loc_zip_codes.append(loc_zip)
        store_names.append(store_name)
        stl_zip_codes.append(zip_code)
        
        
#         combined_stores.append(pd.Series([loc_data,name_data],index=["Zip Code","Store Name"]),ignore_index=True)
        
        
    
combined_stores_df = pd.DataFrame({"STL Zip Code":stl_zip_codes ,"Store Name": store_names, "Store Zip Code": loc_zip_codes})
combined_stores_df

#zip_code = "63115"

# response = requests.get(url, params=params, headers={"Authorization": f"Bearer {yelpkey}"})
# response
#unique_zip_codes

https://api.yelp.com/v3/businesses/search1
https://api.yelp.com/v3/businesses/search2
63101
input zip 63101 length 34
63116
input zip 63116 length 50
63118
input zip 63118 length 42
63147
input zip 63147 length 10
63103
input zip 63103 length 49
63111
input zip 63111 length 30
63104
input zip 63104 length 39
63107
input zip 63107 length 25
63115
input zip 63115 length 14
63112
input zip 63112 length 47
63113
input zip 63113 length 41
63110
input zip 63110 length 48
63106
input zip 63106 length 36
63108
input zip 63108 length 45
63102
input zip 63102 length 31
63120
input zip 63120 length 13
63109
input zip 63109 length 50
63139
input zip 63139 length 50


Unnamed: 0,STL Zip Code,Store Name,Store Zip Code
0,63101,Yoshi,63101
1,63101,Shell,63101
2,63101,James Henry Provisions,63102
3,63101,Crown Mart 7 Inc,63106
4,63101,Kilroy's,63102
5,63101,Overturf Bp Amoco,63102
6,63101,BP,63103
7,63101,Grand Petite Market,63103
8,63101,BP,63103
9,63101,Midwest Petroleum,63103


In [64]:
zip_count_df = combined_stores_df.groupby(['STL Zip Code']).size().reset_index(name='counts')
zip_count_df = zip_count_df.rename(columns={"STL Zip Code": "ZIP Code", "counts": "Liquor Stores"})
zip_count_df

Unnamed: 0,ZIP Code,Liquor Stores
0,63101,34
1,63102,31
2,63103,49
3,63104,39
4,63106,36
5,63107,25
6,63108,45
7,63109,50
8,63110,48
9,63111,30


In [65]:
data = response.json()
pprint(data)

{'businesses': [{'alias': 'u-gas-saint-louis',
                 'categories': [{'alias': 'servicestations',
                                 'title': 'Gas Stations'},
                                {'alias': 'carwash', 'title': 'Car Wash'}],
                 'coordinates': {'latitude': 38.60387, 'longitude': -90.292},
                 'display_phone': '(314) 644-4491',
                 'distance': 211.19286277516417,
                 'id': 'pKxOXAuC1i8mnUjxVW2tnQ',
                 'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/HhRUM56qprJjN8vjtag8QA/o.jpg',
                 'is_closed': False,
                 'location': {'address1': '3225 Hampton Ave',
                              'address2': '',
                              'address3': '',
                              'city': 'Saint Louis',
                              'country': 'US',
                              'display_address': ['3225 Hampton Ave',
                                                  'Saint Louis, MO

                                {'alias': 'servicestations',
                                 'title': 'Gas Stations'}],
                 'coordinates': {'latitude': 38.5695115469798,
                                 'longitude': -90.2852766695122},
                 'display_phone': '(314) 353-8707',
                 'distance': 4070.552065764146,
                 'id': 'tUKNCjEwGsXxkwDuqQ6q9g',
                 'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/uSB_fYA62B0YcheH6pUL7w/o.jpg',
                 'is_closed': False,
                 'location': {'address1': '6900 Gravois Ave',
                              'address2': None,
                              'address3': None,
                              'city': 'St Louis Downtown',
                              'country': 'US',
                              'display_address': ['6900 Gravois Ave',
                                                  'St Louis Downtown, MO '
                                                  '63

In [66]:
for store in data["businesses"]:
    
    print(store["location"]["zip_code"])

63139
63139
63139
63139
63139
63109
63143
63143
63139
63143
63139
63143
63110
63110
63109
63139
63143
63143
63139
63139
63139
63116
63143
63143
63143
63119
63110
63117
63105
63117
63119
63117
63116
63109
63143
63117
63143
63116
63117
63110
63117
63116
63144
63116
63108
63108
63108
63123
63108
63105


In [67]:
zip_count_df.dtypes

ZIP Code         object
Liquor Stores     int64
dtype: object

In [68]:
clean_zip_df.dtypes

ZIP Code              int64
Crimes Committed    float64
Population          float64
dtype: object

In [69]:
zip_count_df['ZIP Code'] = zip_count_df['ZIP Code'].astype(int)
zip_count_df.dtypes

ZIP Code         int64
Liquor Stores    int64
dtype: object

In [70]:
zip_final_df = pd.merge(zip_count_df, clean_zip_df, on="ZIP Code", how="outer")
zip_final_df

Unnamed: 0,ZIP Code,Liquor Stores,Crimes Committed,Population
0,63101,34,15623.0,2620.0
1,63102,31,468.0,2316.0
2,63103,49,779.0,6900.0
3,63104,39,666.0,18656.0
4,63106,36,521.0,11883.0
5,63107,25,650.0,11912.0
6,63108,45,489.0,21568.0
7,63109,50,357.0,26946.0
8,63110,48,524.0,17107.0
9,63111,30,748.0,20313.0


In [71]:
zip_final_df['Crimes per Person'] = zip_final_df['Crimes Committed'] / zip_final_df['Population']
zip_final_df['Crimes per Liquor Store'] = zip_final_df['Crimes Committed'] / zip_final_df['Liquor Stores']
zip_final_df

Unnamed: 0,ZIP Code,Liquor Stores,Crimes Committed,Population,Crimes per Person,Crimes per Liquor Store
0,63101,34,15623.0,2620.0,5.962977,459.5
1,63102,31,468.0,2316.0,0.202073,15.096774
2,63103,49,779.0,6900.0,0.112899,15.897959
3,63104,39,666.0,18656.0,0.035699,17.076923
4,63106,36,521.0,11883.0,0.043844,14.472222
5,63107,25,650.0,11912.0,0.054567,26.0
6,63108,45,489.0,21568.0,0.022672,10.866667
7,63109,50,357.0,26946.0,0.013249,7.14
8,63110,48,524.0,17107.0,0.030631,10.916667
9,63111,30,748.0,20313.0,0.036824,24.933333
