In [1]:
# Print the version of the `python` executable found by the shell (IPython `!` escape),
# so the environment used for this run is recorded in the notebook output.
!python --version

Python 3.6.3 :: Anaconda, Inc.


In [2]:
import os
import sys
import datetime
import requests
import urllib.request
import json
import zipfile
import pandas as pd
import tweepy
import psycopg2
from sqlalchemy import create_engine
from datetime import date, timedelta

sys.path.append('../')
from envir import config

### Obtain NYC Zip Code Data 

In [3]:
# New York State ZIP code dataset, CSV export from data.ny.gov.
zip_url = 'https://data.ny.gov/api/views/juva-r6g2/rows.csv?accessType=DOWNLOAD'
ny_zip_csv_path = config.shared + 'ny_zip_demographics.csv'

# Save the CSV into the shared workspace directory; the (local path, headers)
# tuple returned by urlretrieve is shown as the cell output.
urllib.request.urlretrieve(zip_url, filename=ny_zip_csv_path)

('/gws_gpfs/projects/project-nyc_food_health/workspace/share/ny_zip_demographics.csv',
 <http.client.HTTPMessage at 0x7f1b964b8e10>)

In [4]:
# US Census Bureau 2019 ZCTA Gazetteer archive.
zip_url = 'https://www2.census.gov/geo/docs/maps-data/data/gazetteer/2019_Gazetteer/2019_Gaz_zcta_national.zip'

# One path is used for both the download and the extraction so the two steps
# cannot drift apart (opening a differently spelled file name would raise
# FileNotFoundError on a fresh run).
gazetteer_zip_path = config.shared + '2019_Gaz_zcta_national.zip'
urllib.request.urlretrieve(zip_url, gazetteer_zip_path)

# Unpack every member into the shared directory; the notebook later reads
# '2019_Gaz_zcta_national.txt' from that same directory.
with zipfile.ZipFile(gazetteer_zip_path, 'r') as zip_ref:
    zip_ref.extractall(config.shared)

In [5]:
# Load the tab-delimited Gazetteer text file from the shared directory and
# preview its first rows.
gazetteer_txt_path = config.shared + '2019_Gaz_zcta_national.txt'
us_zips = pd.read_csv(gazetteer_txt_path, delimiter="\t")
us_zips.head()

Unnamed: 0,GEOID,ALAND,AWATER,ALAND_SQMI,AWATER_SQMI,INTPTLAT,INTPTLONG
0,601,166659747,799292,64.348,0.309,18.180555,-66.749961
1,602,79307534,4428428,30.621,1.71,18.361945,-67.175597
2,603,81887188,181412,31.617,0.07,18.455183,-67.119887
3,606,109579993,12487,42.309,0.005,18.158327,-66.932928
4,610,93013428,4172059,35.913,1.611,18.294032,-67.127156


In [11]:
# County names used to select the New York City rows of the state table.
nyc_counties = ['New York', 'Kings', 'Queens', 'Richmond', 'Bronx']

ny_zips = pd.read_csv(config.shared + 'ny_zip_demographics.csv', sep=',')
in_nyc = ny_zips['County Name'].isin(nyc_counties)

# Keep only the NYC rows, then attach the Gazetteer columns (land/water area,
# INTPTLAT/INTPTLONG) by matching 'ZIP Code' to 'GEOID'. The inner join drops
# ZIP codes that have no Gazetteer entry.
nyc_zips = ny_zips.loc[in_nyc].merge(
    us_zips,
    how='inner',
    left_on='ZIP Code',
    right_on='GEOID',
)
nyc_zips.head()

Unnamed: 0,County Name,State FIPS,County Code,County FIPS,ZIP Code,File Date,GEOID,ALAND,AWATER,ALAND_SQMI,AWATER_SQMI,INTPTLAT,INTPTLONG
0,Bronx,36,5,36005,10472,07/25/2007,10472,2729341,0,1.054,0.0,40.829556,-73.86931
1,Bronx,36,5,36005,10463,07/25/2007,10463,3755855,345779,1.45,0.134,40.880678,-73.90654
2,New York,36,61,36061,10463,07/25/2007,10463,3755855,345779,1.45,0.134,40.880678,-73.90654
3,Bronx,36,5,36005,10468,07/25/2007,10468,2773214,45100,1.071,0.017,40.86894,-73.899995
4,Bronx,36,5,36005,10457,07/25/2007,10457,2743123,0,1.059,0.0,40.847162,-73.898663


In [12]:
# (row count, column count) of the merged NYC ZIP table.
nyc_zips.shape

(224, 13)

In [15]:
# Row(s) with the largest land area in square miles. Series.max() is vectorised
# and skips NaN, whereas the builtin max() iterates in Python and gives an
# order-dependent answer when NaN is present.
largest_land_sqmi = nyc_zips['ALAND_SQMI'].max()
nyc_zips.loc[nyc_zips['ALAND_SQMI'] == largest_land_sqmi]

Unnamed: 0,County Name,State FIPS,County Code,County FIPS,ZIP Code,File Date,GEOID,ALAND,AWATER,ALAND_SQMI,AWATER_SQMI,INTPTLAT,INTPTLONG
113,Richmond,36,85,36085,10314,07/25/2007,10314,35432499,1687746,13.681,0.652,40.599263,-74.165748


In [16]:
# Row(s) with the smallest land area in square miles. Series.min() is vectorised
# and skips NaN, whereas the builtin min() iterates in Python and gives an
# order-dependent answer when NaN is present.
smallest_land_sqmi = nyc_zips['ALAND_SQMI'].min()
nyc_zips.loc[nyc_zips['ALAND_SQMI'] == smallest_land_sqmi]

Unnamed: 0,County Name,State FIPS,County Code,County FIPS,ZIP Code,File Date,GEOID,ALAND,AWATER,ALAND_SQMI,AWATER_SQMI,INTPTLAT,INTPTLONG
71,New York,36,61,36061,10177,07/25/2007,10177,5421,0,0.002,0.0,40.755139,-73.975934


### Set Connection to DB

In [19]:
# The URL names only the database (`yelp_abcs`); no host, user or password is
# given here. Presumably those come from the driver's defaults or environment;
# TODO confirm.
db_url = 'postgresql+psycopg2:///yelp_abcs'
engine = create_engine(db_url)

# Open a connection that stays in the notebook namespace as `conn`.
conn = engine.connect()

In [20]:
# Echo the Connection object as the cell output.
conn

<sqlalchemy.engine.base.Connection at 0x7f1b940ab6d0>

### Search for Tweets by ZIP Code and Search Terms

In [21]:
# Twitter credentials are read from the project config module rather than
# being written into the notebook.
auth = tweepy.OAuthHandler(
    consumer_key=config.consumer_key,
    consumer_secret=config.consumer_secret,
)
auth.set_access_token(config.access_token, config.access_token_secret)

# Rate-limit options passed to the client: wait when the limit is reached, and
# print a notice while waiting (per the tweepy 3.x API documentation).
rate_limit_opts = dict(wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
api = tweepy.API(auth, **rate_limit_opts)

In [22]:
# Delivery-app names used as search terms.
delivery_apps = [
    'doordash',
    'ubereats',
    'postmates',
    'grubhub',
    'seamless',
    'caviar',
]

# Generic food-ordering keywords.
key_words = [
    'delivery',
    'order',
    'takeout',
    'dinner',
    'late night eats',
]
# Note: appending " -filter:retweets" to a query term removes retweets from the results.

# The Twitter API only allows up to 7 days of history on the basic service.
fr_date = date.today() - timedelta(days=7)

In [36]:



# Search English-language tweets matching the query (retweets filtered out)
# within 30 km of latitude 40.7128, longitude -74.0060, starting from `fr_date`.
tweets = tweepy.Cursor(api.search, q='order doordash -filter:retweets', geocode = '40.7128,-74.0060,30km',
                       lang="en", since=fr_date).items()

tweet_columns = ['created_at', 'user', 'text', 'place', 'coordinates']

# Collect one record per tweet and build the frame once. Growing a DataFrame
# with DataFrame.append inside the loop copies the whole frame on every
# iteration, and the method was removed in pandas 2.0.
# NOTE: 'user' and 'place' hold the tweepy model objects themselves, not plain
# strings.
tweet_records = [
    {
        'created_at': t.created_at,
        'user': t.user,
        'text': t.text,
        'place': t.place,
        'coordinates': t.coordinates,
    }
    for t in tweets
]

# Passing `columns` keeps the expected schema even when the search returns no tweets.
df = pd.DataFrame(tweet_records, columns=tweet_columns)

df

Unnamed: 0,created_at,user,text,place,coordinates
0,2020-12-02 17:59:18,User(_api=<tweepy.api.API object at 0x7f1b9414...,@DoorDash_Help So i get my order cancelled due...,,
1,2020-12-02 17:54:11,User(_api=<tweepy.api.API object at 0x7f1b9414...,@DoorDash I hope I really did get credited for...,,
2,2020-12-02 15:13:15,User(_api=<tweepy.api.API object at 0x7f1b9414...,@suatkilicMD @CBSNews And its a lot. If you do...,,
3,2020-12-02 07:31:12,User(_api=<tweepy.api.API object at 0x7f1b9414...,@dangerousssdi No I had credit cause DoorDash ...,,
4,2020-12-02 01:18:44,User(_api=<tweepy.api.API object at 0x7f1b9414...,"Welp @DoorDash, you fucked me over for the ver...",,
5,2020-12-02 00:29:16,User(_api=<tweepy.api.API object at 0x7f1b9414...,@DoorDash just saw a commercial repping my hom...,,
6,2020-12-01 16:45:50,User(_api=<tweepy.api.API object at 0x7f1b9414...,@DoorDash @BurgerKing I ordered last night and...,Place(_api=<tweepy.api.API object at 0x7f1b941...,
7,2020-12-01 01:00:14,User(_api=<tweepy.api.API object at 0x7f1b9414...,@RogerClark41 Some apps are set on items &amp;...,,
8,2020-11-30 21:45:46,User(_api=<tweepy.api.API object at 0x7f1b9414...,@Justicesross I order it from DoorDash,,
9,2020-11-30 20:17:36,User(_api=<tweepy.api.API object at 0x7f1b9414...,A fresh batch of our Jumbo BBQ Wings is ready ...,Place(_api=<tweepy.api.API object at 0x7f1b941...,"{'type': 'Point', 'coordinates': [-73.99014, 4..."
