# SafeGraph foot traffic data processing

Read the most recent week's CSV from SafeGraph:
s3://sg-c19-response/weekly-patterns/
SafeGraph information about their data:
https://docs.google.com/spreadsheets/u/1/d/1UNWvPzkUTTlXBZ6M6iGhM_7sr8h-MxsZdE7iOszkAmk/htmlview#

In [5]:
import pandas as pd

# Weekly patterns file for the current week. The first 10 characters of the
# file name are the date (YYYY-MM-DD), kept in `week` to label columns later.
file = "2020-05-04-weekly-patterns.csv"
week = file[0:10]
main_df = pd.read_csv(file)
# errors="coerce" turns postal codes that are not plain numbers into NaN
# instead of raising. NaN fails both range comparisons below, so such rows
# are simply filtered out rather than aborting the whole load.
main_df['zip_int'] = pd.to_numeric(main_df.postal_code, errors="coerce")
# Keep only ZIP codes 90001-90899 (presumably the greater Los Angeles area,
# judging by the cities in the sample output -- confirm the intended range).
main_df = main_df[(main_df.zip_int >= 90001) & (main_df.zip_int <= 90899)]
main_df.columns

Index(['safegraph_place_id', 'location_name', 'street_address', 'city',
       'region', 'postal_code', 'brands', 'naics_code', 'date_range_start',
       'date_range_end', 'raw_visit_counts', 'raw_visitor_counts',
       'visits_by_day', 'visits_by_each_hour', 'visitor_home_cbgs',
       'visitor_country_of_origin', 'distance_from_home', 'median_dwell',
       'bucketed_dwell_times', 'related_same_day_brand',
       'related_same_week_brand', 'device_type', 'iso_country_code',
       'zip_int'],
      dtype='object')

Selecting the columns of interest

In [6]:
# Keep only the columns used downstream. The explicit .copy() detaches the
# result from the frame it was sliced from, so that adding a column to main_df
# afterwards does not trigger pandas' SettingWithCopyWarning.
main_df = main_df[['safegraph_place_id', 'location_name', 'street_address', 'city', 'postal_code', 'brands',
                   'naics_code', 'raw_visit_counts', 'raw_visitor_counts', 'visits_by_day', 'distance_from_home',
                   'median_dwell']].copy()
main_df

Unnamed: 0,safegraph_place_id,location_name,street_address,city,postal_code,brands,naics_code,raw_visit_counts,raw_visitor_counts,visits_by_day,distance_from_home,median_dwell
116,sg:03eaa2b5b78646c49dcc2aa05ce3131c,El Pollo Inka,15400 Hawthorne Blvd,Lawndale,90260,El Pollo Inka,722511.0,26,25,"[1,0,1,6,6,6,6]",5610.0,9.0
123,sg:043e398a8ff240a5ba5f323fa36317f4,Pizza Hut,1173 W Carson St,Torrance,90502,Pizza Hut,722513.0,15,14,"[1,1,3,2,3,1,4]",2029.0,9.0
170,sg:05e114ea81c04f6aabb93972c2164db1,McDonald's,4947 Huntington Dr N,Los Angeles,90032,McDonald's,722513.0,106,90,"[17,12,21,7,10,19,20]",1333.0,8.0
178,sg:06080e63312648bb9c0f64e636af86dd,Knott's Chicken To Go,California Marketplace Knotts Berry Farm,Buena Park,90620,,722511.0,70,56,"[0,2,2,10,11,25,20]",25892.0,28.0
277,sg:09503ad6209a48d0b4c62d8a8aec4320,Big Lots Stores,9020 Firestone Blvd,Downey,90241,Big Lots Stores,452319.0,88,77,"[10,4,11,10,20,12,21]",4376.0,20.0
...,...,...,...,...,...,...,...,...,...,...,...,...
3676089,sg:f8848b4100dd4eafba617046048edf7e,Professional Music Teacher and Piano Tuner,15859 Janine Dr,Whittier,90603,,451140.0,13,4,"[0,1,3,3,1,3,2]",,102.0
3676098,sg:f8f9da1c84344ab3b3e066ffbd5de36c,ARCO,3100 N Los Coyotes Diagonal,Long Beach,90808,ARCO,447110.0,32,28,"[3,5,4,3,7,4,6]",3738.0,6.5
3676145,sg:fa96742bd6844e39a32e1940a8e7317a,Huntington Harbor Boat Rentals,16732 Pacific Coast Hwy,Sunset Beach,90742,,441222.0,24,14,"[0,2,1,4,5,7,5]",2637.0,24.0
3676168,sg:fb94c5c46de54ece8bdb6ffecc182de5,Family Mennonite Church,6520 S Normandie Ave,Los Angeles,90044,,813110.0,3,3,"[0,0,0,0,0,1,2]",,29.0


Category Statistics table is downloaded here:
https://docs.safegraph.com/docs/places-summary-statistics#section-all-places

Use `apply` to derive a category for each place from its NAICS code

In [3]:
# (low, high, label) NAICS code ranges, checked in order; the first match wins.
# A single code is written as a range whose two ends are equal. Order matters:
# the supermarket range sits inside the broader "General Stores" range and
# must therefore be tested first.
_NAICS_CATEGORY_RANGES = (
    (445100, 445199, "Supermarket"),
    (722511, 722511, "Sit Down Restaurants"),
    (722513, 722513, "Counter Service Restaurants"),
    (722515, 722515, "Snack Bar"),
    (722400, 722499, "Bars"),
    (620000, 629999, "Health Care"),
    (531120, 531120, "Shopping Mall"),
    (512131, 512131, "Movie Theater"),
    (440000, 459999, "General Stores"),
)


def get_category(df):
    """Return the category label for one row, based on its NAICS code.

    Parameters
    ----------
    df : object exposing a ``naics_code`` attribute
        Despite the name, this is a single row (as passed by
        ``DataFrame.apply(..., axis=1)``), not a whole frame.

    Returns
    -------
    str
        The label of the first matching range, or ``"Other"`` when no range
        matches. A NaN code matches nothing and therefore yields ``"Other"``.
    """
    code = df.naics_code
    for low, high, label in _NAICS_CATEGORY_RANGES:
        if low <= code <= high:
            return label
    return "Other"

# get_category only reads naics_code, so apply it over that single column.
# Each row handed to it is then a one-element Series rather than a Series
# built from every column of the frame, which is cheaper on a large frame
# and yields the same labels on the same index.
main_df['category'] = main_df[['naics_code']].apply(get_category, axis=1)
main_df


Unnamed: 0,safegraph_place_id,location_name,street_address,city,postal_code,brands,naics_code,raw_visit_counts,raw_visitor_counts,visits_by_day,distance_from_home,median_dwell,category
43,sg:01710fd28d364ee4abbca7d8692669ba,Momentum Fitness,1635 Challenge Dr,Concord,94520,,713940.0,6,2,"[0,1,0,3,2,0,0]",,241.5,Other
79,sg:0294a45d5c4b430482321a0282900986,76,805 S Harbor Blvd,Anaheim,92805,76,447110.0,16,15,"[2,3,2,0,4,3,2]",4416.0,7.0,General Stores
95,sg:0327ad5802114d2fbfa8d170552bfc30,Life Science Outsourcing Inc,830 Challenger St,Brea,92821,,621610.0,45,13,"[0,7,9,9,10,10,0]",16592.0,408.0,Health Care
116,sg:03eaa2b5b78646c49dcc2aa05ce3131c,El Pollo Inka,15400 Hawthorne Blvd,Lawndale,90260,El Pollo Inka,722511.0,26,25,"[1,0,1,6,6,6,6]",5610.0,9.0,Sit Down Restaurants
118,sg:03f9e9bad3254df3afeeea20fd4c188b,Frank's Tire and Brakes,299 29th St,Oakland,94611,,441310.0,66,54,"[5,15,9,10,4,10,13]",6014.0,22.0,General Stores
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3676257,sg:ff39a2712c3b4e9bba46d253de4a8074,Creative Memories,1609 Petri Pl,San Jose,95118,,451130.0,13,9,"[0,0,0,1,1,6,5]",2085.0,103.0,General Stores
3676258,sg:ff441c4f0a4f45699528d74178559458,El Avisador Magazine,460 W Taylor St,San Jose,95110,,451211.0,10,8,"[2,3,1,1,2,1,0]",5601.0,63.5,General Stores
3676260,sg:ff473ee639a5420eac5ab685db4d55d0,Harry Schmidt Park,Linden & Meredith Ave,Gustine,95322,,712190.0,62,34,"[7,12,11,9,6,7,10]",7594.0,40.0,Other
3676272,sg:ffce99ddb5594d0f92cb1e0e44a90411,Inland Empire Barbell,1200 Arizona St Unit Unit B7,Redlands,92374,,713940.0,10,6,"[0,2,1,1,2,1,3]",5619.0,55.5,Other


Opening last week's data

In [4]:
# Previous week's patterns file. The first 10 characters of the file name are
# the date (YYYY-MM-DD), kept in `last_week` to label that week's column.
last_week_file = "2020-04-26-weekly-patterns.csv"
last_week = last_week_file[0:10]
# Only three columns of this file are ever used, so let read_csv skip the
# rest instead of loading every column and discarding them afterwards.
last_week_df = pd.read_csv(last_week_file, usecols=['safegraph_place_id', 'region', 'visits_by_day'])
# Restrict to California rows, then keep just the join key and the daily visits.
last_week_df = last_week_df[last_week_df.region == 'CA']
last_week_df = last_week_df[['safegraph_place_id', 'visits_by_day']]
last_week_df

Unnamed: 0,safegraph_place_id,visits_by_day
3,sg:00128c68c0a44501b7ff7f5ac4fef61a,"[0,0,0,0,0,1,0]"
12,sg:0061238a547f4fa9b400113d28334dcc,"[0,1,0,0,0,0,0]"
38,sg:0176f163f007467ab14d6c9f250ad476,"[6,1,1,3,3,2,1]"
44,sg:01a51f95c66e40efb4f67b5eca63a8c1,"[0,1,0,0,0,0,0]"
56,sg:0213ee8ce87c4f1e916b800cc7b73be1,"[1,1,0,3,3,2,3]"
...,...,...
3648437,sg:ff39a2712c3b4e9bba46d253de4a8074,"[3,2,6,1,4,2,2]"
3648438,sg:ff441c4f0a4f45699528d74178559458,"[0,1,0,1,4,1,0]"
3648440,sg:ff473ee639a5420eac5ab685db4d55d0,"[10,11,10,7,13,5,13]"
3648452,sg:ffce99ddb5594d0f92cb1e0e44a90411,"[1,1,1,1,1,1,0]"


In [5]:
# Tag this week's daily-visit column with the week's date so it stays
# distinguishable once another week's "visits_by_day" column is merged in.
column_name = "visits_by_day_{}".format(week)
main_df.rename({"visits_by_day": column_name}, axis="columns", inplace=True)
main_df

Unnamed: 0,safegraph_place_id,location_name,street_address,city,postal_code,brands,naics_code,raw_visit_counts,raw_visitor_counts,visits_by_day_2020-05-03,distance_from_home,median_dwell,category
43,sg:01710fd28d364ee4abbca7d8692669ba,Momentum Fitness,1635 Challenge Dr,Concord,94520,,713940.0,6,2,"[0,1,0,3,2,0,0]",,241.5,Other
79,sg:0294a45d5c4b430482321a0282900986,76,805 S Harbor Blvd,Anaheim,92805,76,447110.0,16,15,"[2,3,2,0,4,3,2]",4416.0,7.0,General Stores
95,sg:0327ad5802114d2fbfa8d170552bfc30,Life Science Outsourcing Inc,830 Challenger St,Brea,92821,,621610.0,45,13,"[0,7,9,9,10,10,0]",16592.0,408.0,Health Care
116,sg:03eaa2b5b78646c49dcc2aa05ce3131c,El Pollo Inka,15400 Hawthorne Blvd,Lawndale,90260,El Pollo Inka,722511.0,26,25,"[1,0,1,6,6,6,6]",5610.0,9.0,Sit Down Restaurants
118,sg:03f9e9bad3254df3afeeea20fd4c188b,Frank's Tire and Brakes,299 29th St,Oakland,94611,,441310.0,66,54,"[5,15,9,10,4,10,13]",6014.0,22.0,General Stores
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3676257,sg:ff39a2712c3b4e9bba46d253de4a8074,Creative Memories,1609 Petri Pl,San Jose,95118,,451130.0,13,9,"[0,0,0,1,1,6,5]",2085.0,103.0,General Stores
3676258,sg:ff441c4f0a4f45699528d74178559458,El Avisador Magazine,460 W Taylor St,San Jose,95110,,451211.0,10,8,"[2,3,1,1,2,1,0]",5601.0,63.5,General Stores
3676260,sg:ff473ee639a5420eac5ab685db4d55d0,Harry Schmidt Park,Linden & Meredith Ave,Gustine,95322,,712190.0,62,34,"[7,12,11,9,6,7,10]",7594.0,40.0,Other
3676272,sg:ffce99ddb5594d0f92cb1e0e44a90411,Inland Empire Barbell,1200 Arizona St Unit Unit B7,Redlands,92374,,713940.0,10,6,"[0,2,1,1,2,1,3]",5619.0,55.5,Other


Merging this week's data with last week's data

In [6]:
column_name = "visits_by_day_" + last_week
# Guard so that re-running this cell does not merge last week's data in a
# second time (which would leave two columns with the same name).
if column_name not in main_df.columns:
    # Rename last week's column before merging rather than after: the merged
    # column gets its dated name directly and can never collide with a
    # "visits_by_day" column still present in main_df. last_week_df itself
    # is left unchanged because rename returns a new frame.
    main_df = pd.merge(
        main_df,
        last_week_df.rename(columns={"visits_by_day": column_name}),
        how='inner',
        on='safegraph_place_id',
    )
main_df


Unnamed: 0,safegraph_place_id,location_name,street_address,city,postal_code,brands,naics_code,raw_visit_counts,raw_visitor_counts,visits_by_day_2020-05-03,distance_from_home,median_dwell,category,visits_by_day_2020-04-26
0,sg:01710fd28d364ee4abbca7d8692669ba,Momentum Fitness,1635 Challenge Dr,Concord,94520,,713940.0,6,2,"[0,1,0,3,2,0,0]",,241.5,Other,"[0,1,0,0,2,0,0]"
1,sg:0294a45d5c4b430482321a0282900986,76,805 S Harbor Blvd,Anaheim,92805,76,447110.0,16,15,"[2,3,2,0,4,3,2]",4416.0,7.0,General Stores,"[6,4,4,6,2,3,4]"
2,sg:0327ad5802114d2fbfa8d170552bfc30,Life Science Outsourcing Inc,830 Challenger St,Brea,92821,,621610.0,45,13,"[0,7,9,9,10,10,0]",16592.0,408.0,Health Care,"[0,9,10,7,11,9,1]"
3,sg:03eaa2b5b78646c49dcc2aa05ce3131c,El Pollo Inka,15400 Hawthorne Blvd,Lawndale,90260,El Pollo Inka,722511.0,26,25,"[1,0,1,6,6,6,6]",5610.0,9.0,Sit Down Restaurants,"[6,4,6,4,7,7,7]"
4,sg:03f9e9bad3254df3afeeea20fd4c188b,Frank's Tire and Brakes,299 29th St,Oakland,94611,,441310.0,66,54,"[5,15,9,10,4,10,13]",6014.0,22.0,General Stores,"[2,6,8,6,3,4,9]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
388350,sg:ff39a2712c3b4e9bba46d253de4a8074,Creative Memories,1609 Petri Pl,San Jose,95118,,451130.0,13,9,"[0,0,0,1,1,6,5]",2085.0,103.0,General Stores,"[3,2,6,1,4,2,2]"
388351,sg:ff441c4f0a4f45699528d74178559458,El Avisador Magazine,460 W Taylor St,San Jose,95110,,451211.0,10,8,"[2,3,1,1,2,1,0]",5601.0,63.5,General Stores,"[0,1,0,1,4,1,0]"
388352,sg:ff473ee639a5420eac5ab685db4d55d0,Harry Schmidt Park,Linden & Meredith Ave,Gustine,95322,,712190.0,62,34,"[7,12,11,9,6,7,10]",7594.0,40.0,Other,"[10,11,10,7,13,5,13]"
388353,sg:ffce99ddb5594d0f92cb1e0e44a90411,Inland Empire Barbell,1200 Arizona St Unit Unit B7,Redlands,92374,,713940.0,10,6,"[0,2,1,1,2,1,3]",5619.0,55.5,Other,"[1,1,1,1,1,1,0]"
