## Feature Exploration & Cleaning: Global Mobility Report, US Counties

In [5]:
import pandas as pd
import numpy as np
import datetime as dt
import sqlite3
from sklearn.impute import SimpleImputer, MissingIndicator
from sklearn_pandas import DataFrameMapper

In [6]:
conn=sqlite3.connect('COVID19_county_data.db')
cursor= conn.cursor()

In [7]:
#Create function to query SQL data
def query_data(sql_statement):
    df=pd.read_sql(sql_statement, conn)
    #cursor.execute(sql_statement)
    return df.to_dict('records')

In [8]:
#Read Global mobility report, csv from https://www.google.com/covid19/mobility/
df=pd.read_csv('Global_Mobility_Report_v4.csv') #Data from Google

In [9]:
df.head()

Unnamed: 0,country_region_code,country_region,sub_region_1,sub_region_2,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
0,AE,United Arab Emirates,,,2020-02-15,0.0,4.0,5.0,0.0,2.0,1.0
1,AE,United Arab Emirates,,,2020-02-16,1.0,4.0,4.0,1.0,2.0,1.0
2,AE,United Arab Emirates,,,2020-02-17,-1.0,1.0,5.0,1.0,2.0,1.0
3,AE,United Arab Emirates,,,2020-02-18,-2.0,1.0,5.0,0.0,2.0,1.0
4,AE,United Arab Emirates,,,2020-02-19,-2.0,0.0,4.0,-1.0,2.0,1.0


In [10]:
df['country_region'].unique()

array(['United Arab Emirates', 'Afghanistan', 'Antigua and Barbuda',
       'Angola', 'Argentina', 'Austria', 'Australia', 'Aruba',
       'Bosnia and Herzegovina', 'Barbados', 'Bangladesh', 'Belgium',
       'Burkina Faso', 'Bulgaria', 'Bahrain', 'Benin', 'Bolivia',
       'Brazil', 'The Bahamas', 'Botswana', 'Belarus', 'Belize', 'Canada',
       'Switzerland', "Côte d'Ivoire", 'Chile', 'Cameroon', 'Colombia',
       'Costa Rica', 'Cape Verde', 'Czechia', 'Germany', 'Denmark',
       'Dominican Republic', 'Ecuador', 'Estonia', 'Egypt', 'Spain',
       'Finland', 'Fiji', 'France', 'Gabon', 'United Kingdom', 'Georgia',
       'Ghana', 'Greece', 'Guatemala', 'Guinea-Bissau', 'Hong Kong',
       'Honduras', 'Croatia', 'Haiti', 'Hungary', 'Indonesia', 'Ireland',
       'Israel', 'India', 'Iraq', 'Italy', 'Jamaica', 'Jordan', 'Japan',
       'Kenya', 'Kyrgyzstan', 'Cambodia', 'South Korea', 'Kuwait',
       'Kazakhstan', 'Laos', 'Lebanon', 'Liechtenstein', 'Sri Lanka',
       'Lithuania', '

In [11]:
US_df=df[df['country_region']=='United States']

In [12]:
US_df.head()

Unnamed: 0,country_region_code,country_region,sub_region_1,sub_region_2,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
111798,US,United States,,,2020-02-15,6.0,2.0,15.0,3.0,2.0,-1.0
111799,US,United States,,,2020-02-16,7.0,1.0,16.0,2.0,0.0,-1.0
111800,US,United States,,,2020-02-17,6.0,0.0,28.0,-9.0,-24.0,5.0
111801,US,United States,,,2020-02-18,0.0,-1.0,6.0,1.0,0.0,1.0
111802,US,United States,,,2020-02-19,2.0,0.0,8.0,1.0,1.0,0.0


In [13]:
US_df.tail()

Unnamed: 0,country_region_code,country_region,sub_region_1,sub_region_2,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
359324,US,United States,Wyoming,Weston County,2020-05-11,,,,,-31.0,
359325,US,United States,Wyoming,Weston County,2020-05-12,,,,,-34.0,
359326,US,United States,Wyoming,Weston County,2020-05-13,,,,,-30.0,
359327,US,United States,Wyoming,Weston County,2020-05-14,,,,,-25.0,
359328,US,United States,Wyoming,Weston County,2020-05-15,,,,,-25.0,


In [14]:
US_df.isna().sum()

country_region_code                                        0
country_region                                             0
sub_region_1                                              92
sub_region_2                                            4784
date                                                       0
retail_and_recreation_percent_change_from_baseline     64433
grocery_and_pharmacy_percent_change_from_baseline      72521
parks_percent_change_from_baseline                    183121
transit_stations_percent_change_from_baseline         150870
workplaces_percent_change_from_baseline                10155
residential_percent_change_from_baseline              135758
dtype: int64

In [15]:
US_df=US_df.dropna(subset=['sub_region_2']) #drop whole state values, where County is 'NaN'

In [16]:
US_df.head()

Unnamed: 0,country_region_code,country_region,sub_region_1,sub_region_2,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
111982,US,United States,Alabama,Autauga County,2020-02-15,5.0,7.0,,,-4.0,
111983,US,United States,Alabama,Autauga County,2020-02-16,0.0,1.0,-23.0,,-4.0,
111984,US,United States,Alabama,Autauga County,2020-02-17,8.0,0.0,,,-27.0,5.0
111985,US,United States,Alabama,Autauga County,2020-02-18,-2.0,0.0,,,2.0,0.0
111986,US,United States,Alabama,Autauga County,2020-02-19,-2.0,0.0,,,2.0,0.0


In [17]:
US_df.dtypes

country_region_code                                    object
country_region                                         object
sub_region_1                                           object
sub_region_2                                           object
date                                                   object
retail_and_recreation_percent_change_from_baseline    float64
grocery_and_pharmacy_percent_change_from_baseline     float64
parks_percent_change_from_baseline                    float64
transit_stations_percent_change_from_baseline         float64
workplaces_percent_change_from_baseline               float64
residential_percent_change_from_baseline              float64
dtype: object

In [18]:
US_df.isna().sum()

country_region_code                                        0
country_region                                             0
sub_region_1                                               0
sub_region_2                                               0
date                                                       0
retail_and_recreation_percent_change_from_baseline     64433
grocery_and_pharmacy_percent_change_from_baseline      72521
parks_percent_change_from_baseline                    183115
transit_stations_percent_change_from_baseline         150870
workplaces_percent_change_from_baseline                10155
residential_percent_change_from_baseline              135710
dtype: int64

In [19]:
num_US_df=US_df.select_dtypes('number')

In [20]:
num_US_df.head()

Unnamed: 0,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
111982,5.0,7.0,,,-4.0,
111983,0.0,1.0,-23.0,,-4.0,
111984,8.0,0.0,,,-27.0,5.0
111985,-2.0,0.0,,,2.0,0.0
111986,-2.0,0.0,,,2.0,0.0


In [21]:
#Create steps to create column to indicate missing values, and fill missing values with 0
steps=[]
for col in num_US_df.columns:
    steps.append(([col], [SimpleImputer(strategy='constant', fill_value=0)]))
    steps.append(([col], [MissingIndicator()],{'alias':f'{col}_isna'}))
steps

[(['retail_and_recreation_percent_change_from_baseline'],
  [SimpleImputer(add_indicator=False, copy=True, fill_value=0, missing_values=nan,
                 strategy='constant', verbose=0)]),
 (['retail_and_recreation_percent_change_from_baseline'],
  [MissingIndicator(error_on_new=True, features='missing-only', missing_values=nan,
                    sparse='auto')],
  {'alias': 'retail_and_recreation_percent_change_from_baseline_isna'}),
 (['grocery_and_pharmacy_percent_change_from_baseline'],
  [SimpleImputer(add_indicator=False, copy=True, fill_value=0, missing_values=nan,
                 strategy='constant', verbose=0)]),
 (['grocery_and_pharmacy_percent_change_from_baseline'],
  [MissingIndicator(error_on_new=True, features='missing-only', missing_values=nan,
                    sparse='auto')],
  {'alias': 'grocery_and_pharmacy_percent_change_from_baseline_isna'}),
 (['parks_percent_change_from_baseline'],
  [SimpleImputer(add_indicator=False, copy=True, fill_value=0, missing_

In [22]:
#Applies transformer steps and replaces NaN with 0 for entire dataframe
mapper=DataFrameMapper(steps, default=None, df_out=True)

In [23]:
clean_US_df=mapper.fit_transform(US_df)

In [24]:
clean_US_df.head()

Unnamed: 0,retail_and_recreation_percent_change_from_baseline,retail_and_recreation_percent_change_from_baseline_isna,grocery_and_pharmacy_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline_isna,parks_percent_change_from_baseline,parks_percent_change_from_baseline_isna,transit_stations_percent_change_from_baseline,transit_stations_percent_change_from_baseline_isna,workplaces_percent_change_from_baseline,workplaces_percent_change_from_baseline_isna,residential_percent_change_from_baseline,residential_percent_change_from_baseline_isna,country_region_code,country_region,sub_region_1,sub_region_2,date
111982,5.0,False,7.0,False,0.0,True,0.0,True,-4.0,False,0.0,True,US,United States,Alabama,Autauga County,2020-02-15
111983,0.0,False,1.0,False,-23.0,False,0.0,True,-4.0,False,0.0,True,US,United States,Alabama,Autauga County,2020-02-16
111984,8.0,False,0.0,False,0.0,True,0.0,True,-27.0,False,5.0,False,US,United States,Alabama,Autauga County,2020-02-17
111985,-2.0,False,0.0,False,0.0,True,0.0,True,2.0,False,0.0,False,US,United States,Alabama,Autauga County,2020-02-18
111986,-2.0,False,0.0,False,0.0,True,0.0,True,2.0,False,0.0,False,US,United States,Alabama,Autauga County,2020-02-19


In [25]:
clean_US_df.tail()

Unnamed: 0,retail_and_recreation_percent_change_from_baseline,retail_and_recreation_percent_change_from_baseline_isna,grocery_and_pharmacy_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline_isna,parks_percent_change_from_baseline,parks_percent_change_from_baseline_isna,transit_stations_percent_change_from_baseline,transit_stations_percent_change_from_baseline_isna,workplaces_percent_change_from_baseline,workplaces_percent_change_from_baseline_isna,residential_percent_change_from_baseline,residential_percent_change_from_baseline_isna,country_region_code,country_region,sub_region_1,sub_region_2,date
359324,0.0,True,0.0,True,0.0,True,0.0,True,-31.0,False,0.0,True,US,United States,Wyoming,Weston County,2020-05-11
359325,0.0,True,0.0,True,0.0,True,0.0,True,-34.0,False,0.0,True,US,United States,Wyoming,Weston County,2020-05-12
359326,0.0,True,0.0,True,0.0,True,0.0,True,-30.0,False,0.0,True,US,United States,Wyoming,Weston County,2020-05-13
359327,0.0,True,0.0,True,0.0,True,0.0,True,-25.0,False,0.0,True,US,United States,Wyoming,Weston County,2020-05-14
359328,0.0,True,0.0,True,0.0,True,0.0,True,-25.0,False,0.0,True,US,United States,Wyoming,Weston County,2020-05-15


In [26]:
clean_US_df.isna().sum()

retail_and_recreation_percent_change_from_baseline         0
retail_and_recreation_percent_change_from_baseline_isna    0
grocery_and_pharmacy_percent_change_from_baseline          0
grocery_and_pharmacy_percent_change_from_baseline_isna     0
parks_percent_change_from_baseline                         0
parks_percent_change_from_baseline_isna                    0
transit_stations_percent_change_from_baseline              0
transit_stations_percent_change_from_baseline_isna         0
workplaces_percent_change_from_baseline                    0
workplaces_percent_change_from_baseline_isna               0
residential_percent_change_from_baseline                   0
residential_percent_change_from_baseline_isna              0
country_region_code                                        0
country_region                                             0
sub_region_1                                               0
sub_region_2                                               0
date                    

In [27]:
clean_US_df.dtypes

retail_and_recreation_percent_change_from_baseline         float64
retail_and_recreation_percent_change_from_baseline_isna       bool
grocery_and_pharmacy_percent_change_from_baseline          float64
grocery_and_pharmacy_percent_change_from_baseline_isna        bool
parks_percent_change_from_baseline                         float64
parks_percent_change_from_baseline_isna                       bool
transit_stations_percent_change_from_baseline              float64
transit_stations_percent_change_from_baseline_isna            bool
workplaces_percent_change_from_baseline                    float64
workplaces_percent_change_from_baseline_isna                  bool
residential_percent_change_from_baseline                   float64
residential_percent_change_from_baseline_isna                 bool
country_region_code                                         object
country_region                                              object
sub_region_1                                                ob

In [28]:
clean_US_df['date']=pd.to_datetime(clean_US_df['date'])

In [29]:
clean_US_df.dtypes

retail_and_recreation_percent_change_from_baseline                float64
retail_and_recreation_percent_change_from_baseline_isna              bool
grocery_and_pharmacy_percent_change_from_baseline                 float64
grocery_and_pharmacy_percent_change_from_baseline_isna               bool
parks_percent_change_from_baseline                                float64
parks_percent_change_from_baseline_isna                              bool
transit_stations_percent_change_from_baseline                     float64
transit_stations_percent_change_from_baseline_isna                   bool
workplaces_percent_change_from_baseline                           float64
workplaces_percent_change_from_baseline_isna                         bool
residential_percent_change_from_baseline                          float64
residential_percent_change_from_baseline_isna                        bool
country_region_code                                                object
country_region                        

In [30]:
clean_US_df['date'].iloc[0]

Timestamp('2020-02-15 00:00:00')

In [31]:
clean_US_df['date'].iloc[0].weekday()

5

In [32]:
def weekend_encoder(datetime_val, mode= 'weekend'):
    week_vals={0:'Monday',1:'Tuesday',2:'Wednesday', 3:'Thursday', 4:'Friday', 5:'Saturday', 6: 'Sunday'}
    if mode=='weekend':
        if datetime_val.weekday()>=5:
            return 1
        else:
            return 0
    if mode=='day_of_week':
        return week_vals[datetime_val.weekday()]   

In [33]:
weekends=[weekend_encoder(date) for date in clean_US_df['date']]

In [34]:
weekends[:20]

[1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0]

In [35]:
day_of_week=[weekend_encoder(date, mode='day_of_week') for date in clean_US_df['date']]

In [36]:
day_of_week[:20]

['Saturday',
 'Sunday',
 'Monday',
 'Tuesday',
 'Wednesday',
 'Thursday',
 'Friday',
 'Saturday',
 'Sunday',
 'Monday',
 'Tuesday',
 'Wednesday',
 'Thursday',
 'Friday',
 'Saturday',
 'Sunday',
 'Monday',
 'Tuesday',
 'Wednesday',
 'Thursday']

In [37]:
clean_US_df['weekend']=weekends
clean_US_df['day_of_week']=day_of_week

In [38]:
clean_US_df.head()

Unnamed: 0,retail_and_recreation_percent_change_from_baseline,retail_and_recreation_percent_change_from_baseline_isna,grocery_and_pharmacy_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline_isna,parks_percent_change_from_baseline,parks_percent_change_from_baseline_isna,transit_stations_percent_change_from_baseline,transit_stations_percent_change_from_baseline_isna,workplaces_percent_change_from_baseline,workplaces_percent_change_from_baseline_isna,residential_percent_change_from_baseline,residential_percent_change_from_baseline_isna,country_region_code,country_region,sub_region_1,sub_region_2,date,weekend,day_of_week
111982,5.0,False,7.0,False,0.0,True,0.0,True,-4.0,False,0.0,True,US,United States,Alabama,Autauga County,2020-02-15,1,Saturday
111983,0.0,False,1.0,False,-23.0,False,0.0,True,-4.0,False,0.0,True,US,United States,Alabama,Autauga County,2020-02-16,1,Sunday
111984,8.0,False,0.0,False,0.0,True,0.0,True,-27.0,False,5.0,False,US,United States,Alabama,Autauga County,2020-02-17,0,Monday
111985,-2.0,False,0.0,False,0.0,True,0.0,True,2.0,False,0.0,False,US,United States,Alabama,Autauga County,2020-02-18,0,Tuesday
111986,-2.0,False,0.0,False,0.0,True,0.0,True,2.0,False,0.0,False,US,United States,Alabama,Autauga County,2020-02-19,0,Wednesday


In [39]:
'Autauga County'.split(' ')[0]

'Autauga'

In [40]:
clean_US_df['sub_region_2']=clean_US_df['sub_region_2'].apply(lambda x: x.split(' ')[0])

In [41]:
clean_US_df.head()

Unnamed: 0,retail_and_recreation_percent_change_from_baseline,retail_and_recreation_percent_change_from_baseline_isna,grocery_and_pharmacy_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline_isna,parks_percent_change_from_baseline,parks_percent_change_from_baseline_isna,transit_stations_percent_change_from_baseline,transit_stations_percent_change_from_baseline_isna,workplaces_percent_change_from_baseline,workplaces_percent_change_from_baseline_isna,residential_percent_change_from_baseline,residential_percent_change_from_baseline_isna,country_region_code,country_region,sub_region_1,sub_region_2,date,weekend,day_of_week
111982,5.0,False,7.0,False,0.0,True,0.0,True,-4.0,False,0.0,True,US,United States,Alabama,Autauga,2020-02-15,1,Saturday
111983,0.0,False,1.0,False,-23.0,False,0.0,True,-4.0,False,0.0,True,US,United States,Alabama,Autauga,2020-02-16,1,Sunday
111984,8.0,False,0.0,False,0.0,True,0.0,True,-27.0,False,5.0,False,US,United States,Alabama,Autauga,2020-02-17,0,Monday
111985,-2.0,False,0.0,False,0.0,True,0.0,True,2.0,False,0.0,False,US,United States,Alabama,Autauga,2020-02-18,0,Tuesday
111986,-2.0,False,0.0,False,0.0,True,0.0,True,2.0,False,0.0,False,US,United States,Alabama,Autauga,2020-02-19,0,Wednesday


In [42]:
clean_US_df_records=clean_US_df.to_dict('records')

In [43]:
county_state=[f"{record['sub_region_2'], record['sub_region_1'] }" for record in clean_US_df_records]

In [44]:
county_state[0]

"('Autauga', 'Alabama')"

In [45]:
clean_US_df['County_State']=county_state

In [46]:
clean_US_df.tail()

Unnamed: 0,retail_and_recreation_percent_change_from_baseline,retail_and_recreation_percent_change_from_baseline_isna,grocery_and_pharmacy_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline_isna,parks_percent_change_from_baseline,parks_percent_change_from_baseline_isna,transit_stations_percent_change_from_baseline,transit_stations_percent_change_from_baseline_isna,workplaces_percent_change_from_baseline,workplaces_percent_change_from_baseline_isna,residential_percent_change_from_baseline,residential_percent_change_from_baseline_isna,country_region_code,country_region,sub_region_1,sub_region_2,date,weekend,day_of_week,County_State
359324,0.0,True,0.0,True,0.0,True,0.0,True,-31.0,False,0.0,True,US,United States,Wyoming,Weston,2020-05-11,0,Monday,"('Weston', 'Wyoming')"
359325,0.0,True,0.0,True,0.0,True,0.0,True,-34.0,False,0.0,True,US,United States,Wyoming,Weston,2020-05-12,0,Tuesday,"('Weston', 'Wyoming')"
359326,0.0,True,0.0,True,0.0,True,0.0,True,-30.0,False,0.0,True,US,United States,Wyoming,Weston,2020-05-13,0,Wednesday,"('Weston', 'Wyoming')"
359327,0.0,True,0.0,True,0.0,True,0.0,True,-25.0,False,0.0,True,US,United States,Wyoming,Weston,2020-05-14,0,Thursday,"('Weston', 'Wyoming')"
359328,0.0,True,0.0,True,0.0,True,0.0,True,-25.0,False,0.0,True,US,United States,Wyoming,Weston,2020-05-15,0,Friday,"('Weston', 'Wyoming')"


In [47]:
clean_US_df.to_sql('global_mobility', conn, index_label='id', if_exists='replace')

In [48]:
new_df= pd.DataFrame(query_data('SELECT * FROM global_mobility'))

In [49]:
new_df.tail()

Unnamed: 0,County_State,country_region,country_region_code,date,day_of_week,grocery_and_pharmacy_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline_isna,id,parks_percent_change_from_baseline,parks_percent_change_from_baseline_isna,...,residential_percent_change_from_baseline_isna,retail_and_recreation_percent_change_from_baseline,retail_and_recreation_percent_change_from_baseline_isna,sub_region_1,sub_region_2,transit_stations_percent_change_from_baseline,transit_stations_percent_change_from_baseline_isna,weekend,workplaces_percent_change_from_baseline,workplaces_percent_change_from_baseline_isna
242742,"('Weston', 'Wyoming')",United States,US,2020-05-11 00:00:00,Monday,0.0,1,359324,0.0,1,...,1,0.0,1,Wyoming,Weston,0.0,1,0,-31.0,0
242743,"('Weston', 'Wyoming')",United States,US,2020-05-12 00:00:00,Tuesday,0.0,1,359325,0.0,1,...,1,0.0,1,Wyoming,Weston,0.0,1,0,-34.0,0
242744,"('Weston', 'Wyoming')",United States,US,2020-05-13 00:00:00,Wednesday,0.0,1,359326,0.0,1,...,1,0.0,1,Wyoming,Weston,0.0,1,0,-30.0,0
242745,"('Weston', 'Wyoming')",United States,US,2020-05-14 00:00:00,Thursday,0.0,1,359327,0.0,1,...,1,0.0,1,Wyoming,Weston,0.0,1,0,-25.0,0
242746,"('Weston', 'Wyoming')",United States,US,2020-05-15 00:00:00,Friday,0.0,1,359328,0.0,1,...,1,0.0,1,Wyoming,Weston,0.0,1,0,-25.0,0


In [50]:
new_df.dtypes

County_State                                                object
country_region                                              object
country_region_code                                         object
date                                                        object
day_of_week                                                 object
grocery_and_pharmacy_percent_change_from_baseline          float64
grocery_and_pharmacy_percent_change_from_baseline_isna       int64
id                                                           int64
parks_percent_change_from_baseline                         float64
parks_percent_change_from_baseline_isna                      int64
residential_percent_change_from_baseline                   float64
residential_percent_change_from_baseline_isna                int64
retail_and_recreation_percent_change_from_baseline         float64
retail_and_recreation_percent_change_from_baseline_isna      int64
sub_region_1                                                ob

In [51]:
FIPS_df=pd.DataFrame(query_data('SELECT * FROM fips_codes'))
FIPS_df.head()

Unnamed: 0,County,County_State,FIPS,State,id
0,Autauga,"('Autauga', 'Alabama')",1001,Alabama,0
1,Baldwin,"('Baldwin', 'Alabama')",1003,Alabama,116
2,Barbour,"('Barbour', 'Alabama')",1005,Alabama,232
3,Bibb,"('Bibb', 'Alabama')",1007,Alabama,348
4,Blount,"('Blount', 'Alabama')",1009,Alabama,464


In [52]:
global_fips_df=pd.DataFrame(query_data('''SELECT fips_codes.FIPS, global_mobility.*  FROM global_mobility 
                JOIN fips_codes ON fips_codes.County_State=global_mobility.County_State
               '''))

In [53]:
global_fips_df.head()

Unnamed: 0,County_State,FIPS,country_region,country_region_code,date,day_of_week,grocery_and_pharmacy_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline_isna,id,parks_percent_change_from_baseline,...,residential_percent_change_from_baseline_isna,retail_and_recreation_percent_change_from_baseline,retail_and_recreation_percent_change_from_baseline_isna,sub_region_1,sub_region_2,transit_stations_percent_change_from_baseline,transit_stations_percent_change_from_baseline_isna,weekend,workplaces_percent_change_from_baseline,workplaces_percent_change_from_baseline_isna
0,"('Autauga', 'Alabama')",1001,United States,US,2020-02-15 00:00:00,Saturday,7.0,0,111982,0.0,...,1,5.0,0,Alabama,Autauga,0.0,1,1,-4.0,0
1,"('Autauga', 'Alabama')",1001,United States,US,2020-02-16 00:00:00,Sunday,1.0,0,111983,-23.0,...,1,0.0,0,Alabama,Autauga,0.0,1,1,-4.0,0
2,"('Autauga', 'Alabama')",1001,United States,US,2020-02-17 00:00:00,Monday,0.0,0,111984,0.0,...,0,8.0,0,Alabama,Autauga,0.0,1,0,-27.0,0
3,"('Autauga', 'Alabama')",1001,United States,US,2020-02-18 00:00:00,Tuesday,0.0,0,111985,0.0,...,0,-2.0,0,Alabama,Autauga,0.0,1,0,2.0,0
4,"('Autauga', 'Alabama')",1001,United States,US,2020-02-19 00:00:00,Wednesday,0.0,0,111986,0.0,...,0,-2.0,0,Alabama,Autauga,0.0,1,0,2.0,0


In [54]:
global_fips_df.columns

Index(['County_State', 'FIPS', 'country_region', 'country_region_code', 'date',
       'day_of_week', 'grocery_and_pharmacy_percent_change_from_baseline',
       'grocery_and_pharmacy_percent_change_from_baseline_isna', 'id',
       'parks_percent_change_from_baseline',
       'parks_percent_change_from_baseline_isna',
       'residential_percent_change_from_baseline',
       'residential_percent_change_from_baseline_isna',
       'retail_and_recreation_percent_change_from_baseline',
       'retail_and_recreation_percent_change_from_baseline_isna',
       'sub_region_1', 'sub_region_2',
       'transit_stations_percent_change_from_baseline',
       'transit_stations_percent_change_from_baseline_isna', 'weekend',
       'workplaces_percent_change_from_baseline',
       'workplaces_percent_change_from_baseline_isna'],
      dtype='object')

In [55]:
global_fips_df['FIPS'] = global_fips_df['FIPS'].apply(lambda x: str(int(x)).zfill(5))

In [56]:
global_fips_df.drop(columns='id').to_sql('global_mobility', conn, index_label='id', if_exists='replace')