### Import dependencies and load the data file

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import scipy.stats as st


In [3]:
trips_file = "Trips_by_Distance.csv"
trips_df = pd.read_csv(trips_file).dropna(how='any')
del trips_df['State FIPS'], trips_df['County FIPS']
trips_df.head()

Unnamed: 0,Level,Date,State Postal Code,County Name,Population Staying at Home,Population Not Staying at Home,Number of Trips,Number of Trips <1,Number of Trips 1-3,Number of Trips 3-5,Number of Trips 5-10,Number of Trips 10-25,Number of Trips 25-50,Number of Trips 50-100,Number of Trips 100-250,Number of Trips 250-500,Number of Trips >=500
0,County,1/1/2019,NJ,Atlantic County,65412,199206,843588,237921,212263,92618,136909,100079,35613,21178,5311,626,1070
1,County,1/1/2019,NJ,Bergen County,237866,695963,2722275,812551,703573,319765,398958,325758,90433,38672,17805,4522,10238
2,County,1/1/2019,NJ,Burlington County,100791,343232,1315760,350422,316098,150400,197259,201325,58958,28598,8528,1939,2233
3,County,1/1/2019,NJ,Camden County,130209,375319,1488595,441080,373336,176319,215886,188972,53797,26883,9266,1157,1899
4,County,1/1/2019,NJ,Cape May County,24167,68110,273062,75839,64783,33539,45259,30687,13195,6297,2334,326,803


#### Example code for datetime 

In [4]:
# Sample setting up a test dataframe to setup datetime
test_df = pd.DataFrame({'date':['1/15/2020','2/25/2020','5/23/2020'], 'value':[2,6,8]})
test_df

Unnamed: 0,date,value
0,1/15/2020,2
1,2/25/2020,6
2,5/23/2020,8


In [5]:
# Example of how to use the datetime method to strip the date
test_df['datetime'] = pd.to_datetime(test_df['date'],format='%m/%d/%Y')

In [6]:
# Example of extracting the date into segments of day, month, year that can be used for filering, plotting
test_df['day'] = test_df['datetime'].map(lambda x: x.day)
test_df['month'] = test_df['datetime'].map(lambda x: x.month)
test_df['year'] = test_df['datetime'].map(lambda x: x.year)

In [7]:
#Demonstrating how to pullout this item "datetime" to plot, will use "datetime"to plot and graph
test_df.loc[test_df['year']==2020,:]

Unnamed: 0,date,value,datetime,day,month,year
0,1/15/2020,2,2020-01-15,15,1,2020
1,2/25/2020,6,2020-02-25,25,2,2020
2,5/23/2020,8,2020-05-23,23,5,2020


### Data cleanup

#### Convert strings to integers so that column values and be calcuated

In [8]:
#trips_distance_df.columns

colstoclean = [ 'Population Staying at Home',
       'Population Not Staying at Home', 'Number of Trips',
 'Number of Trips <1', 'Number of Trips 1-3', 'Number of Trips 3-5',
       'Number of Trips 5-10', 'Number of Trips 10-25',
       'Number of Trips 25-50', 'Number of Trips 50-100',
       'Number of Trips 100-250', 'Number of Trips 250-500',
       'Number of Trips >=500']
for col in colstoclean:
    print(col)
    trips_df[col] = trips_df[col].str.replace(",","").astype(int)

Population Staying at Home
Population Not Staying at Home
Number of Trips
Number of Trips <1
Number of Trips 1-3
Number of Trips 3-5
Number of Trips 5-10
Number of Trips 10-25
Number of Trips 25-50
Number of Trips 50-100
Number of Trips 100-250
Number of Trips 250-500
Number of Trips >=500


In [9]:
# Confirming that numerics are now integers and not strings
trips_df

Unnamed: 0,Level,Date,State Postal Code,County Name,Population Staying at Home,Population Not Staying at Home,Number of Trips,Number of Trips <1,Number of Trips 1-3,Number of Trips 3-5,Number of Trips 5-10,Number of Trips 10-25,Number of Trips 25-50,Number of Trips 50-100,Number of Trips 100-250,Number of Trips 250-500,Number of Trips >=500
0,County,1/1/2019,NJ,Atlantic County,65412,199206,843588,237921,212263,92618,136909,100079,35613,21178,5311,626,1070
1,County,1/1/2019,NJ,Bergen County,237866,695963,2722275,812551,703573,319765,398958,325758,90433,38672,17805,4522,10238
2,County,1/1/2019,NJ,Burlington County,100791,343232,1315760,350422,316098,150400,197259,201325,58958,28598,8528,1939,2233
3,County,1/1/2019,NJ,Camden County,130209,375319,1488595,441080,373336,176319,215886,188972,53797,26883,9266,1157,1899
4,County,1/1/2019,NJ,Cape May County,24167,68110,273062,75839,64783,33539,45259,30687,13195,6297,2334,326,803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16400,County,1/16/2021,NJ,Salem County,14997,47610,254296,61181,46864,30682,42362,52218,15951,3708,979,244,107
16401,County,1/16/2021,NJ,Somerset County,85784,245380,1195251,314062,275977,151579,198198,176577,55641,15600,6417,379,821
16402,County,1/16/2021,NJ,Sussex County,34501,106298,550190,135344,122569,62473,93265,91735,34716,7867,1890,201,130
16403,County,1/16/2021,NJ,Union County,159727,398340,2014171,640972,551051,243946,254801,222017,64600,25170,8913,1673,1028


#### Using datetime to format Date for use in visualizations

In [10]:
#dt = datetime.strptime("21/11/06 16:30", "%d/%m/%y %H:%M")
dt.datetime.strptime('1/1/2019', "%m/%d/%Y")

datetime.datetime(2019, 1, 1, 0, 0)

In [11]:
# Prior to data cleaning I am applying datetime to my dataframe and adding a new column called datetime
trips_df['datetime'] = pd.to_datetime(trips_df['Date'],format='%m/%d/%Y')

In [12]:
# Creating three new distinct columns for components of the date can be used for visualizations
trips_df['day'] = trips_df['datetime'].map(lambda x: x.day)
trips_df['month'] = trips_df['datetime'].map(lambda x: x.month)
trips_df['year'] = trips_df['datetime'].map(lambda x: x.year)

In [14]:
# Show me the data for the year "2020"
#trips_df.loc[trips_df['year']==2020,:]

## Create lists for the years

In [None]:
# Display the 2019 dataframe
#somerset_county = county_trips_2019.loc[county_trips_2019['County Name']=='Somerset County']
#somerset_county.head()

In [18]:
# Create a list for 2019
county_trips_2019 = trips_df.loc[trips_df['year']==2019,:]
county_trips_2019_dec = county_trips_2019.loc[county_trips_2019['month']==12,:]
county_trips_2019_dec

Unnamed: 0,Level,Date,State Postal Code,County Name,Population Staying at Home,Population Not Staying at Home,Number of Trips,Number of Trips <1,Number of Trips 1-3,Number of Trips 3-5,...,Number of Trips 10-25,Number of Trips 25-50,Number of Trips 50-100,Number of Trips 100-250,Number of Trips 250-500,Number of Trips >=500,datetime,day,month,year
7348,County,12/1/2019,NJ,Atlantic County,66055,198563,958617,227435,245935,99485,...,140507,44234,20983,5347,1221,515,2019-12-01,1,12,2019
7349,County,12/1/2019,NJ,Bergen County,221285,712544,3626019,1040883,929753,430133,...,466637,86276,32885,31594,7343,4173,2019-12-01,1,12,2019
7350,County,12/1/2019,NJ,Burlington County,100818,343205,1767839,442970,392723,184132,...,301086,72049,29468,8121,7357,1000,2019-12-01,1,12,2019
7351,County,12/1/2019,NJ,Camden County,128426,377102,2091588,640737,515933,264367,...,271579,44346,21831,6052,1862,795,2019-12-01,1,12,2019
7352,County,12/1/2019,NJ,Cape May County,24736,67541,306897,75265,57896,42770,...,43743,20609,5548,1868,788,361,2019-12-01,1,12,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8022,County,12/31/2019,NJ,Salem County,10857,51559,300738,66025,44250,41737,...,69313,17677,3549,1480,403,113,2019-12-31,31,12,2019
8023,County,12/31/2019,NJ,Somerset County,51048,279104,1467984,270505,378529,200923,...,247554,90829,15914,5026,1621,1489,2019-12-31,31,12,2019
8024,County,12/31/2019,NJ,Sussex County,21011,119358,655829,102820,134733,101090,...,129060,54896,8396,808,166,214,2019-12-31,31,12,2019
8025,County,12/31/2019,NJ,Union County,107688,448673,2715466,759257,786332,345503,...,350180,83834,19706,10329,2775,2418,2019-12-31,31,12,2019


In [24]:
county_trips_2019_dec_sum1 = pd.DataFrame(county_trips_2019_dec['Population Staying at Home']).sum
county_trips_2019_dec_sum1

<bound method DataFrame.sum of       Population Staying at Home
7348                       66055
7349                      221285
7350                      100818
7351                      128426
7352                       24736
...                          ...
8022                       10857
8023                       51048
8024                       21011
8025                      107688
8026                       16295

[651 rows x 1 columns]>

In [None]:
# Create a list for 2020
#county_trips_2020 = nonulls_trips_distance_df.loc[nonulls_trips_distance_df['year']==2020,:]

# Display the 2020 dataframe 
#county_trips_2020.head(2)

In [None]:
# Create a list for 2021
#ounty_trips_2021 = nonulls_trips_distance_df.loc[nonulls_trips_distance_df['year']==2021,:]

# Display the 2021 dataframe 
#ounty_trips_2021.head(2)

In [None]:
# Combining the three lists
#frames = [county_trips_2019, county_trips_2020, county_trips_2021]

#combined_years = pd.concat(frames)
#combined_years

## Charts and Graphs

In [None]:
#total_staying_home_2019 = (county_trips_2019['Population Staying at Home'].sum())
#total_staying_home_2019

#total_staying_home_2020 = (county_trips_2020['Population Staying at Home'].sum())
#total_staying_home_2020

#total_staying_home_2021 = (county_trips_2020['Population Staying at Home'].sum())
#total_staying_home_2021



In [None]:
# Filter the dataframe to only chart the population staying at home in each county
#summary_staying_home = pd.DataFrame({"2019": [total_staying_home_2019], "2020": [total_staying_home_2020],
                                   #"2021":[total_staying_home_2021]})

#summary_staying_home

In [None]:
#somerset_county = county_trips_2019.loc[county_trips_2019['County Name']=='Somerset County']
#somerset_county

In [None]:
#plt.scatter((somerset_county["datetime"]),(somerset_county["Population Staying at Home"]), facecolors="plum", edgecolors="black", alpha = 0.5)
#f=plt.figure()
#f.set_figwidth(5)
#f.set_figheight(2)
#plt.xlim('2019-01', '2020-01')
#plt.ylim(min(somerset_county["Population Staying at Home"]),max(somerset_county["Population Staying at Home"]))
#plt.xlabel("Date")
#plt.ylabel("Population Staying at Home")
#plt.title(f"NJ Population Staying at Home - 2019")
#plt.savefig("output/LatTmp.png")

#plt.show()