### This notebook contains the following steps:
1. Import libraries
2. Import the quarterly bike trip files and combine them into one dataframe
3. Get weather data using NOAA's API
4. Merge bike trip and weather data

# 01 Importing libraries

In [6]:
import pandas as pd
import numpy as np
import os
import requests
import json
from datetime import datetime

# 02 Importing data

In [9]:
# Collect the trip CSV files found in the data folder.
# os.listdir() returns every entry of the folder in arbitrary order, so keep only
# the .csv files (skipping hidden files or sub-folders) and sort the result to make
# the row order of the concatenated dataframe reproducible between runs.

folderpath = r"02 Data/Original Data"
filepath = sorted(
    os.path.join(folderpath, name)
    for name in os.listdir(folderpath)
    if name.lower().endswith(".csv")
)

In [12]:
# Display the collected paths to check which files will be loaded
filepath

['02 Data/Original Data/Divvy_Trips_2018_Q1.csv',
 '02 Data/Original Data/Divvy_Trips_2018_Q2.csv',
 '02 Data/Original Data/Divvy_Trips_2018_Q3.csv',
 '02 Data/Original Data/Divvy_Trips_2018_Q4.csv']

In [19]:
# Read every file listed in `filepath` and stack the resulting frames vertically;
# ignore_index=True gives the combined frame a fresh 0..n-1 index
df = pd.concat(map(pd.read_csv, filepath), ignore_index=True)

In [15]:
# Preview the first rows of the combined trip data
df.head()

Unnamed: 0,trip_id,start_time,end_time,bikeid,tripduration,from_station_id,from_station_name,to_station_id,to_station_name,usertype,gender,birthyear
0,17536702,2018-01-01 00:12:00,2018-01-01 00:17:23,3304,323.0,69,Damen Ave & Pierce Ave,159,Claremont Ave & Hirsch St,Subscriber,Male,1988.0
1,17536703,2018-01-01 00:41:35,2018-01-01 00:47:52,5367,377.0,253,Winthrop Ave & Lawrence Ave,325,Clark St & Winnemac Ave (Temp),Subscriber,Male,1984.0
2,17536704,2018-01-01 00:44:46,2018-01-01 01:33:10,4599,2904.0,98,LaSalle St & Washington St,509,Troy St & North Ave,Subscriber,Male,1989.0
3,17536705,2018-01-01 00:53:10,2018-01-01 01:05:37,2302,747.0,125,Rush St & Hubbard St,364,Larrabee St & Oak St,Subscriber,Male,1983.0
4,17536706,2018-01-01 00:53:37,2018-01-01 00:56:40,3696,183.0,129,Blue Island Ave & 18th St,205,Paulina St & 18th St,Subscriber,Male,1989.0


In [16]:
# (rows, columns) of the combined trip data
df.shape

(3603082, 12)

# 03 Gathering Weather Data

In [20]:
# Read the NOAA API token from the environment instead of hard-coding it:
# a token written into the notebook is exposed to everyone who can read the file.
# Set it before starting Jupyter, e.g.  export NOAA_TOKEN=<your token>
# NOTE(review): the token that used to be written here should be treated as leaked
# and regenerated.

Token = os.environ.get('NOAA_TOKEN', '')
if not Token:
    print("NOAA_TOKEN is not set - define it in the environment before requesting weather data.")

In [23]:
# URL for O’Hare: daily average temperature (TAVG) from the GHCND dataset for
# station GHCND:USW00094846, 2018-01-01 through 2018-12-31.
# limit=1000 is large enough to hold one reading per day of the year.
url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data?datasetid=GHCND&datatypeid=TAVG&limit=1000&stationid=GHCND:USW00094846&startdate=2018-01-01&enddate=2018-12-31"

# The timeout keeps the cell from hanging indefinitely if the service does not answer.
r = requests.get(url, headers={'token': Token}, timeout=60)

# Fail here on an HTTP error status rather than later, when the payload is read.
r.raise_for_status()

In [24]:
# Parse the JSON text of the response body into Python objects
d = json.loads(r.text)  

In [55]:
# Keep only the records of the response whose datatype is TAVG
avg_temps = list(filter(lambda record: record['datatype'] == 'TAVG', d['results']))

# Split the kept records into two parallel lists:
# the 'date' field of each record ...
dates_temp = [record['date'] for record in avg_temps]

# ... and the matching 'value' field (the temperature reading)
temps = [record['value'] for record in avg_temps]

In [56]:
# Start from an empty dataframe; the 'date' and 'avgTemp' columns are assigned in the next cell
df_temp = pd.DataFrame()

In [57]:
# Parse each timestamp string into a datetime object, and divide each reading by 10
# (tenths of a degree Celsius -> degrees Celsius)

df_temp['date'] = [
    datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S")
    for stamp in dates_temp
]
df_temp['avgTemp'] = [
    float(reading) / 10.0
    for reading in temps
]

In [58]:
# Preview the first rows of the temperature data
df_temp.head()

Unnamed: 0,date,avgTemp
0,2018-01-01,-18.9
1,2018-01-02,-19.1
2,2018-01-03,-11.4
3,2018-01-04,-13.9
4,2018-01-05,-15.2


# 04 Merge bike trip and weather data

In [59]:
# Check the column types of the trip data before merging.
# NOTE(review): the recorded output lists a 'date' column and a datetime64 'start_time',
# but no cell above creates or converts them - the output appears to come from kernel
# state left over from cells that are no longer in the notebook. Confirm after a
# Restart & Run All.
df.dtypes

trip_id                       int64
start_time           datetime64[ns]
end_time                     object
bikeid                        int64
tripduration                 object
from_station_id               int64
from_station_name            object
to_station_id                 int64
to_station_name              object
usertype                     object
gender                       object
birthyear                   float64
date                         object
dtype: object

In [61]:
# Check the column types of the temperature data; 'date' is the key used for the merge
df_temp.dtypes

date       datetime64[ns]
avgTemp           float64
dtype: object

In [62]:
# Build the merge key: the calendar day on which each trip started.
# No earlier cell of this notebook creates a 'date' column, so reading df['date']
# on a fresh kernel raises a KeyError; derive the column from 'start_time' instead.
# pd.to_datetime() parses 'start_time' if it is still a string column, and
# .dt.normalize() resets the time of day to midnight so the values line up with
# the daily timestamps in df_temp['date'].

df['date'] = pd.to_datetime(df['start_time']).dt.normalize()

In [63]:
# Confirm that 'date' is now datetime64, the same type as df_temp['date']
df.dtypes

trip_id                       int64
start_time           datetime64[ns]
end_time                     object
bikeid                        int64
tripduration                 object
from_station_id               int64
from_station_name            object
to_station_id                 int64
to_station_name              object
usertype                     object
gender                       object
birthyear                   float64
date                 datetime64[ns]
dtype: object

In [64]:
# Attach the daily average temperature to every trip.
# how='left' keeps every trip even if no temperature exists for its date;
# indicator=True adds a '_merge' column recording where each row was matched;
# validate='many_to_one' makes pandas raise if df_temp holds the same date twice,
# which would otherwise silently duplicate trip rows.

df_merged = df.merge(
    df_temp,
    how = 'left',
    on = 'date',
    indicator = True,
    validate = 'many_to_one',
)

In [65]:
# Count rows per merge outcome; 'left_only' rows would be trips without a temperature
df_merged['_merge'].value_counts(dropna = False)

both          3603082
left_only           0
right_only          0
Name: _merge, dtype: int64

In [66]:
# Preview the first rows of the merged trip + temperature data
df_merged.head()

Unnamed: 0,trip_id,start_time,end_time,bikeid,tripduration,from_station_id,from_station_name,to_station_id,to_station_name,usertype,gender,birthyear,date,avgTemp,_merge
0,17536702,2018-01-01 00:12:00,2018-01-01 00:17:23,3304,323.0,69,Damen Ave & Pierce Ave,159,Claremont Ave & Hirsch St,Subscriber,Male,1988.0,2018-01-01,-18.9,both
1,17536703,2018-01-01 00:41:35,2018-01-01 00:47:52,5367,377.0,253,Winthrop Ave & Lawrence Ave,325,Clark St & Winnemac Ave (Temp),Subscriber,Male,1984.0,2018-01-01,-18.9,both
2,17536704,2018-01-01 00:44:46,2018-01-01 01:33:10,4599,2904.0,98,LaSalle St & Washington St,509,Troy St & North Ave,Subscriber,Male,1989.0,2018-01-01,-18.9,both
3,17536705,2018-01-01 00:53:10,2018-01-01 01:05:37,2302,747.0,125,Rush St & Hubbard St,364,Larrabee St & Oak St,Subscriber,Male,1983.0,2018-01-01,-18.9,both
4,17536706,2018-01-01 00:53:37,2018-01-01 00:56:40,3696,183.0,129,Blue Island Ave & 18th St,205,Paulina St & 18th St,Subscriber,Male,1989.0,2018-01-01,-18.9,both
