# Task 2.2 - New York's CitiBike 

## Import Libraries

In [None]:
import pandas as pd
import numpy as np
import os
import requests
import json
from datetime import datetime

In [None]:
# Create a list with all files in the folder using a list comprehension

folderpath = r"Data" 
filepaths  = [os.path.join(folderpath, name) for name in os.listdir(folderpath)]

In [None]:
filepaths

#### By putting all of our data in the Data folder, we can collect every file we need with a single comprehension instead of a longer for-loop, which is not needed here. Comprehensions let us write less code, are optimized by Python so they typically run faster than an equivalent loop that appends to a list, and reduce function call overhead.

In [None]:
# Read and join/concatenate all files simultaneously

df = pd.concat((pd.read_csv(f) for f in filepaths), ignore_index = True)

In [None]:
df.shape

In [None]:
df.head()

In [None]:
df.tail()

## Get weather data from NOAA's API

In [None]:
# Define your NOAA token

Token = 'WxSJZBiUNEJFprPrelTdssDhQrsDHDKQ'

In [None]:
# Get the API 

r = requests.get('https://www.ncdc.noaa.gov/cdo-web/api/v2/data?datasetid=GHCND&datatypeid=TAVG&limit=1000&stationid=GHCND:USW00014732&startdate=2022-01-01&enddate=2022-12-31', headers={'token':Token})

In [None]:
# Load the api response as a json

d = json.loads(r.text)

In [None]:
d

In [None]:
# Secure all items in the response that correspond to TAVG

avg_temps = [item for item in d['results'] if item['datatype']=='TAVG']

In [None]:
# Get only the date field from all average temperature readings

dates_temp = [item['date'] for item in avg_temps]

In [None]:
# Get the temperature from all average temperature readings

temps = [item['value'] for item in avg_temps]

In [None]:
temps

In [None]:
# Put the results in a dataframe

df_temp = pd.DataFrame()

In [None]:
# Get only date and cast it to date time; convert temperature from tenths of Celsius to normal Celsius

df_temp['date'] = [datetime.strptime(d, "%Y-%m-%dT%H:%M:%S") for d in dates_temp]
df_temp['avgTemp'] = [float(v)/10.0 for v in temps]

In [None]:
df_temp.tail()

In [None]:
df_temp.head()

In [None]:
df.dtypes

In [None]:
df['started_at'] = pd.to_datetime(df['started_at'])

In [None]:
df['date'] = pd.to_datetime(df['started_at'], format='%Y-%m-%d').dt.date

In [None]:
df['date'] = pd.to_datetime(df['date'])

In [None]:
df_temp.head()

In [None]:
%%time
df_merged = df.merge(df_temp, how = 'left', on = 'date', indicator = True)

In [None]:
df_merged.head()

In [None]:
df_merged['_merge'].value_counts(dropna = False)

In [None]:
df_merged.to_csv('newyork_data.csv')

In [None]:
df.shape