### Ways to merge DarkSky API data onto Capital Bikeshare 2017 data set.

### Step 1: Get BikeShare data

In [1]:
import pandas as pd
import numpy as np
import os
import requests
%matplotlib inline

# Directory holding the 2017 Capital Bikeshare trip CSV files.
PATH = '2017-capitalbikeshare-tripdata'

# Read every trip file, then concatenate ONCE. Calling pd.concat inside the
# loop re-copies all rows read so far on each pass, and seeding it with an
# empty DataFrame() can disturb dtype inference.
# Only *.csv entries are read so stray files in the folder (.DS_Store,
# notebook checkpoints, ...) do not break read_csv; sorting the names makes
# the row order reproducible across machines.
trip_files = sorted(
    name for name in os.listdir(path=PATH) if name.lower().endswith('.csv')
)
if not trip_files:
    raise FileNotFoundError(f'No CSV files found in {PATH!r}')

data_2017 = pd.concat(
    [pd.read_csv(os.path.join(PATH, name)) for name in trip_files],
    sort=True,          # keep the original behaviour: columns sorted by name
    ignore_index=True,  # unique row labels instead of one 0..n range per file
)

# Parse the trip timestamps and derive the ride duration from them.
# NOTE: this overwrites any 'Duration' column already present in the CSVs
# with a Timedelta (End date - Start date).
data_2017['Start date'] = pd.to_datetime(data_2017['Start date'])
data_2017['End date'] = pd.to_datetime(data_2017['End date'])
data_2017['Duration'] = data_2017['End date'] - data_2017['Start date']

# Calendar features taken from the ride's start time.
data_2017['Month'] = data_2017['Start date'].dt.month
data_2017['Day'] = data_2017['Start date'].dt.day
data_2017['Hour'] = data_2017['Start date'].dt.hour
data_2017['Weekday'] = data_2017['Start date'].dt.weekday

# Station metadata from the public GBFS feed. A timeout keeps the cell from
# hanging forever, and raise_for_status() surfaces HTTP errors here rather
# than as a confusing KeyError while parsing the JSON below.
response = requests.get(
    'https://gbfs.capitalbikeshare.com/gbfs/en/station_information.json',
    timeout=30,
)
response.raise_for_status()
stations = pd.DataFrame(response.json()['data']['stations'])
stations_short = stations[['lat', 'lon', 'name', 'capacity', 'short_name', 'station_id']].copy()

# 'short_name' is cast to int64 so it can be matched against the trips'
# 'Start station number' (assumed to be read as an integer column -- TODO confirm).
stations_short['short_name'] = stations_short['short_name'].astype(np.int64)

# Inner join: trips whose start station is missing from the feed are dropped.
df_merged = pd.merge(
    data_2017,
    stations_short,
    how='inner',
    left_on='Start station number',
    right_on='short_name',
)

### Step 2: Get weather data from Carolina

In [4]:
# Location of the weather export. Defaults to the original local path; set the
# WEATHER_CSV environment variable to point at the file on another machine.
WEATHER_CSV = os.environ.get('WEATHER_CSV', '/Users/PaulWlodkowski/Downloads/allweather.csv')

# The first CSV column is used as the row index.
weather = pd.read_csv(WEATHER_CSV, index_col=0)

# 'time' is interpreted as Unix epoch seconds; the result is timezone-naive.
weather['datetime'] = pd.to_datetime(weather['time'], unit='s')

### Step 3: Convert weather datetime column to DC time

In [28]:
# Three explicit steps: tag the naive timestamps as UTC, shift them to
# US/Eastern wall-clock time, then drop the timezone so the column is naive again.
utc_times = weather['datetime'].dt.tz_localize('UTC')
eastern_times = utc_times.dt.tz_convert('US/Eastern')
weather['datetime DC'] = eastern_times.dt.tz_localize(None)

### Step 4: MERGE!

**Option 1**: ``pd.merge_asof()``

In [42]:
# pd.merge_asof requires BOTH frames to be sorted on their join keys and
# raises otherwise. The trips were already sorted here; the weather frame is
# now sorted as well instead of relying on the row order of the CSV file.
# A stable sort (mergesort) keeps rows with equal timestamps in their
# existing order, so the result is unchanged when weather is already sorted.
trips_sorted = df_merged.sort_values(by='Start date')
weather_sorted = weather.sort_values(by='datetime DC', kind='mergesort')

# With the default backward direction, each trip receives the latest weather
# row whose 'datetime DC' is at or before the trip's 'Start date'.
merge_v1 = pd.merge_asof(
    trips_sorted,
    weather_sorted,
    left_on='Start date',
    right_on='datetime DC',
)

---

**Option 2**: Round (or floor) datetimes in bikeshare data, then just use regular ``pd.merge``

In [55]:
# Hour-resolution versions of the ride start time: nearest hour and hour start.
ride_start = df_merged['Start date']
df_merged['Start date rounded'] = ride_start.dt.round('h')
df_merged['Start date floored'] = ride_start.dt.floor('h')

# Exact-match join on the rounded hour (inner join, the pd.merge default);
# the floored column is kept on df_merged as the alternative key.
merge_v2 = pd.merge(
    df_merged,
    weather,
    how='inner',
    left_on='Start date rounded',
    right_on='datetime DC',
)

---

### Conclusion:
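- **Both methods seem to work equally well :)** They are not strictly identical, though: `pd.merge_asof` (Option 1) attaches the most recent weather row at or before each ride's start time and keeps every ride, while Option 2 matches on the *nearest* hour and, being an inner `pd.merge`, drops rides that have no weather row for that hour. If the weather data is hourly, merging on `Start date floored` instead should match rides to the same weather row as Option 1.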