# Dublin Bikes -  Data Loading
This notebook downloads and consolidates a years worth of bike and dock availability in quarterly csv files. Data points are recorded every 10 minutes and the four files are consolidated into a single csv file. The datasets are freely provided by the [Smart Dublin](https://data.smartdublin.ie/dataset/dublinbikes-api)  and the [Met Éireann](https://www.met.ie/climate/available-data/historical-data) websites.

In [None]:
#Setup and common libraries

import pandas as pd
import numpy as np
import warnings
import time
import datetime as dt
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import urllib
import folium
#import sklearn
import seaborn as sns
import os
#from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
#from sklearn.cluster import KMeans
# from matplotlib import inline
from ipywidgets import interact, interact_manual

## Defines a list of tuples with ordered pairs of filenames and retrieval urls. Dublin Bikes provide their data on a quarterly basis

In [None]:

if not os.path.exists("data"):
    os.makedirs("data")
    
    
dataframes = [
    (
        "data/dublinbikes_20190101_20190401.csv",
        "https://data.smartdublin.ie/dataset/33ec9fe2-4957-4e9a-ab55-c5e917c7a9ab/resource/538165d7-535e-4e1d-909a-1c1bfae901c5/download/dublinbikes_20190101_20190401.csv",
    ),
    (
        "data/dublinbikes_20190401_20190701.csv",
        "https://data.smartdublin.ie/dataset/33ec9fe2-4957-4e9a-ab55-c5e917c7a9ab/resource/76fdda3d-d8be-441b-92dd-0ee36d9c5316/download/dublinbikes_20190401_20190701.csv",
    ),
    (
        "data/dublinbikes_20190701_20191001.csv",
        "https://data.smartdublin.ie/dataset/33ec9fe2-4957-4e9a-ab55-c5e917c7a9ab/resource/305d39ac-b6a0-4216-a535-0ae2ddf59819/download/dublinbikes_20190701_20191001.csv",
    ),
    (
        "data/dublinbikes_20191001_20200101.csv",
        "https://data.smartdublin.ie/dataset/33ec9fe2-4957-4e9a-ab55-c5e917c7a9ab/resource/5d23332e-4f49-4c41-b6a0-bffb77b33d64/download/dublinbikes_20191001_20200101.csv",
    ),
]

## Define the names and locations of the consolidated Bike and Dock availability csv and the weather csv.

In [None]:
bikes_df = "data/bikes.csv"
weather_df = "data/weather.csv"

In [None]:
# Retrieve data functions for historical weather data from the Irish Meteorological Society websites 
for item in dataframes:
    if os.path.exists(item[0]):
        continue
    print(f"Downloading {item[0]} from {item[1]}")
    urllib.request.urlretrieve(item[1], item[0])
    
def retrieve_weather():
    weather_url = "http://cli.fusio.net/cli/climate_data/webdata/hly175.csv"
    urllib.request.urlretrieve(weather_url, weather_df)
    #Conditionally calls  retrieval functions if CSVs are not in place in the"data" directory
    
if not os.path.exists(weather_df):
    retrieve_weather()

In [None]:
#Conditionally calls  weather retrieval function
if not os.path.exists(weather_df):
    retrieve_weather()

In [14]:
# Read and concatenate quarterly bike data  
# Read  Phoinex Park (station 175) weather data from the Irish Meteorological Society website on an hourly basis. Original file is hly175.csv
bikes_df = "data/bikes.csv"
weather_df = pd.read_csv("data/weather.csv", skiprows=15)
bikes_df = pd.concat([pd.read_csv(item[0]) for item in dataframes])

if not os.path.exists("data/bikes.csv"):
    bikes_df.to_csv("data/bikes.csv", index=False)

In [26]:
bikes_df.sample(8)    

Unnamed: 0,STATION ID,LAST UPDATED,NAME,BIKE STANDS,AVAILABLE BIKE STANDS,AVAILABLE BIKES,STATUS,ADDRESS,LATITUDE,LONGITUDE
1510468,27,2019-02-16 00:52:48,MOLESWORTH STREET,20,20,0,Open,Molesworth Street,53.34129,-6.258117
2719839,6,2019-09-24 09:38:39,CHRISTCHURCH PLACE,20,18,2,Open,Christchurch Place,53.343369,-6.27012
2531101,32,2019-03-20 04:38:48,PEARSE STREET,30,30,0,Open,Pearse Street,53.344303,-6.250427
360567,23,2019-07-12 08:29:02,CUSTOM HOUSE,30,12,17,Open,Custom House,53.348278,-6.254662
1686586,74,2019-02-21 08:08:09,OLIVER BOND STREET,30,29,1,Open,Oliver Bond Street,53.343891,-6.280531
2305013,109,2019-09-10 13:09:44,BUCKINGHAM STREET LOWER,29,27,2,Open,Buckingham Street Lower,53.353333,-6.249319
58735,95,2019-07-02 22:28:48,ROYAL HOSPITAL,40,0,40,Open,Royal Hospital,53.343899,-6.29706
1806223,9,2019-11-27 21:59:20,EXCHEQUER STREET,24,22,1,Open,Exchequer Street,53.343033,-6.263578


In [20]:
#Time can be dropped
bikes_df.dtypes

STATION ID                 int64
TIME                      object
LAST UPDATED              object
NAME                      object
BIKE STANDS                int64
AVAILABLE BIKE STANDS      int64
AVAILABLE BIKES            int64
STATUS                    object
ADDRESS                   object
LATITUDE                 float64
LONGITUDE                float64
dtype: object

In [21]:
#remove rows where no update actually occurs
bikes_df= bikes_df.drop(['TIME'], axis = 1)
# bikes_df.drop_duplicates(keep= 'first',inplace=True)

In [22]:
bikes_df.head(11)  

Unnamed: 0,STATION ID,LAST UPDATED,NAME,BIKE STANDS,AVAILABLE BIKE STANDS,AVAILABLE BIKES,STATUS,ADDRESS,LATITUDE,LONGITUDE
0,1,2018-12-31 23:50:05,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
1,1,2019-01-01 00:00:14,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
2,1,2019-01-01 00:00:14,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
3,1,2019-01-01 00:10:24,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
4,1,2019-01-01 00:10:24,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
5,1,2019-01-01 00:20:32,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
6,1,2019-01-01 00:20:32,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
7,1,2019-01-01 00:32:52,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
8,1,2019-01-01 00:32:52,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
9,1,2019-01-01 00:43:01,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
