# Dublin Bikes -  Data Loading
This notebook downloads and consolidates a years worth of bike and dock availability in quarterly csv files. Data points are recorded every 10 minutes and the four files are consolidated into a single csv file. The datasets are freely provided by the [Smart Dublin](https://data.smartdublin.ie/dataset/dublinbikes-api)  and the [Met Éireann](https://www.met.ie/climate/available-data/historical-data) websites.

In [None]:
#Setup and common libraries

import pandas as pd
import numpy as np
import warnings
import time
import datetime as dt
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import urllib
import folium
#import sklearn
import seaborn as sns
import os
#from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
#from sklearn.cluster import KMeans
# from matplotlib import inline
from ipywidgets import interact, interact_manual

## Defines a list of tuples with ordered pairs of filenames and retrieval urls. Dublin Bikes provide their data on a quarterly basis

In [None]:

if not os.path.exists("data"):
    os.makedirs("data")
    
    
dataframes = [
    (
        "data/dublinbikes_20190101_20190401.csv",
        "https://data.smartdublin.ie/dataset/33ec9fe2-4957-4e9a-ab55-c5e917c7a9ab/resource/538165d7-535e-4e1d-909a-1c1bfae901c5/download/dublinbikes_20190101_20190401.csv",
    ),
    (
        "data/dublinbikes_20190401_20190701.csv",
        "https://data.smartdublin.ie/dataset/33ec9fe2-4957-4e9a-ab55-c5e917c7a9ab/resource/76fdda3d-d8be-441b-92dd-0ee36d9c5316/download/dublinbikes_20190401_20190701.csv",
    ),
    (
        "data/dublinbikes_20190701_20191001.csv",
        "https://data.smartdublin.ie/dataset/33ec9fe2-4957-4e9a-ab55-c5e917c7a9ab/resource/305d39ac-b6a0-4216-a535-0ae2ddf59819/download/dublinbikes_20190701_20191001.csv",
    ),
    (
        "data/dublinbikes_20191001_20200101.csv",
        "https://data.smartdublin.ie/dataset/33ec9fe2-4957-4e9a-ab55-c5e917c7a9ab/resource/5d23332e-4f49-4c41-b6a0-bffb77b33d64/download/dublinbikes_20191001_20200101.csv",
    ),
]

## Define the names and locations of the consolidated Bike and Dock availability csv and the weather csv.

In [None]:
bikes_df = "data/bikes.csv"
weather_df = "data/weather.csv"

In [None]:
# Retrieve data functions for historical weather data from the Irish Meteorological Society websites 
for item in dataframes:
    if os.path.exists(item[0]):
        continue
    print(f"Downloading {item[0]} from {item[1]}")
    urllib.request.urlretrieve(item[1], item[0])
    
def retrieve_weather():
    weather_url = "http://cli.fusio.net/cli/climate_data/webdata/hly175.csv"
    urllib.request.urlretrieve(weather_url, weather_df)
    #Conditionally calls  retrieval functions if CSVs are not in place in the"data" directory
    
if not os.path.exists(weather_df):
    retrieve_weather()

In [None]:
#Conditionally calls  weather retrieval function
if not os.path.exists(weather_df):
    retrieve_weather()

In [None]:
# Read and concatenate quarterly bike data  
# Read  Phoinex Park (station 175) weather data from the Irish Meteorological Society website on an hourly basis. Original file is hly175.csv
bikes_df = "data/bikes.csv"
weather_df = pd.read_csv("data/weather.csv", skiprows=15)
bikes_df = pd.concat([pd.read_csv(item[0]) for item in dataframes])

if not os.path.exists("data/bikes.csv"):
    bikes_df.to_csv("data/bikes.csv", index=False)

In [35]:
bikes_df.sample(8)    

Unnamed: 0,STATION ID,TIME,LAST UPDATED,NAME,BIKE STANDS,AVAILABLE BIKE STANDS,AVAILABLE BIKES,STATUS,ADDRESS,LATITUDE,LONGITUDE
25918,93,2019-04-01 07:20:02,2019-04-01 07:16:55,HEUSTON STATION (CENTRAL),40,40,0,Open,Heuston Station (Central),53.346603,-6.296924
2286969,36,2019-06-10 05:15:02,2019-06-10 05:09:24,ST. STEPHEN'S GREEN EAST,40,7,33,Open,St. Stephen's Green East,53.337826,-6.256035
511649,100,2019-07-16 22:45:02,2019-07-16 22:43:03,HEUSTON BRIDGE (SOUTH),25,9,16,Open,Heuston Bridge (South),53.347107,-6.292041
911412,61,2019-10-29 18:45:02,2019-10-29 18:39:49,HARDWICKE PLACE,25,23,2,Open,Hardwicke Place,53.357044,-6.263232
2187930,82,2019-03-09 20:20:03,2019-03-09 20:15:45,MOUNT BROWN,22,20,2,Open,Mount Brown,53.341644,-6.29719
793997,24,2019-01-25 21:35:03,2019-01-25 21:34:19,CATHAL BRUGHA STREET,20,3,17,Open,Cathal Brugha Street,53.35215,-6.260533
387706,16,2019-10-13 04:50:02,2019-10-13 04:40:59,GEORGES QUAY,20,0,20,Open,Georges Quay,53.347507,-6.252192
43807,40,2019-01-02 05:45:02,2019-01-02 05:41:37,JERVIS STREET,21,3,18,Open,Jervis Street,53.348301,-6.266651


In [36]:
#Time can be dropped
bikes_df.dtypes

STATION ID                 int64
TIME                      object
LAST UPDATED              object
NAME                      object
BIKE STANDS                int64
AVAILABLE BIKE STANDS      int64
AVAILABLE BIKES            int64
STATUS                    object
ADDRESS                   object
LATITUDE                 float64
LONGITUDE                float64
dtype: object

In [37]:
###remove rows where no update actually occurs
# bikes_df= bikes_df.drop(['TIME'], axis = 1)
# bikes_df.drop_duplicates(keep= 'first',inplace=True)

In [38]:
bikes_df.head(11)  

Unnamed: 0,STATION ID,TIME,LAST UPDATED,NAME,BIKE STANDS,AVAILABLE BIKE STANDS,AVAILABLE BIKES,STATUS,ADDRESS,LATITUDE,LONGITUDE
0,1,2019-01-01 00:00:03,2018-12-31 23:50:05,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
1,1,2019-01-01 00:05:03,2019-01-01 00:00:14,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
2,1,2019-01-01 00:10:02,2019-01-01 00:00:14,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
3,1,2019-01-01 00:15:02,2019-01-01 00:10:24,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
4,1,2019-01-01 00:20:01,2019-01-01 00:10:24,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
5,1,2019-01-01 00:25:02,2019-01-01 00:20:32,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
6,1,2019-01-01 00:30:02,2019-01-01 00:20:32,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
7,1,2019-01-01 00:35:01,2019-01-01 00:32:52,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
8,1,2019-01-01 00:40:02,2019-01-01 00:32:52,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501
9,1,2019-01-01 00:45:02,2019-01-01 00:43:01,CLARENDON ROW,31,30,1,Open,Clarendon Row,53.340927,-6.262501


In [39]:
weather_df.head()

Unnamed: 0,date,ind,rain,ind.1,temp,ind.2,wetb,dewpt,vappr,rhum,msl
0,16-aug-2003 01:00,0,0.0,0,9.2,0,8.9,8.5,11.1,95,1021.9
1,16-aug-2003 02:00,0,0.0,0,9.0,0,8.7,8.5,11.1,96,1021.7
2,16-aug-2003 03:00,0,0.0,0,8.2,0,8.0,7.7,10.5,96,1021.2
3,16-aug-2003 04:00,0,0.0,0,8.4,0,8.1,7.9,10.7,97,1021.2
4,16-aug-2003 05:00,0,0.0,0,7.7,0,7.5,7.3,10.2,97,1021.1


In [43]:
bikes_df.to_csv("data/bikes1.csv", index=False)
bikes_df = None

weather_df.to_csv("data/weather1.csv", index=False)
weather_df = None