# Weekday vs Weekend Analysis 

In [3]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
from glob import glob
import json
import requests

import matplotlib.pyplot as plt
%matplotlib inline

import plotly.plotly as py
import plotly.graph_objs as go
import plotly

In [4]:
# Load the Bicing table and cast both availability counters to int in one step.
full_df = pd.read_hdf("bicing_full.h5", key="table").astype({"bikes": int, "slots": int})

Quick preview of the data

In [5]:
# Show the first five rows (head() defaults to n=5).
full_df.head()

Unnamed: 0,id,status,bikes,slots,zip,timestamp
0,1,'OPN',5,19,8013,2015-11-22 23:40:00
1,2,'OPN',8,19,8010,2015-11-22 23:40:00
2,3,'OPN',11,16,8013,2015-11-22 23:40:00
3,4,'OPN',5,16,8013,2015-11-22 23:40:00
4,5,'OPN',21,18,8003,2015-11-22 23:40:00


## 1. Bikes by district on weekdays

I am going to assign a district to each station id and compute the average number of bikes per station in each district for every hour of the day on weekdays.

I will use https://www.bicing.cat/availability_map/getJsonObject to look up the district of each station id.

In [6]:
# Look up the district of every station from the Bicing availability feed.
url = "https://www.bicing.cat/availability_map/getJsonObject"
response = requests.get(url, timeout=30)  # never hang the notebook on a dead endpoint
response.raise_for_status()  # fail loudly instead of parsing an HTTP error page
stations = response.json()

# station id -> district code. The code is normalised to str because the
# district_names lookup below is keyed by strings.
id_district_table = {
    int(station["id"]): str(station["district"]) for station in stations
}

#assign district to each id (ids that are absent from the feed become NaN)
full_df["district"] = full_df["id"].map(id_district_table)

#convert district codes into district names
district_names = {"1":"Ciutat Vella",
                "2":"Eixample",
                "3":"Sants-Montjuïc",
                "4":"Les Corts",
                "5":"Sarrià - Sant Gervasi",
                "6":"Gràcia",
                "7":"Horta-Guinardó",
                "8":"Nou Barris",
                "9":"Sant Andreu",
                "10":"Sant Martí"}

full_df["district"] = full_df["district"].map(district_names)

#subset the database to get weekdays only (excluding Saturdays and Sundays);
#dayofweek runs from 0 (Monday) to 6 (Sunday), and rows without a valid
#timestamp compare False here, so they are left out
mask_weekdays = full_df.timestamp.dt.dayofweek < 5
full_df_weekdays = full_df[mask_weekdays].set_index("timestamp")

In [32]:
# One line per district: mean of `bikes` over all station records of that
# district, grouped by hour of day (weekdays only).
data_weekday = []

for district in district_names.values():
    district_df = full_df_weekdays[full_df_weekdays["district"] == district]
    district_means = district_df.groupby(district_df.index.hour).bikes.mean()
    data_weekday.append(
        go.Scatter(x=district_means.index, y=district_means.values, name=district)
    )

layout = go.Layout(
    title="Average number of bikes per hour on weekdays",
    xaxis=dict(title="Hour"),
    yaxis=dict(title="Average bikes per station"),
)

# The layout has to travel inside the Figure: keyword arguments of py.iplot are
# upload options (filename, sharing, ...), not figure properties.
fig_weekday = go.Figure(data=data_weekday, layout=layout)
py.iplot(fig_weekday)

## Summary:
* It's easy to see that the Bicing service opens at 5 AM
* Sant Martí, Gràcia, Eixample, Les Corts, Sant Andreu, Sants-Montjuïc and Sarrià - Sant Gervasi are the districts where people take bikes in the morning and ride to other "barrios". Sarrià - Sant Gervasi is an extreme example: the average number of bikes decreases from 16 at 5 AM to fewer than 4 at 10 AM.
* Ciutat Vella, Nou Barris and Horta-Guinardó are the districts where most people park their bikes in the morning.
* The number of bikes in Gràcia and Horta-Guinardó decreases during the day. My guess is that people use Bicing to go downhill in the morning but not necessarily to go back.

## 2. Bikes by district on weekends

In [29]:
# Weekend subset; dayofweek is 5 for Saturday and 6 for Sunday.
mask_weekends = full_df.timestamp.dt.dayofweek >= 5

# The int cast is applied inside the chain, on the new frame, rather than by
# assigning a column on a boolean-mask slice (chained assignment).
full_df_weekends = (
    full_df[mask_weekends]
    .astype({"bikes": int})
    .set_index("timestamp")
)

# One line per district: mean of `bikes` over all station records of that
# district, grouped by hour of day.
data_weekend = []

for district in district_names.values():
    district_df = full_df_weekends[full_df_weekends["district"] == district]
    district_means = district_df.groupby(district_df.index.hour).bikes.mean()
    data_weekend.append(
        go.Scatter(x=district_means.index, y=district_means.values, name=district)
    )

layout = go.Layout(
    title="Average number of bikes per hour on weekends",
    xaxis=dict(title="Hour"),
    yaxis=dict(title="Average bikes per station"),
)

# The layout has to travel inside the Figure: keyword arguments of py.iplot are
# upload options (filename, sharing, ...), not figure properties.
fig_weekend = go.Figure(data=data_weekend, layout=layout)
py.iplot(fig_weekend)

## Summary:
* Users from Sarrià - Sant Gervasi, Sants-Montjuïc, Les Corts, Eixample and Sant Martí don't like spending weekends in their districts :)
* People usually travel to Ciutat Vella on weekends

## 3. House - work on weekday

I have noticed that there are very few bikes after 9 AM at the station next to my house. At the same time the stations in Poblenou, where I work, are almost full. I should probably wake up earlier...

In [25]:
#create plot for my house
HOME_STATION_ID = 241
home_station = full_df_weekdays[full_df_weekdays["id"] == HOME_STATION_ID]

# mean bikes and mean slots of this station for every hour of the day (weekdays)
home_hourly = home_station.groupby(home_station.index.hour)[["bikes", "slots"]].mean()

trace_bikes = go.Bar(x=home_hourly.index, y=home_hourly["bikes"].values, name="bikes")
trace_slots = go.Bar(x=home_hourly.index, y=home_hourly["slots"].values, name="slots")

# both axis titles share one font definition
axis_title_font = dict(family='Courier New, monospace', size=18, color='#7f7f7f')

data = [trace_bikes, trace_slots]
layout = go.Layout(
    barmode='stack',
    title="Bikes at station 241 (Pl. Maragall, 22)",
    xaxis=dict(title='Hour', titlefont=axis_title_font),
    yaxis=dict(title='Bikes availability', titlefont=axis_title_font),
)

fig = go.Figure(data=data, layout=layout)
# The filename is specific to this chart: uploading two different figures under
# one shared filename makes the later upload replace the earlier one.
py.iplot(fig, filename='station-241-weekday-availability')



In [24]:
#create plot for station next my work
WORK_STATION_ID = 150
my_work = full_df_weekdays[full_df_weekdays["id"] == WORK_STATION_ID]

# mean bikes and mean slots of this station for every hour of the day (weekdays)
work_hourly = my_work.groupby(my_work.index.hour)[["bikes", "slots"]].mean()

trace_bikes = go.Bar(
    x=work_hourly.index,
    y=work_hourly["bikes"].values,
    name="bikes",
    marker=dict(color='rgb(26, 118, 255)'),
)
trace_slots = go.Bar(
    x=work_hourly.index,
    y=work_hourly["slots"].values,
    name="slots",
    marker=dict(color='rgb(55, 83, 109)'),
)

# both axis titles share one font definition
axis_title_font = dict(family='Courier New, monospace', size=18, color='#7f7f7f')

data = [trace_bikes, trace_slots]
layout = go.Layout(
    barmode='stack',
    title="Bikes at station 150 (Espronceda, 124)",
    xaxis=dict(title='Hour', titlefont=axis_title_font),
    yaxis=dict(title='Bikes availability', titlefont=axis_title_font),
)

fig = go.Figure(data=data, layout=layout)
# The filename is specific to this chart: uploading two different figures under
# one shared filename makes the later upload replace the earlier one.
py.iplot(fig, filename='station-150-weekday-availability')