In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:
def prepare_dataframe(df):
    """Index ``df`` by its parsed ``timestamp`` column, in place.

    The frame is modified in place and also returned, so callers that ignore
    the return value still end up with a datetime-indexed frame.

    The function can be called repeatedly on the same frame: once
    ``timestamp`` has been moved into the index there is no column left to
    convert, so the frame is kept as it is (the index is only re-parsed if it
    is not a ``DatetimeIndex`` yet).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``timestamp`` column, or a frame whose index is already
        named ``timestamp``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.

    Raises
    ------
    KeyError
        If ``timestamp`` is neither a column nor the name of the index.
    """
    if 'timestamp' in df.columns:
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df.set_index('timestamp', inplace=True)
    elif df.index.name == 'timestamp':
        # The cell was run before on this frame: nothing to move, just make
        # sure the existing index really holds datetimes.
        if not isinstance(df.index, pd.DatetimeIndex):
            df.index = pd.to_datetime(df.index).rename('timestamp')
    else:
        raise KeyError('timestamp')
    return df

In [3]:
# Load the Berlin bike data CSV; prepare_dataframe() expects the resulting
# frame to have a 'timestamp' column.
df = pd.read_csv('berlin_bikedata_2017-2019.csv')

In [4]:
# Parse 'timestamp' and make it the index. `df` itself is modified in place,
# so the return value is only used here to display the prepared frame.
prepare_dataframe(df)

Unnamed: 0_level_0,station,total_bikes,hour,hour_str,weekday,day_name,month,month_name,year,description,lat,lon
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017-01-01 00:00:00,02-MI-JAN-N,6,0,0 Uhr,6,Sunday,1,January,2017,Jannowitzbrücke Nord,52.513932,13.417835
2017-01-01 00:00:00,02-MI-JAN-S,5,0,0 Uhr,6,Sunday,1,January,2017,Jannowitzbrücke Süd,52.513943,13.417611
2017-01-01 00:00:00,03-MI-SAN-O,4,0,0 Uhr,6,Sunday,1,January,2017,Invalidenstraße Ost,52.527177,13.372016
2017-01-01 00:00:00,03-MI-SAN-W,5,0,0 Uhr,6,Sunday,1,January,2017,Invalidenstraße West,52.527686,13.373105
2017-01-01 00:00:00,05-FK-OBB-O,16,0,0 Uhr,6,Sunday,1,January,2017,Oberbaumbrücke Ost,52.501199,13.445060
...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-31 23:00:00,21-NK-MAY,34,23,23 Uhr,1,Tuesday,12,December,2019,Maybachufer,52.493000,13.429000
2019-12-31 23:00:00,23-TK-KAI,6,23,23 Uhr,1,Tuesday,12,December,2019,Kaisersteg,52.457270,13.518700
2019-12-31 23:00:00,24-MH-ALB,0,23,23 Uhr,1,Tuesday,12,December,2019,Alberichstraße,52.492500,13.558490
2019-12-31 23:00:00,26-LI-PUP,13,23,23 Uhr,1,Tuesday,12,December,2019,Paul-und-Paula-Uferweg,52.500250,13.474380


### Bar chart for all stations (to compare values)

In [5]:
class ComparisonBetweenStations:
    """Parameters for comparison between stations.

    Attributes
    ----------
    years : sequence of int
        The selected years, stored exactly as passed in.
    years_string : str
        Compact label for the selection. The earliest year is written in
        full; later years are shortened to their last two digits as long as
        they share the first year's leading two digits, e.g.
        ``[2019, 2017, 2018]`` -> ``"2017/18/19"`` and ``[2019]`` ->
        ``"2019"``. An empty selection yields ``""``.
    aggregation : str
        Name of the aggregation to apply, stored as passed in.
    """
    def __init__(self, years, aggregation):
        self.years = years
        self.aggregation = aggregation

        ordered = sorted(years)
        if not ordered:
            # Nothing selected: there is no year to print.
            self.years_string = ''
            return

        first = str(ordered[0])
        prefix = first[:2]
        labels = [first]
        for year in ordered[1:]:
            text = str(year)
            # Take the prefix from the data instead of assuming '20', and fall
            # back to the full year if a later year has a different prefix.
            labels.append(text[2:] if text[:2] == prefix else text)
        self.years_string = '/'.join(labels)

In [6]:
# Parameters used by the cells below: year 2019, with the "mean" aggregation
# as interpreted by aggregate().
comparison = ComparisonBetweenStations([2019], "mean")

In [7]:
def aggregate(df, comparison):
    """Return ``total_bikes`` aggregated per station for the selected years.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``DatetimeIndex`` and the columns ``description`` and
        ``total_bikes``.
    comparison : object
        Needs two attributes: ``years`` (years to keep) and ``aggregation``
        (``"sum"`` or ``"mean"``).

    Returns
    -------
    pandas.DataFrame
        One row per ``description`` with a single ``total_bikes`` column,
        sorted ascending by ``total_bikes``.

        * ``"sum"``: total over all rows of the selected years.
        * ``"mean"``: mean of the daily totals. Only days that fall in one of
          the selected years are averaged; a day inside a selected year
          without any rows (between a station's first and last reading) still
          counts as a day with total 0.

    Raises
    ------
    ValueError
        If ``comparison.aggregation`` is neither ``"sum"`` nor ``"mean"``.
    """
    if comparison.aggregation not in ("sum", "mean"):
        raise ValueError(
            f"Unsupported aggregation {comparison.aggregation!r}; "
            "expected 'sum' or 'mean'"
        )

    selected = df[df.index.year.isin(comparison.years)]
    per_station = selected.groupby('description')[['total_bikes']]

    if selected.empty:
        # No rows for the requested years: return an empty result instead of
        # resampling an empty index.
        return per_station.sum()

    if comparison.aggregation == "sum":
        bikes_df = per_station.sum()
    else:
        daily = per_station.resample('D').sum()
        # resample() creates a bin for every day between a station's first and
        # last reading. With non-adjacent years (e.g. 2017 and 2019) that
        # includes every day of the unselected years in between, each with a
        # total of 0. Drop those days so they do not pull the mean down.
        days = daily.index.get_level_values(-1)
        daily = daily[days.year.isin(comparison.years)]
        bikes_df = daily.groupby(level=0)[['total_bikes']].mean()

    return bikes_df.sort_values('total_bikes', ascending=True)

In [8]:
# One row per station description with its aggregated total_bikes,
# sorted ascending by total_bikes.
bikes_df = aggregate(df, comparison)

In [9]:
# Set general style for plotly express graphs: module-wide defaults for the
# figure template and the continuous color scale (reversed Plasma).
px.defaults.template = "ggplot2"
px.defaults.color_continuous_scale = px.colors.sequential.Plasma_r

In [19]:
# Station description that map_colors() highlights in the bar chart.
selected_station = "Schwedter Steg"

In [63]:
def get_key(my_dict, val):
    """Look up the first key of ``my_dict`` whose value equals ``val``.

    Keys are checked in the dict's iteration order. If no value matches, the
    string ``"key doesn't exist"`` is returned instead of a key.
    """
    matching_keys = (key for key, value in my_dict.items() if val == value)
    return next(matching_keys, "key doesn't exist")


def map_colors(dataframe, station_name):
    """Return the station names and a color map that highlights one station.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with ``description`` either as a column or as (a level of) the
        index; the index is reset before the names are read.
    station_name : str
        Station to highlight.

    Returns
    -------
    tuple(list, dict)
        ``stations_list``: the ``description`` values in row order, as a real
        list. ``color_map``: maps every station name to ``'lightslategray'``,
        except ``station_name``, which maps to ``'crimson'``.

        If ``station_name`` does not occur in the frame, nothing is
        highlighted and every station maps to ``'lightslategray'``.
    """
    stations_list = dataframe.reset_index()['description'].tolist()
    # Decide the color per name, not per position. A missing station then
    # simply produces no highlight, and a repeated name cannot have its
    # highlight overwritten by a later grey entry.
    color_map = {
        name: 'crimson' if name == station_name else 'lightslategray'
        for name in stations_list
    }
    return stations_list, color_map

In [64]:
# Station names (in bikes_df row order) and the color map that marks
# selected_station; both are passed to px.bar in the next cell.
stations_list, color_map = map_colors(bikes_df, selected_station)

In [65]:
# Horizontal bar chart: aggregated total_bikes per bicycle counter, with the
# bar colors taken from color_map.
fig = px.bar(
    bikes_df.reset_index(),
    x="total_bikes",
    y="description",
    color=stations_list,
    color_discrete_map=color_map,
    orientation='h',
    labels={"total_bikes": "Total Bikes", "description": "Bicycle Counter"},
)
# Large, nearly transparent label with the selected year(s), placed in paper
# coordinates at the right edge, slightly below the plot area.
fig.add_annotation(
    text=f"{comparison.years_string}",
    xref="paper",
    yref="paper",
    x=1,
    y=-0.06,
    showarrow=False,
    opacity=0.1,
    font=dict(family='Arial', size=100, color="black"),
)
fig.show()

### Total Bikes across all stations as indicator

In [11]:
# Calculate total bikes for chosen timeframe.
# Rows of the selected years are summed per station, then the station totals
# are added up. The second .sum() already yields a scalar, so int() is applied
# to a plain number rather than to a one-element Series (which recent pandas
# versions deprecate).
sum_total_bikes = int(
    df[df.index.year.isin(comparison.years)]
    .groupby('description')['total_bikes']
    .sum()  # per-station totals
    .sum()  # grand total across stations
)
sum_total_bikes

18024377

In [12]:
# Draw indicator: a single number showing the total bikes of the selection.
fig = go.Figure()
fig.add_trace(go.Indicator(
    mode="number",
    value=int(sum_total_bikes),
    # The layout grid below has exactly one cell, so the trace belongs to
    # row 0 / column 0 (grid indices are 0-based); column 1 does not exist.
    domain={'row': 0, 'column': 0}))
fig.update_layout(
    grid={'rows': 1, 'columns': 1, 'pattern': "independent"},
    # The title is supplied through the template's indicator defaults.
    template={'data': {'indicator': [{
        'title': {'text': f"Total Bikes ({comparison.years_string})"},
    }]}}
)