# Table of Contents
* [Import and Data Preparation](#1)
* [Development by Station](#2)
* [Averages and Visualization](#3)
* [Stats by Station and Correlations](#4)

In [None]:
# packages

# standard
import numpy as np
import pandas as pd
import time

# plots
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

# Maps
import folium

In [None]:
# list the contents of the input directory
# (the leading `!` is an IPython shell escape, so this line only runs in a notebook/IPython session)
!ls -l ../input/bike-traffic-in-munich/

<a id='1'></a>
# Import and Data Preparation

In [None]:
# load location data (radzaehlstellen.csv) into a data frame
df_locations = pd.read_csv('../input/bike-traffic-in-munich/radzaehlstellen.csv')
# bare expression: the notebook renders the frame as the cell output
df_locations

In [None]:
# German -> English column names for the locations table
location_column_names = {
    'zaehlstelle': 'measuring_station',
    'zaehlstelle_lang': 'measuring_station_long',
    'richtung_1': 'direction_1',
    'richtung_2': 'direction_2',
    'besonderheiten': 'notes',
}

# rename() returns a new frame; columns not listed in the mapping keep their names
df_locations = df_locations.rename(columns=location_column_names)

df_locations

In [None]:
# load daily stats (rad_tage.csv) into a data frame
df_days = pd.read_csv('../input/bike-traffic-in-munich/rad_tage.csv')
df_days.head()  # preview the first rows

In [None]:
# German -> English column names for the daily stats table
day_column_names = {
    'datum': 'date',
    'uhrzeit_start': 'time_start',
    'uhrzeit_ende': 'time_end',
    'zaehlstelle': 'measuring_station',
    'richtung_1': 'direct_1',
    'richtung_2': 'direct_2',
    'gesamt': 'total',
    'niederschlag': 'precipitation',
    'bewoelkung': 'cloud_cover_perc',
    'sonnenstunden': 'sun_hours',
}

# rename() returns a new frame; columns not listed in the mapping keep their names
df_days = df_days.rename(columns=day_column_names)

df_days.head()

In [None]:
# structure of data frame: prints the column names, dtypes and non-null counts
df_days.info()

<a id='2'></a>
# Development by Station

In [None]:
# development of the total over time, one line per measuring station
fig, ax = plt.subplots(figsize=(16, 6))
sns.lineplot(data=df_days, x='date', y='total',
             hue='measuring_station',
             alpha=0.5,
             ax=ax)
ax.xaxis.set_major_locator(plt.MaxNLocator(20))  # cap the number of x tick labels
ax.set_title('Total by Measuring Station')
plt.xticks(rotation=90)
ax.grid()
plt.show()

#### Using logarithmic y-axis:

In [None]:
# same development plot as before, drawn with a logarithmic y-axis
fig, ax = plt.subplots(figsize=(16, 6))
sns.lineplot(data=df_days, x='date', y='total',
             hue='measuring_station',
             alpha=0.5,
             ax=ax)
ax.xaxis.set_major_locator(plt.MaxNLocator(20))  # cap the number of x tick labels
plt.xticks(rotation=90)
ax.set_yscale('log')
ax.legend(loc='lower left')
ax.set_title('Total by Measuring Station')
ax.grid()
plt.show()

In [None]:
# pairwise scatter plots of the weather columns and the total, coloured by station
pair_columns = ['sun_hours', 'precipitation', 'cloud_cover_perc',
                'total', 'measuring_station']
sns.pairplot(df_days[pair_columns],
             hue='measuring_station',
             plot_kws={'alpha': 0.25, 's': 5},
             diag_kws={'alpha': 0.25})
plt.show()

### Pick only one station:

In [None]:
# keep only the rows of the measuring station 'Arnulf'
is_arnulf = df_days['measuring_station'] == 'Arnulf'
df_days_sel = df_days[is_arnulf]

# same pairwise scatter plots as for all stations, now for the selected one
selected_columns = ['sun_hours', 'precipitation', 'cloud_cover_perc',
                    'total', 'measuring_station']
sns.pairplot(df_days_sel[selected_columns],
             hue='measuring_station',
             plot_kws={'alpha': 0.25, 's': 10},
             diag_kws={'alpha': 0.25})
plt.show()

In [None]:
# development of the total over time for the selected station
fig, ax = plt.subplots(figsize=(16, 6))
sns.lineplot(data=df_days_sel, x='date', y='total', alpha=0.5, ax=ax)
ax.xaxis.set_major_locator(plt.MaxNLocator(20))  # cap the number of x tick labels
ax.set_title('Total for Measuring Station "Arnulf"')
plt.xticks(rotation=90)
ax.grid()
plt.show()

<a id='3'></a>
# Averages and Visualization

### Calculate averages by measuring station:

In [None]:
# Mean of every numeric column per measuring station.
# numeric_only=True keeps the text columns (date, time_start, time_end were
# read without date parsing) out of the aggregation: pandas >= 2.0 raises a
# TypeError when asked to average them, while older versions silently
# dropped them, so this reproduces the older result on every version.
df_days_means = (
    df_days
    .groupby(['measuring_station'], as_index=False)
    .mean(numeric_only=True)
)
df_days_means

In [None]:
# stacked bars: average per station, split into the two directions
stations = df_days_means['measuring_station']
mean_direction_1 = df_days_means['direct_1']
mean_direction_2 = df_days_means['direct_2']

fig, ax = plt.subplots(figsize=(11, 5))
ax.bar(stations, mean_direction_1, label='Direction 1')
# direction 2 is stacked on top of the direction 1 bars
ax.bar(stations, mean_direction_2, bottom=mean_direction_1, label='Direction 2')
ax.set_ylabel('Average by Station and Direction')
ax.set_title('')
ax.legend()
ax.grid()
plt.show()

In [None]:
# attach the per-station averages to the locations table
# (default inner join on the shared 'measuring_station' column)
df_locations_x = pd.merge(df_locations, df_days_means, on='measuring_station')
# show merged table
df_locations_x

### Visualize Measuring Stations:

In [None]:
# interactive map
zoom_factor = 11.5 # initial zoom level of the map

my_map_1 = folium.Map(location=[48.14, 11.55], zoom_start=zoom_factor)

# one circle per station; the radius scales with the square root of the
# station's average total, so the circle area is proportional to that total
for _, station in df_locations_x.iterrows():
    mean_total = station['total']
    popup_text = ('Location: ' + station['measuring_station'] +
                  ' - Total: ' + str(np.round(mean_total, 2)))
    folium.Circle(
        location=[station['latitude'], station['longitude']],
        radius=10 * np.sqrt(mean_total),
        popup=popup_text,
        color='blue',
        fill=True,
        fill_color='blue',
    ).add_to(my_map_1)

my_map_1 # display

<a id='4'></a>
# Stats by Station and Correlations

In [None]:
# reshape to wide format: one row per date, one column per station,
# each cell holding the summed 'total' for that date and station
station_totals = df_days[['date', 'measuring_station', 'total']]
df_days_pivot = station_totals.pivot_table(
    index='date',                   # arrange by date
    columns=['measuring_station'],  # new column for each station
    aggfunc='sum',
    fill_value=0,                   # date/station combinations without data become 0
)
# flatten the two-level ('total', station) column index to plain station names
df_days_pivot.columns = df_days_pivot.columns.get_level_values(1).tolist()
df_days_pivot.head()

In [None]:
# summary stats by station (one column per station in the pivoted table)
df_days_pivot.describe()

In [None]:
# pairwise correlations between the station columns of the pivoted table
corr_pearson = df_days_pivot.corr(method='pearson')
corr_spearman = df_days_pivot.corr(method='spearman')

# both heatmaps side by side, sharing the same 0..1 colour range
fig_corr = plt.figure(figsize=(15, 5))

ax1 = fig_corr.add_subplot(1, 2, 1)
sns.heatmap(corr_pearson, annot=True, cmap='RdYlGn', vmin=0, vmax=+1, ax=ax1)
ax1.set_title('Pearson Correlation')

ax2 = fig_corr.add_subplot(1, 2, 2, sharex=ax1)
sns.heatmap(corr_spearman, annot=True, cmap='RdYlGn', vmin=0, vmax=+1, ax=ax2)
ax2.set_title('Spearman Correlation')
plt.show()

In [None]:
# scatter plot with a fitted regression line for every pair of stations
regression_style = {
    'line_kws': {'color': 'magenta'},
    'scatter_kws': {'alpha': 0.2},
}
sns.pairplot(df_days_pivot, kind='reg', plot_kws=regression_style)
plt.show()
