# This Notebook is a Playground to Practice on August 2020 data
### The key result here is the average daily ridership for August 2020 (~1.1 million, about 25% of the ~4.6 million daily average in 2019)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Image format used for inline figures; supported values include 'png', 'retina', 'jpeg', 'svg', 'pdf'.
%config InlineBackend.figure_format = 'svg'
# Render matplotlib figures inline, directly below the cell that produced them.
%matplotlib inline 
# Project-local helper module; every funcs.* call in this notebook comes from it.
import functions_python as funcs
# Reload imported modules automatically before each cell executes, so edits to
# functions_python take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Let pandas render up to 1000 rows before truncating a displayed DataFrame.
pd.set_option('display.max_rows', 1000)

In [None]:
# Inputs for this notebook: one MTA turnstile file fetched over HTTP and the
# local important_stations.csv.
TURNSTILE_SAMPLE_URL = 'http://web.mta.info/developers/data/nyct/turnstile/turnstile_200919.txt'
HIGHLIGHTED_STATIONS_CSV = 'important_stations.csv'

locations_df = pd.read_csv(TURNSTILE_SAMPLE_URL)
highlighted_stations = pd.read_csv(HIGHLIGHTED_STATIONS_CSV)

In [None]:
# Date codes for the August 2020 MTA data, newest first.
# NOTE(review): these look like YYMMDD suffixes of the turnstile file names
# (compare turnstile_200919.txt above) — confirm against funcs.combine_dfs_add_time.
august_2020_df_list = [
    '200829',
    '200822',
    '200815',
    '200808',
    '200801',
]

In [None]:
# Build the working frame for the whole notebook from the date codes above.
# NOTE(review): presumably combine_dfs_add_time loads one file per code,
# concatenates them and adds a time column — confirm in functions_python.
aug_2020_df = funcs.combine_dfs_add_time(august_2020_df_list)
# Preview the first rows to sanity-check the load.
aug_2020_df.head()

In [None]:
# Replace aug_2020_df with the output of add_entry_and_exit_differences.
# A later cell reads an ENTRIES_DIFF column; presumably it is added here — TODO confirm.
# NOTE(review): the same name is rebound, so re-running this cell feeds the
# already-processed frame back into the helper; re-run from the load cell instead.
aug_2020_df = funcs.add_entry_and_exit_differences(aug_2020_df)
aug_2020_df.head()

In [None]:
# Replace aug_2020_df with the output of clean_entry_exit_values.
# NOTE(review): 3000 is presumably the largest per-reading entry/exit difference
# treated as valid (larger or negative values being counter resets or glitches) —
# confirm the exact meaning in functions_python.
aug_2020_df = funcs.clean_entry_exit_values(aug_2020_df, 3000)
aug_2020_df.head()

# There was an average ridership of about 1.1 million per day in August 2020
# This is about 25% of the 2019 average

In [None]:
# Total ENTRIES_DIFF for each DATE, then the mean of those daily totals.
daily_entry_totals = aug_2020_df.groupby(['DATE'])['ENTRIES_DIFF'].sum()
daily_entry_totals.mean()

In [None]:
# Per-station totals for the month, as computed by funcs.totals_combined_per_station.
aug_2020_totals_per_station = funcs.totals_combined_per_station(aug_2020_df)
# Display every row ordered by the COMBINED column, largest first. The sorted
# copy is only shown, not stored: aug_2020_totals_per_station keeps its original order.
aug_2020_totals_per_station.sort_values("COMBINED", ascending=False)

In [None]:
# Per-station averages, as computed by funcs.avg_combined_per_station.
# NOTE(review): the averaging unit (per day? per reading?) is defined in
# functions_python — confirm before quoting these numbers.
aug_2020_avg_per_station = funcs.avg_combined_per_station(aug_2020_df)
# Display every row ordered by COMBINED, largest first; the sorted copy is not stored.
aug_2020_avg_per_station.sort_values("COMBINED", ascending=False)

In [None]:
# Result of funcs.avg_per_day_of_week on the cleaned frame.
# NOTE(review): going by the names, this is average traffic per station per
# weekday — confirm the grouping in functions_python.
aug_2020_avg_traffic_per_day_per_station = funcs.avg_per_day_of_week(aug_2020_df)
aug_2020_avg_traffic_per_day_per_station.head()

In [None]:
# Result of funcs.avg_per_day_of_week_and_time on the cleaned frame.
# NOTE(review): going by the names, this breaks the weekday averages down
# further by time of day — confirm the grouping in functions_python.
aug_2020_avg_traffic_per_day_per_time_per_station = funcs.avg_per_day_of_week_and_time(aug_2020_df)
aug_2020_avg_traffic_per_day_per_time_per_station.head()

In [None]:
# Preview the raw contents of important_stations.csv before its columns are tidied.
highlighted_stations.head()

In [None]:
# Tidy the station list: discard the 'Unnamed: 0' column when it exists and
# rename 'stations' to 'STATION', the key used for the merge further down.
highlighted_stations = (
    highlighted_stations
    .drop(columns=['Unnamed: 0'], errors='ignore')
    .rename(columns={'stations': 'STATION'})
)
highlighted_stations.head()

In [None]:
# Narrow view of the station list: just the station name and its "total score".
# NOTE(review): the merge in the next cell uses highlighted_stations (all columns),
# not this frame, and station_score_df is not referenced again in the cells
# visible here — confirm whether the merge was meant to use it.
station_score_df = highlighted_stations[["STATION", "total score"]]
station_score_df.head()

In [None]:
# Attach the highlighted-station columns (including "total score") to the
# per-station totals. The join is inner, so only stations present in both
# frames are kept.
aug_totals_with_score = aug_2020_totals_per_station.merge(
    highlighted_stations,
    how='inner',
    on='STATION',
)
aug_totals_with_score.head()

In [None]:
# Bar colour for each "total score", keyed by the score written as a string.
# Scores that share a colour are grouped: 7 green, 6 blue, 5-4 yellow,
# 3 orange, 2-0 red.
_SCORE_COLOR_GROUPS = (
    (("7",), 'g'),
    (("6",), 'b'),
    (("5", "4"), 'y'),
    (("3",), 'orange'),
    (("2", "1", "0"), 'r'),
)
color_mapper = {
    score: color
    for scores, color in _SCORE_COLOR_GROUPS
    for score in scores
}

In [None]:
# Translate each station's "total score" into a plot colour via color_mapper.
# The scores are stringified first because color_mapper has string keys.
# NOTE(review): a score with no matching key maps to NaN; if "total score" is
# stored as float, astype(str) yields e.g. "7.0", which has no key — verify dtype.
score_labels = aug_totals_with_score["total score"].astype(str)
aug_totals_with_score["color"] = score_labels.map(color_mapper)
aug_totals_with_score.head(50)

In [None]:
# Bar chart of COMBINED for the first 50 merged stations, one bar per station,
# coloured by the station's score.
#
# The COMBINED Series (indexed by STATION) is plotted instead of the DataFrame
# so that pandas applies the colour list bar by bar; for a DataFrame bar plot
# `color` is resolved per column. The colours are taken from the same 50 rows
# that are plotted, so the two always have matching length.
# NOTE(review): head(50) takes rows in merge order, not the 50 largest COMBINED
# values — sort by COMBINED first if a "top 50" chart is intended.
plotted_stations = aug_totals_with_score.head(50)
ax = plotted_stations.set_index('STATION')['COMBINED'].plot.bar(
    rot=90,  # 50 station names overlap when drawn horizontally
    color=plotted_stations["color"].tolist(),
    figsize=(10, 5),
)
ax.set_ylabel('COMBINED');