# Recreating @eleonoran's implementation, which can be found [here](https://ibb.co/h1F0Lh1).

In [1]:
import math
import numpy as np 
import pandas as pd
from PIL import Image

import matplotlib
from matplotlib.backends.backend_pgf import FigureCanvasPgf
# Register the PGF canvas as the handler for the 'pdf' output format.
matplotlib.backend_bases.register_backend('pdf', FigureCanvasPgf)

from matplotlib import rc
# Render text through LaTeX, and load the xcolor package in both the
# text.latex and the pgf preambles.
rc('text',usetex=True)
rc('text.latex', preamble=r'\usepackage{xcolor}')
rc('pgf', preamble=r'\usepackage{xcolor}', rcfonts=False)

import matplotlib.pyplot as plt
import seaborn as sns

pd.options.mode.chained_assignment = None # suppress pandas' chained-assignment (SettingWithCopy) warning

In [2]:
# Team whose rows are filtered out of the attendance table later on.
team_name = 'Chicago'
# Load the attendance data; the path is relative to the working directory.
df = pd.read_csv('../NHL Attendance.csv')
# Preview the first rows of the raw table.
print(df.head())

    SEASON  RANK          TEAM  HOME GAMES  HOME ATTENDANCE   ROAD GAMES  \
0  2017-18     1       Chicago          41          887,794           41   
1  2017-18     2      Montreal          41          873,283           41   
2  2017-18     3  Philadelphia          41          800,214           41   
3  2017-18     4       Detroit          41          800,115           41   
4  2017-18     5       Toronto          41          786,677           41   

   ROAD ATTENDANCE   TOTAL GAMES  TOTAL ATTENDANCE   
0          723,773            82         1,611,567   
1          733,736            82         1,607,019   
2          702,781            82         1,502,995   
3          717,295            82         1,517,410   
4          751,940            82         1,538,617   


In [3]:
def clean_numeric_column(df, column_name):
    """Convert a comma-formatted text column of ``df`` to numbers.

    Surrounding whitespace and ``,`` thousands separators are removed from
    every value before the column is parsed with ``pd.to_numeric``.

    The column is replaced on ``df`` itself (the frame is modified in place)
    and the same frame is returned, so both ``df = clean_numeric_column(df, c)``
    and a bare ``clean_numeric_column(df, c)`` work.

    Args:
        df: DataFrame that contains ``column_name``.
        column_name: Exact label of the column to clean.

    Returns:
        The same ``df`` object, with ``column_name`` holding numeric values.

    Raises:
        KeyError: If ``column_name`` is not a column of ``df``.
        ValueError: If a cleaned value cannot be parsed as a number.
    """
    series = df[column_name]
    # Already numeric (e.g. the notebook cell was run a second time): there
    # is nothing to clean, and the ``.str`` accessor would raise on it.
    if pd.api.types.is_numeric_dtype(series):
        return df
    series = series.str.strip()
    # regex=False: the comma is a literal separator, not a pattern, and the
    # default for ``regex`` differs between pandas versions.
    series = series.str.replace(',', '', regex=False)
    df[column_name] = pd.to_numeric(series)
    return df

# The column label is padded with spaces, so the padded name is passed as-is.
df = clean_numeric_column(df, ' TOTAL ATTENDANCE ')

In [4]:
def get_value_from_column(df, filter_column, filter_value, value_column):
    """Return ``value_column`` of the first row matching a filter.

    The first row of ``df`` whose ``filter_column`` equals ``filter_value``
    is selected and its entry in ``value_column`` is returned.

    Args:
        df: DataFrame to search.
        filter_column: Label of the column to compare against
            ``filter_value``. Labels containing spaces are supported.
        filter_value: Value the filter column must equal.
        value_column: Label of the column whose value is returned.

    Returns:
        The scalar stored in ``value_column`` for the first matching row.

    Raises:
        IndexError: If no row matches the filter.
        KeyError: If ``value_column`` is not a column of ``df``.
    """
    if filter_column in df.columns:
        # Compare the column directly: interpolating the label into a query
        # string fails for labels that are not valid identifiers
        # (e.g. 'HOME GAMES').
        matches = df.loc[df[filter_column] == filter_value]
    else:
        # Not a column label (e.g. an index level name); let query() resolve
        # the name exactly as before.
        matches = df.query(f'{filter_column} == @filter_value')
    if len(matches) == 0:
        raise IndexError(
            f'no row where {filter_column!r} == {filter_value!r}'
        )
    # Pick the column before the row so the value keeps the column's dtype;
    # taking the row first upcasts mixed numeric rows (ints become floats).
    return matches[value_column].iloc[0]

In [5]:
# Total every numeric column per season. numeric_only=True keeps text columns
# (such as TEAM) out of the result; newer pandas versions no longer drop them
# automatically and would concatenate their strings instead.
season_sums = df.groupby('SEASON').sum(numeric_only=True)
# Turn the SEASON index back into a regular column so it can be filtered on.
season_sums = season_sums.reset_index(level='SEASON')
print(season_sums.head())

# League-wide total attendance for the 2000-01 and 2017-18 seasons.
season_2000_01 = get_value_from_column(season_sums, 'SEASON', '2000-01', ' TOTAL ATTENDANCE ')
season_2017_18 = get_value_from_column(season_sums, 'SEASON', '2017-18', ' TOTAL ATTENDANCE ')
print(season_2000_01)
print(season_2017_18)

    SEASON  RANK  HOME GAMES  ROAD GAMES  TOTAL GAMES   TOTAL ATTENDANCE 
0  2000-01   465        1232        1232         2464            40799384
1  2001-02   465        1230        1230         2460            41226412
2  2002-03   465        1230        1230         2460            40812257
3  2003-04   465        1230        1230         2460            40711797
4  2005-06   465        1230        1230         2460            41707630
40799384
44347842


In [6]:
# Keep only the rows whose TEAM matches the team chosen in `team_name`.
team_df = df.loc[df['TEAM'] == team_name]
print(team_df.head())

      SEASON  RANK     TEAM  HOME GAMES  HOME ATTENDANCE   ROAD GAMES  \
0    2017-18     1  Chicago          41          887,794           41   
31   2016-17     1  Chicago          41          891,827           41   
61   2015-16     1  Chicago          41          896,240           41   
91   2014-15     1  Chicago          41          892,532           41   
121  2013-14     1  Chicago          41          927,545           41   

     ROAD ATTENDANCE   TOTAL GAMES   TOTAL ATTENDANCE   
0            723,773            82             1611567  
31           761,985            82             1653812  
61           774,572            82             1670812  
91           763,830            82             1656362  
121          730,251            82             1657796  
