# Import Dependencies & Initialize Notebook

In [1]:
import pandas as pd 
import plotly.graph_objects as go
import numpy as np
import kaleido

# Set Global Variables

In [2]:
## Named hex colours referenced by the plotting cells
color_palette = dict(
    eerie_black='#1C2321',
    cadet_gray='#7D98A1',
    paynes_gray='#5E6572',
    powder_blue='#A9B4C2',
    anti_flash_white='#EEF1EF',
)

# Import Data 

In [4]:
## Processed game log; the path is relative to the working directory
file_path = '../data/processed/game_log_all.csv'

## index_col=False stops pandas from using the first CSV column as the index
df = pd.read_csv(
    filepath_or_buffer = file_path,
    index_col = False,
)

## Preview the first rows of the frame as loaded
display(df.head())

Unnamed: 0,Year,Date,Era,Visiting_Team_Score,Home_Team_Score
0,1871,1871-05-04,Pre-1900,0,2
1,1871,1871-05-05,Pre-1900,20,18
2,1871,1871-05-06,Pre-1900,12,4
3,1871,1871-05-08,Pre-1900,12,14
4,1871,1871-05-09,Pre-1900,9,5


In [5]:
## Columns to keep, spelled as they appear in the CSV header
selected_columns = ['Year', 'Date', 'Era', 'Visiting_Team_Score', 'Home_Team_Score']

## Category order for the ordered `era` column; any value not listed here
## becomes NaN when the column is converted to a Categorical.
era_labels = ['Pre-1900', 'Dead Ball era', 'Live Ball era', 'Integration era', 'Expansion era', 'Free Agent era', 'Steroid era', 'Contemporary era']


def to_snake_case(name):
    """Return `name` lower-cased with spaces replaced by underscores."""
    return name.lower().replace(' ', '_')


## Convert column names to lower snake case BEFORE selecting, so the cell is
## safe to re-run: selecting the capitalised names from a frame whose headers
## were already lower-cased would raise a KeyError.
df = (
    df.rename(columns = to_snake_case)
      .loc[:, [to_snake_case(col) for col in selected_columns]]
      .assign(
          date = lambda games: pd.to_datetime(games['date']),
          ## Absolute difference between the two teams' scores
          score_difference = lambda games: (games['visiting_team_score'] - games['home_team_score']).abs(),
          era = lambda games: pd.Categorical(games['era'], categories = era_labels, ordered = True),
      )
)

print(df.shape)
print(df.dtypes)
display(df.head())

(228779, 6)
year                            int64
date                   datetime64[ns]
era                          category
visiting_team_score             int64
home_team_score                 int64
score_difference                int64
dtype: object


Unnamed: 0,year,date,era,visiting_team_score,home_team_score,score_difference
0,1871,1871-05-04,Pre-1900,0,2,2
1,1871,1871-05-05,Pre-1900,20,18,2
2,1871,1871-05-06,Pre-1900,12,4,8
3,1871,1871-05-08,Pre-1900,12,14,2
4,1871,1871-05-09,Pre-1900,9,5,4


In [6]:
## One row per (era, visiting score, home score) combination
group_columns = ['era', 'visiting_team_score', 'home_team_score']

## `era` is categorical, so observed=False is passed explicitly: it keeps the
## rows for combinations that never occurred (count 0), which the NaN
## replacement below relies on. Leaving it implicit emits a FutureWarning on
## recent pandas and changes the result once the default becomes True.
df_agg = (
    df.groupby(group_columns, observed = False)
      .agg({'year': 'count'})
      .rename(columns = {'year': 'game_count'})
      .reset_index(drop = False)
)

## Replace 0's with NaN
df_agg['game_count'] = df_agg['game_count'].replace(0, np.nan)

df_agg.head()

Unnamed: 0,era,visiting_team_score,home_team_score,game_count
0,Pre-1900,0,0,51.0
1,Pre-1900,0,1,130.0
2,Pre-1900,0,2,123.0
3,Pre-1900,0,3,144.0
4,Pre-1900,0,4,129.0


In [7]:
## Distinct era values from the aggregated frame, in order of first appearance
unique_eras = list(df_agg['era'].unique())

In [8]:
## Pick one era by its position in `unique_eras`, then subset both the
## aggregated frame and the game-level frame to that era.
chosen_era = unique_eras[5]

is_chosen_agg_row = df_agg['era'] == chosen_era
is_chosen_game_row = df['era'] == chosen_era

df_agg_filtered = df_agg.loc[is_chosen_agg_row]
df_filtered = df.loc[is_chosen_game_row]

# Create Plots

In [30]:
## Highest score by either side across all eras, so every heatmap shares the
## same axes and the figures can be compared directly.
axis_upper_bound = int(df_agg[['home_team_score', 'visiting_team_score']].max().max())

## Heatmap cells are centred on integer scores and extend half a unit each
## way. Pad both ends by 0.5; ending the axis exactly at `axis_upper_bound`
## would cut the highest-score row and column in half.
axis_low = -0.5
axis_high = axis_upper_bound + 0.5

## Layout shared by every era; only the title changes inside the loop
heatmap_layout = dict(
    xaxis_title='Home Team Score',
    yaxis_title='Visiting Team Score',
    width=1000,
    height=1000,
    ## Reversed range puts score 0 at the top of the y-axis
    yaxis=dict(
        range = [axis_high, axis_low],
        dtick = 1,
        showgrid = False,
        zeroline = False,
        side = 'left'
        ),
    xaxis = dict(
        range = [axis_low, axis_high],
        side = 'top',
        dtick = 1,
        tickangle=0,
        showgrid = False,
        zeroline = False
        ),
    plot_bgcolor=color_palette['paynes_gray'],
    font=dict(family="Courier New", size=14, color="black"),
    title_x = 0.5,
    title_xanchor = 'center',
    title_y = 0.98
)

for era in unique_eras:
    df_agg_filtered = df_agg[df_agg['era'] == era]

    ## Create a heatmap plot; NaN counts are left as gaps and the colour bar
    ## is hidden.
    fig = go.Figure(data=go.Heatmap(
            x=df_agg_filtered['home_team_score'].astype(object),
            y=df_agg_filtered['visiting_team_score'].astype(object),
            z=df_agg_filtered['game_count'],
            colorscale='Magma',
            hoverongaps = False,
            showscale = False))

    ## Set axis labels and title
    fig.update_layout(title=f"{era}", **heatmap_layout)

    ## Write one PNG per era, named after the era in lower snake case
    era_slug = era.lower().replace(" ", "_")
    file_path = f"../reports/figures/heatmap_{era_slug}.png"
    fig.write_image(file_path)

In [23]:
## 95th percentile of the absolute score difference over all games
df['score_difference'].quantile(q = 0.95)

9.0

In [27]:
## Cap the x-axis at the 99th percentile of score difference (all eras pooled)
## rather than at the maximum.
axis_upper_bound = df['score_difference'].quantile(q = 0.99)

## Layout and bar colour shared by every era; the title is supplied per era
histogram_layout = dict(
    xaxis_title='Score Difference',
    yaxis_title='Density',
    width=1000,
    height=400,
    xaxis = dict(range = [0, axis_upper_bound], dtick = 1),
    plot_bgcolor=color_palette['paynes_gray'],
    font=dict(family="Courier New", size=14, color="black"),
    title_x = 0.5,
    title_xanchor = 'center'
)
bar_marker = dict(color=color_palette['anti_flash_white'])

for era in unique_eras:
    df_filtered = df[df['era'] == era]

    ## Create a density plot of score difference
    density_trace = go.Histogram(
        x=df_filtered['score_difference'],
        histnorm='density',
        marker=bar_marker,
    )
    fig = go.Figure(data=density_trace)
    fig.update_layout(title= f"{era}", **histogram_layout)

    ## Write one PNG per era, named after the era in lower snake case
    era_slug = era.lower().replace(" ", "_")
    file_path = f"../reports/figures/hist_{era_slug}.png"
    fig.write_image(file_path)
