# Import Dependencies  & Initialize Notebook    

In [1]:
import pandas as pd 
import plotly.graph_objects as go
import numpy as np
import kaleido

# Import Data 

In [2]:
file_path = '../data/processed/game_log_all.csv'

df = pd.read_csv(file_path, index_col = False)

display(df.head())

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0.1,Unnamed: 0,Date,Game_Number,Day_of_Week,Visiting_Team_Name,Visiting_Team_League,Visiting_Team_Game_Number,Home_Team_Name,Home_Team_League,Home_Team_Game_Number,...,Home_Team_Starting_Player_ID_8,Home_Team_Starting_Player_Name_8,Home_Team_Starting_Player_Defensive_Position_8,Home_Team_Starting_Player_ID_9,Home_Team_Starting_Player_Name_9,Home_Team_Starting_Player_Defensive_Position_9,Additional_Information,Acquisition_Information,Year,Era
0,0,1871-05-04,0,Thu,CL1,,1,FW1,,1,...,mcdej101,James McDermott,8.0,kellb105,Bill Kelly,9.0,,D,1871,Pre-1900
1,1,1871-05-05,0,Fri,BS1,,1,WS3,,1,...,burrh101,Henry Burroughs,9.0,berth101,Henry Berthrong,8.0,HTBF,D,1871,Pre-1900
2,2,1871-05-06,0,Sat,CL1,,2,RC1,,1,...,birdg101,George Bird,7.0,stirg101,Gat Stires,9.0,,D,1871,Pre-1900
3,3,1871-05-08,0,Mon,CL1,,3,CH1,,1,...,pinke101,Ed Pinkham,5.0,zettg101,George Zettlein,1.0,,D,1871,Pre-1900
4,4,1871-05-09,0,Tue,BS1,,2,TRO,,1,...,pikel101,Lip Pike,3.0,cravb101,Bill Craver,6.0,HTBF,D,1871,Pre-1900


In [3]:
selected_columns = ['Year', 'Date', 'Era', 'Visiting_Team_Score', 'Home_Team_Score']
df = df[selected_columns]

## Convert column names to lower snake case
df.columns = [col.lower().replace(' ', '_') for col in df.columns]

df['date'] = pd.to_datetime(df['date'])
df['score_difference'] = abs(df['visiting_team_score'] - df['home_team_score'])

era_labels = ['Pre-1900', 'Dead Ball era', 'Live Ball era', 'Integration era', 'Expansion era', 'Free Agent era', 'Steroid era', 'Contemporary era']
df['era'] = pd.Categorical(df['era'], categories = era_labels, ordered = True)

print(df.shape)
print(df.dtypes)
display(df.head())

(228779, 6)
year                            int64
date                   datetime64[ns]
era                          category
visiting_team_score             int64
home_team_score                 int64
score_difference                int64
dtype: object


Unnamed: 0,year,date,era,visiting_team_score,home_team_score,score_difference
0,1871,1871-05-04,Pre-1900,0,2,2
1,1871,1871-05-05,Pre-1900,20,18,2
2,1871,1871-05-06,Pre-1900,12,4,8
3,1871,1871-05-08,Pre-1900,12,14,2
4,1871,1871-05-09,Pre-1900,9,5,4


In [4]:
group_columns = ['era', 'visiting_team_score', 'home_team_score']

df_agg = df.groupby(group_columns).agg(
    {
        'year': 'count'
    }
)

df_agg.reset_index(drop = False, inplace = True)
df_agg.rename(columns = {'year': 'game_count'}, inplace = True)

## Replace 0's with NaN
df_agg['game_count'] = df_agg['game_count'].replace(0, np.nan)

df_agg.head()

Unnamed: 0,era,visiting_team_score,home_team_score,game_count
0,Pre-1900,0,0,51.0
1,Pre-1900,0,1,130.0
2,Pre-1900,0,2,123.0
3,Pre-1900,0,3,144.0
4,Pre-1900,0,4,129.0


In [5]:
unique_eras = df_agg['era'].unique().tolist()

In [6]:
chosen_era = unique_eras[5]
df_agg_filtered = df_agg[df_agg['era'] == chosen_era]
df_filtered = df[df['era'] == chosen_era]

In [8]:
axis_upper_bound = max(max(df_agg['home_team_score']), max(df_agg['visiting_team_score']))

## Create a heatmap plot
fig = go.Figure(data=go.Heatmap(
        x=df_agg_filtered['home_team_score'].astype(object),
        y=df_agg_filtered['visiting_team_score'].astype(object),
        z=df_agg_filtered['game_count'],
        colorscale='Magma',  # You can choose any colorscale you prefer
        hoverongaps = False))

## Set axis labels and title
fig.update_layout(
    title=f"{chosen_era}",
    xaxis_title='Home Team Score',
    yaxis_title='Visiting Team Score',
    width=1000,
    height=1000,
    yaxis=dict(range = [axis_upper_bound, -0.5], dtick = 1, showgrid = False, zeroline = False),
    xaxis = dict(range = [-0.5, axis_upper_bound], side = 'top', dtick = 1, tickangle=0, showgrid = False, zeroline = False), 
    plot_bgcolor='#BCB8B1', 
    font=dict(family="Courier New", size=14, color="black"), 
    title_x = 0.5, 
    title_xanchor = 'center', 
    title_y = 0.98
)

fig.update_traces(showscale = False)

file_path = f"../reports/figures/heatmap_{chosen_era.lower().replace(" ", "_")}.png"
fig.write_image(file_path)

fig.show()

SyntaxError: invalid syntax (3101139256.py, line 29)

In [None]:
!pip install -U kaleido

In [None]:
axis_upper_bound = max(df['score_difference'])
## Create a density plot of score difference
fig = go.Figure(data=go.Histogram(x=df_filtered['score_difference'], histnorm='density'))

fig.update_layout(
    # title='Density Plot of Score Difference',
    xaxis_title='Score Difference',
    yaxis_title='Density',
    width=1000,
    height=400,
    xaxis = dict(range = [0, axis_upper_bound], dtick = 1),
    plot_bgcolor='#BCB8B1',
    font=dict(family="Courier New", size=14, color="black")
)

fig.show()
