# Pitch Histogams
## Pitch Speed

In [3]:
import os
import numpy as np
import json

In [147]:
# Read Data
hits_folder_path = '/Users/tiffanynguyen/wsd-data/wsd-hits'
no_hits_folder_path = '/Users/tiffanynguyen/wsd-data/wsd-nohit'
data = []

for filename in os.listdir(hits_folder_path):
    if filename.endswith('.jsonl'):
        file_path = os.path.join(hits_folder_path, filename)
        with open(file_path, 'r') as json_file:
            data.append(json.load(json_file))

for filename in os.listdir(no_hits_folder_path):
    if filename.endswith('.jsonl'):
        file_path = os.path.join(no_hits_folder_path, filename)
        with open(file_path, 'r') as json_file:
            data.append(json.load(json_file))

In [45]:
# Get Pitch Types
pitch_type_data = []
pitch_type_data = [cur_data['summary_acts']['pitch']['type'] for cur_data in data if cur_data['summary_acts']['pitch']['type']]
pitch_type_types = list(set(pitch_type_data))
print(pitch_type_types)

['Sinker', 'Curveball', 'Slider', 'Changeup', 'FourSeamFastball', 'Cutter']


In [46]:
pitch_speed = [[] for _ in range(len(pitch_type_types))]

for cur_data in data:
    if(cur_data['summary_acts']['pitch']['type'] and cur_data['summary_acts']['pitch']['speed']['mph']):
        index = pitch_type_types.index(cur_data['summary_acts']['pitch']['type'])
        pitch_speed[index].append(cur_data['summary_acts']['pitch']['speed']['mph'])

In [47]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

In [48]:
# Calculate the minimum and maximum speed
min_speed = min(min(speed) for speed in pitch_speed)
max_speed = max(max(speed) for speed in pitch_speed)

# Create subplots
fig = make_subplots(rows=len(pitch_type_types), cols=1, subplot_titles=pitch_type_types)

# Add histograms to subplots
for i in range(len(pitch_type_types)):
    hist = go.Histogram(
        x=pitch_speed[i],
        nbinsx=6,
        marker_color='red',
        marker_line_color='black',
        marker_line_width=1
    )

    fig.add_trace(hist, row=i+1, col=1)

    # Update x and y axis labels for each subplot
    fig.update_xaxes(title_text="Speed (mph)", row=i+1, col=1)
    fig.update_yaxes(title_text="Frequency", row=i+1, col=1)

# Update layout
fig.update_layout(
    height=1000,  # Adjust height as needed
    width=500,   # Adjust width as needed
    title_text='Pitch Initial Speed Frequency',
    showlegend=False
)

fig.show()

In [49]:
# Overlay
fig = go.Figure()
for i in range(len(pitch_type_types)):
    fig.add_trace(go.Histogram(x=pitch_speed[i], name=pitch_type_types[i]))

# Overlay both histograms
fig.update_layout(barmode='overlay', title_text='Pitch Initial Speed Frequency')
fig.update_xaxes(title_text="Speed (mph)")
fig.update_yaxes(title_text="Frequency")

# Reduce opacity to see both histograms
fig.update_traces(opacity=0.75)
fig.show()

## Pitch Spin Histogram

In [50]:
pitch_spin = [[] for _ in range(len(pitch_type_types))]

for cur_data in data:
    if(cur_data['summary_acts']['pitch']['type'] and cur_data['summary_acts']['pitch']['spin']['rpm']):
        index = pitch_type_types.index(cur_data['summary_acts']['pitch']['type'])
        pitch_speed[index].append(cur_data['summary_acts']['pitch']['spin']['rpm'])

In [61]:
# Overlay
fig = go.Figure()
for i in range(len(pitch_type_types)):
    fig.add_trace(go.Histogram(x=pitch_speed[i], name=pitch_type_types[i]))
    
# Overlay both histograms
fig.update_layout(barmode='overlay', title_text='Pitch Initial Speed Frequency')
fig.update_xaxes(title_text="Spin (rpm)")
fig.update_yaxes(title_text="Frequency")

# Reduce opacity to see both histograms
fig.update_traces(opacity=0.75)
fig.show()

# Pitch Density Heatmap
## Pitch type and Result

In [144]:
import plotly.express as px
import pandas as pd

In [148]:
pitch_type_data = []
pitch_result_data = []
for cur_data in data:
    if(cur_data['summary_acts']['pitch']['type'] and cur_data['summary_acts']['pitch']['result']):
        pitch_type_data.append(cur_data['summary_acts']['pitch']['type'])
        pitch_result_data.append(cur_data['summary_acts']['pitch']['result'])

In [149]:
pitch_type_types = list(set(pitch_type_data))
pitch_result_types = list(set(pitch_result_data))
print("Pitch Types:", pitch_type_types)
print("Pitch Results:", pitch_result_types)

Pitch Types: ['Sinker', 'Curveball', 'Slider', 'Changeup', 'FourSeamFastball', 'Cutter']
Pitch Results: ['Strike', 'HitIntoPlay', 'Ball']


In [150]:
pitch_matrix = np.zeros([len(pitch_type_types), len(pitch_result_types)])
for i in range(len(pitch_type_data)):
    pitch_matrix[pitch_type_types.index(pitch_type_data[i])][pitch_result_types.index(pitch_result_data[i])] += 1
print(pitch_matrix)

#convert to dictionary
type_result_dict = {'Pitch Type': pitch_type_types}
for i, outcome in enumerate(pitch_result_types):
    type_result_dict[outcome] = pitch_matrix[:, i].tolist()
print(type_result_dict)

#Make dataframe
type_result_df = pd.DataFrame(type_result_dict)

# Melt the DataFrame to have a column for the pitch type and its corresponding values
melted_df = pd.melt(type_result_df, id_vars='Pitch Type', var_name='Outcome', value_name='Count')
print(melted_df)

[[14.  6. 21.]
 [ 3.  0.  5.]
 [ 7.  7. 12.]
 [ 1.  4.  1.]
 [ 4.  5.  3.]
 [ 3.  2.  8.]]
{'Pitch Type': ['Sinker', 'Curveball', 'Slider', 'Changeup', 'FourSeamFastball', 'Cutter'], 'Strike': [14.0, 3.0, 7.0, 1.0, 4.0, 3.0], 'HitIntoPlay': [6.0, 0.0, 7.0, 4.0, 5.0, 2.0], 'Ball': [21.0, 5.0, 12.0, 1.0, 3.0, 8.0]}
          Pitch Type      Outcome  Count
0             Sinker       Strike   14.0
1          Curveball       Strike    3.0
2             Slider       Strike    7.0
3           Changeup       Strike    1.0
4   FourSeamFastball       Strike    4.0
5             Cutter       Strike    3.0
6             Sinker  HitIntoPlay    6.0
7          Curveball  HitIntoPlay    0.0
8             Slider  HitIntoPlay    7.0
9           Changeup  HitIntoPlay    4.0
10  FourSeamFastball  HitIntoPlay    5.0
11            Cutter  HitIntoPlay    2.0
12            Sinker         Ball   21.0
13         Curveball         Ball    5.0
14            Slider         Ball   12.0
15          Changeup         

In [151]:
# Create a heatmap with Plotly Express
fig = px.imshow(melted_df.pivot(index='Pitch Type', columns='Outcome', values='Count'),
                x=melted_df['Outcome'].unique(),
                y=melted_df['Pitch Type'].unique(),
                labels=dict(x='Outcome', y='Pitch Type', color='Count'),
                title='Pitch Type Outcome Counts',
                width=450,
                height=500, 
                text_auto=True)

# Show the plot
fig.show()

## Pitch Type and Action

In [156]:
pitch_type_data = []
pitch_action_data = []
for cur_data in data:
    if(cur_data['summary_acts']['pitch']['type'] and cur_data['summary_acts']['pitch']['action']):
        pitch_type_data.append(cur_data['summary_acts']['pitch']['type'])
        pitch_action_data.append(cur_data['summary_acts']['pitch']['action'])

In [157]:
pitch_type_types = list(set(pitch_type_data))
pitch_action_types = list(set(pitch_action_data))
print("Pitch Types:", pitch_type_types)
print("Pitch Action:", pitch_action_types)

Pitch Types: ['Sinker', 'Curveball', 'Slider', 'Changeup', 'FourSeamFastball', 'Cutter']
Pitch Action: ['HitByPitch', 'Foul', 'Called', 'BallInDirt']


In [164]:
pitch_type_action_matrix = np.zeros([len(pitch_type_types), len(pitch_action_types)])
for i in range(len(pitch_type_data)):
    pitch_type_action_matrix[pitch_type_types.index(pitch_type_data[i])][pitch_action_types.index(pitch_action_data[i])] += 1
print(pitch_type_action_matrix)

#convert to dictionary
type_action_dict = {'Pitch Type': pitch_type_types}
for i, outcome in enumerate(pitch_action_types):
    type_action_dict[outcome] = pitch_type_action_matrix[:, i].tolist()
print(type_action_dict)

#Make dataframe
type_action_df = pd.DataFrame(type_action_dict)
print(type_action_df)

# Melt the DataFrame to have a column for the pitch type and its corresponding values
melted_type_action_df = pd.melt(type_action_df, id_vars='Pitch Type', var_name='Pitch Action', value_name='Count')
print(melted_type_action_df)

[[ 1.  8. 26.  0.]
 [ 0.  1.  6.  1.]
 [ 1.  3. 12.  2.]
 [ 0.  0.  1.  0.]
 [ 0.  1.  6.  0.]
 [ 0.  2.  8.  0.]]
{'Pitch Type': ['Sinker', 'Curveball', 'Slider', 'Changeup', 'FourSeamFastball', 'Cutter'], 'HitByPitch': [1.0, 0.0, 1.0, 0.0, 0.0, 0.0], 'Foul': [8.0, 1.0, 3.0, 0.0, 1.0, 2.0], 'Called': [26.0, 6.0, 12.0, 1.0, 6.0, 8.0], 'BallInDirt': [0.0, 1.0, 2.0, 0.0, 0.0, 0.0]}
         Pitch Type  HitByPitch  Foul  Called  BallInDirt
0            Sinker         1.0   8.0    26.0         0.0
1         Curveball         0.0   1.0     6.0         1.0
2            Slider         1.0   3.0    12.0         2.0
3          Changeup         0.0   0.0     1.0         0.0
4  FourSeamFastball         0.0   1.0     6.0         0.0
5            Cutter         0.0   2.0     8.0         0.0
          Pitch Type Pitch Action  Count
0             Sinker   HitByPitch    1.0
1          Curveball   HitByPitch    0.0
2             Slider   HitByPitch    1.0
3           Changeup   HitByPitch    0.0
4   Fo

In [167]:
# Create a heatmap with Plotly Express
fig = px.imshow(melted_type_action_df.pivot(index='Pitch Type', columns='Pitch Action', values='Count'),
                x=melted_type_action_df['Pitch Action'].unique(),
                y=melted_type_action_df['Pitch Type'].unique(),
                labels=dict(x='Pitch Action', y='Pitch Type', color='Count'),
                title='Pitch Type Action Counts',
                width=450,
                height=500, 
                text_auto=True)

# Show the plot
fig.show()