Average Number of Games/Opponents for College Softball Teams in 2024:

In [107]:
import os
import numpy as np
import pandas as pd
import scipy as sp
from datetime import datetime

# Load the 2024 game-by-game log. os.path.join builds the path with the
# platform's separator instead of hard-coding Windows "\\", so the cell also
# runs on macOS/Linux.
gbgDF = pd.read_csv(os.path.join(os.getcwd(), "Stat-Files", "SB_2024_DPGameByGame.csv"))
gbgDF["Date"] = pd.to_datetime(gbgDF["Date"])

# Keep only games dated before May 16, 2024 (the cutoff used for the
# "regular season" figures printed below).
gbgDF = gbgDF[gbgDF["Date"] < datetime(2024, 5, 16)]

# Conference games: rows where the team and its opponent list the same conference.
gbgConfDF = gbgDF[gbgDF["TeamConference"] == gbgDF["OppConference"]]

# Every distinct team identifier appearing on the "team" side of a row.
uniqueUIDs24 = gbgDF["TeamUID"].unique()

# Running totals summed over all teams; divided by the team count when printed.
numOpps = 0
numConfOpps = 0
numGames = 0
numConfGames = 0

# Accumulate per-team game counts and distinct-opponent counts.
# One groupby per frame replaces re-filtering the whole frame with a boolean
# mask for every team (four full scans per team in the original loop).
# nunique(dropna=False) mirrors Series.unique(), which counts NaN as a value.
allByTeam = gbgDF.groupby("TeamUID")
numGames += int(allByTeam.size().sum())
numOpps += int(allByTeam["OppUID"].nunique(dropna=False).sum())

# Same totals restricted to conference games. Teams with no conference games
# simply contribute zero, exactly as an empty filter result did before.
confByTeam = gbgConfDF.groupby("TeamUID")
numConfGames += int(confByTeam.size().sum())
numConfOpps += int(confByTeam["OppUID"].nunique(dropna=False).sum())


# Report each accumulated total as a per-team average (total / number of teams).
# All four messages are formatted first, then emitted in order.
for summaryLine in (
    f"The average number of teams that a given college softball team (of the {len(uniqueUIDs24)} possible teams) competed against in the 2024 regular season was: {numOpps/len(uniqueUIDs24)}",
    f"The average number of games played by a given college softball team in the 2024 regular season was: {numGames/len(uniqueUIDs24)}",
    f"The average number of teams that a given college softball team (of the {len(uniqueUIDs24)} possible teams) competed against in the 2024 regular season IN CONFERENCE PLAY was: {numConfOpps/len(uniqueUIDs24)}",
    f"The average number of games played by a given college softball team AGAINST CONFERENCE OPPONENTS in the 2024 regular season was: {numConfGames/len(uniqueUIDs24)}",
):
    print(summaryLine)

The average number of teams that a given college softball team (of the 296 possible teams) competed against in the 2024 regular season was: 24.945945945945947
The average number of games played by a given college softball team in the 2024 regular season was: 48.8277027027027
The average number of teams that a given college softball team (of the 296 possible teams) competed against in the 2024 regular season IN CONFERENCE PLAY was: 7.783783783783784
The average number of games played by a given college softball team AGAINST CONFERENCE OPPONENTS in the 2024 regular season was: 24.631756756756758


Attempt to implement Penn & Donnelly Double Poisson model (did not converge)

In [None]:
import os
import numpy as np
import pandas as pd
import scipy as sp

# Load the game log and the team table. os.path.join builds the paths with the
# platform's separator instead of hard-coding Windows "\\".
gbgDF = pd.read_csv(os.path.join(os.getcwd(), "Stat-Files", "SB_2024_DPGameByGame.csv"))
# The team table's unnamed first column is used as each team's integer number.
tcnDF = pd.read_csv(os.path.join(os.getcwd(), "TCN-Files", "2024_TCN.csv")).rename(columns={"Unnamed: 0" : "TeamNum"})

# Attach the team's number (TeamNum) and the opponent's number (OppNum) to
# every game row by matching UIDs against the team table. Both merges use the
# default inner join, so rows whose UID is missing from the table are dropped.
gbgDF = gbgDF.merge(tcnDF[["TeamNum", "UID"]], left_on="TeamUID", right_on="UID")
gbgDF = gbgDF.merge(tcnDF[["TeamNum", "UID"]].rename(columns={"TeamNum" : "OppNum"}), left_on="OppUID", right_on="UID")

# Construct empty matrices for games played and runs scored
gamesM = np.zeros((len(tcnDF), len(tcnDF)))
runsM = np.zeros((len(tcnDF), len(tcnDF)))

# Construct empty vectors for total runs for (rfV) and runs against (raV)
rfV = np.zeros(len(tcnDF))
raV = np.zeros(len(tcnDF))

# Pull the per-row indices and runs out once instead of looking each row up
# with .loc[i, ...] (slow, and only valid while the index is 0..n-1).
teamIdx = gbgDF["TeamNum"].to_numpy()
oppIdx = gbgDF["OppNum"].to_numpy()
runsFor = gbgDF["RunsFor"].to_numpy()

# np.add.at performs unbuffered accumulation, so repeated (team, opp) pairs
# each contribute, exactly like the row-by-row "+=" updates.
# Each row is handled from the listed team's perspective only: it adds one game
# at [team, opp] and credits that team's runs.
np.add.at(gamesM, (teamIdx, oppIdx), 1)

# The team's runs count toward its own runs-for total and toward the
# opponent's runs-against total.
np.add.at(rfV, teamIdx, runsFor)
np.add.at(raV, oppIdx, runsFor)
np.add.at(runsM, (teamIdx, oppIdx), runsFor)
    
# Build objective function:
def Objective(OaDv):
    """Return the signed residuals of the double-Poisson total-run equations.

    OaDv, for n teams, is a one-dimensional array of length 2n laid out as
    [Oa; Dv]: the first n entries are the offensive parameters and the last
    n are the defensive parameters.

    With gamesM[i, j] holding the number of games team i played against
    team j, the expected totals are
        runs for team i     = Oa[i] * sum_j gamesM[i, j] * Dv[j]
        runs against team i = Dv[i] * sum_j gamesM[j, i] * Oa[j]
    The returned vector has length 2n: (expected runs for - rfV) followed by
    (expected runs against - raV).

    Residuals are returned signed (no absolute value) so they stay smooth
    through zero for the root finder; callers that need a magnitude should
    square them. Reads the module-level gamesM, rfV and raV.

    NOTE(review): scaling Oa by c and Dv by 1/c leaves these residuals
    unchanged, so the root is not unique -- confirm whether a normalisation
    constraint is needed for the solver to converge.
    """
    params = np.asarray(OaDv, dtype=float)
    numTeams = len(params) // 2
    Oa = params[:numTeams]
    Dv = params[numTeams:]

    # Pair each offensive parameter with the defences it faced, and each
    # defensive parameter with the offences it faced.
    expectedRf = Oa * (gamesM @ Dv)
    expectedRa = Dv * (gamesM.T @ Oa)
    return np.concatenate((expectedRf - rfV, expectedRa - raV))

# Initial guess: each team's share of all runs scored (offence) and of all
# runs allowed (defence), stacked as [Oa; Dv].
valuesOaDv = np.concatenate((rfV/np.sum(rfV), raV/np.sum(raV)))
numRuns = 0
maxRuns = 25        # hard cap on solver restarts
tolerance = 0.01    # stop once the summed squared residual is at or below this

# Re-run fsolve from its previous answer until the summed squared residual is
# small enough or the restart cap is reached. The cap must be combined with
# "and": with "or numRuns > 25" the loop never stopped on the cap, and kept
# running forever once numRuns exceeded 25.
sqError = np.sum(Objective(valuesOaDv) ** 2)
while sqError > tolerance and numRuns < maxRuns:
    valuesOaDv = sp.optimize.fsolve(Objective, valuesOaDv)
    sqError = np.sum(Objective(valuesOaDv) ** 2)
    print(sqError)
    numRuns += 1