In [1]:
# Libraries to help with reading and manipulating data
import numpy as np
import pandas as pd
import requests
from pathlib import Path

# Libraries to help with data visualization
import matplotlib.pyplot as plt
from scipy.stats import linregress

In [62]:
# Study data files
searched_teamWL_path = "data/searched_teams_results.csv"

team_record_data = pd.read_csv(searched_teamWL_path)

# Read the CSV into a Pandas DataFrame
team_recordsDF = pd.DataFrame(team_record_data)

# Convert the "Date" column to datetime format
team_recordsDF["Date"] = pd.to_datetime(team_recordsDF["Date"], format="%a, %b %d, %Y")
# Get the week ending (on a Sunday) for each date to align with the Google search data
team_recordsDF['Week'] = team_recordsDF['Date'] + pd.to_timedelta(6 - team_recordsDF['Date'].dt.dayofweek, unit='D')

# Create a new column to store the updated winning percentage for each team's games in each week
team_recordsDF['Weekly Win%'] = 0.0
# Initialize a new column "PW_win%" in the DataFrame
team_recordsDF['PW_win_pct'] = 0.0

team_recordsDF.drop(columns=['G','Start (ET)','Unnamed: 4','Unnamed: 5','Unnamed: 6','Opponent','Unnamed: 9','Tm','Opp','Streak','Notes'],axis=1,inplace=True)

# Group the DataFrame by the "TEAM" column
grouped = team_recordsDF.groupby('TEAM')

# Iterate through each group (team) in the grouped DataFrame
for team, group in grouped:
    
    # Initialize variables to track wins and total games played
    wins = 0
    total_games = 0

    # Initialize variables to track wins and total games played for each week
    weekly_wins = 0
    weekly_total_games = 0
    prev_week = None
    prev_wk_win_pct = None

    # Sort the group by the "Date"
    group = group.sort_values(by=['Week', 'Date'])

    # Iterate through each row in the group
    for index, row in group.iterrows():

        # Update the "PW_win%" column with the previous week's "Weekly Win%" value
        if prev_week is not None and row['Week']!=prev_week:
            team_recordsDF.loc[(team_recordsDF['TEAM'] == row['TEAM']) & (team_recordsDF['Date'] == row['Date']), 'PW_win_pct'] = PW_win_pct

        # Reset wins and total games at the start of each week
        if weekly_total_games == 0 or row['Week'] != prev_week:
            weekly_wins = 0
            weekly_total_games = 0
            if prev_week is not None:
                PW_win_pct = weekly_win_percentage# Update the previous week's "Weekly Win%" value
        
        # Update wins and total games based on game results
        if row['game_result'] == 'W':
            wins += 1
            weekly_wins +=1
            
        total_games += 1
        weekly_total_games += 1
        # Add a 
        # team_recordsDF.loc[(team_recordsDF['TEAM'] == row['TEAM']) & (team_recordsDF['Date'] == row['Date']), 'PW_win_pct'] = PW_win_pct

        # Calculate and print the cumulative winning percentage after each game
        win_percentage = wins / total_games
        team_recordsDF.loc[(team_recordsDF['TEAM'] == row['TEAM']) & (team_recordsDF['Date']==row['Date']), "Cum Win%"] = win_percentage

        # Calculate the winning percentage for each week and update the new column
        weekly_win_percentage = weekly_wins / weekly_total_games
        team_recordsDF.loc[(team_recordsDF['TEAM'] == row['TEAM']) & (team_recordsDF['Date'] == row['Date']), "Weekly Win%"] = weekly_win_percentage
        #PW_win_pct = weekly_win_percentage

        if prev_week is not None:
            PW_win_pct = PW_win_pct  # Add the same value to PW_win_pct for rows with the same week
            team_recordsDF.loc[(team_recordsDF['TEAM'] == row['TEAM']) & (team_recordsDF['Date'] == row['Date']), 'PW_win_pct'] = PW_win_pct
        else:
            PW_win_pct = weekly_win_percentage  # Update PW_win_pct when a new week begin

        prev_week = row['Week']

# Group the DataFrame by 'TEAM' and 'Week' and get the index of the row with the latest date in each group
latest_dates_idx = team_recordsDF.groupby(['TEAM', 'Week'])['Date'].idxmax()

# Filter the DataFrame using the obtained index
latest_records = team_recordsDF.loc[latest_dates_idx]
latest_records.drop(columns=['Date','game_result','W','L'],index=1,inplace=True)
latest_records = latest_records[latest_records['Week'].dt.year == 2024]

# Display the filtered DataFrame with only the latest records for each 'TEAM' and 'Week'
latest_records.head(25)

Unnamed: 0,TEAM,Week,Weekly Win%,PW_win_pct,Cum Win%
34,Boston Celtics,2024-01-07,0.666667,1.0,0.8
38,Boston Celtics,2024-01-14,0.5,0.666667,0.769231
42,Boston Celtics,2024-01-21,0.75,0.5,0.767442
45,Boston Celtics,2024-01-28,0.666667,0.75,0.76087
49,Boston Celtics,2024-02-04,0.75,0.666667,0.76
52,Boston Celtics,2024-02-11,1.0,0.75,0.773585
54,Boston Celtics,2024-02-18,1.0,1.0,0.781818
56,Boston Celtics,2024-02-25,1.0,1.0,0.789474
59,Boston Celtics,2024-03-03,1.0,1.0,0.8
62,Boston Celtics,2024-03-10,0.333333,1.0,0.777778


In [60]:
import csv

# Define the output file path
output_file = "team_weekly_stats.csv"

# Define the header for the CSV file
header = ["TEAM", "Week", "PW_win_pct","Weekly_Win%", "Cum_Win%"]

# Open the output file and write the header
with open(output_file, "w", newline='') as datafile:
    writer = csv.writer(datafile)
    writer.writerow(header)

    # Iterate through the rows of the DataFrame and write each row to the CSV file
    for index, row in latest_records.iterrows():
        team = row["TEAM"]
        week = row["Week"]
        pw_win_pct = "{:.3f}".format(row["PW_win_pct"])
        weekly_win_pct = "{:.3f}".format(row["Weekly Win%"])
        cum_win_pct = "{:.3f}".format(row["Cum Win%"])

        data_row = [team, week, pw_win_pct, weekly_win_pct, cum_win_pct]
        writer.writerow(data_row)

In [35]:
# Study data files
searched_teamWL_path = "data/searched_teams_results.csv"

team_record_data = pd.read_csv(searched_teamWL_path)

# Read the CSV into a Pandas DataFrame
team_recordsDF = pd.DataFrame(team_record_data)

# Display the data table for preview
team_recordsDF.head()

Unnamed: 0,G,TEAM,Date,Start (ET),Unnamed: 4,Unnamed: 5,Unnamed: 6,Opponent,Unnamed: 8,Unnamed: 9,Tm,Opp,W,L,Streak,Notes
0,1,Boston Celtics,"Wed, Oct 25, 2023",7:00p,,Box Score,@,New York Knicks,W,,108,104,1,0,W 1,
1,2,Boston Celtics,"Fri, Oct 27, 2023",7:30p,,Box Score,,Miami Heat,W,,119,111,2,0,W 2,
2,3,Boston Celtics,"Mon, Oct 30, 2023",7:00p,,Box Score,@,Washington Wizards,W,,126,107,3,0,W 3,
3,4,Boston Celtics,"Wed, Nov 1, 2023",7:30p,,Box Score,,Indiana Pacers,W,,155,104,4,0,W 4,
4,5,Boston Celtics,"Sat, Nov 4, 2023",8:00p,,Box Score,@,Brooklyn Nets,W,,124,114,5,0,W 5,


In [36]:
# Convert the 'Date' column to datetime format
team_recordsDF['Date'] = pd.to_datetime(team_recordsDF['Date'], format='%a, %b %d, %Y')

# Create a new column for the week-beginning date
team_recordsDF['Week_Beginning'] = team_recordsDF['Date'] - pd.to_timedelta(team_recordsDF['Date'].dt.dayofweek, unit='D')

# Calculate winning percentage for each game
team_recordsDF['Winning %'] = team_recordsDF['W'] / (team_recordsDF['W'] + team_recordsDF['L'])

# Calculate the cumulative winning percentage up to that week
team_recordsDF['Cumulative_Win_Percentage'] = team_recordsDF['W'].cumsum() / (team_recordsDF['W'].cumsum() + team_recordsDF['L'].cumsum())

# Reset index and add a column for the week start date
weekly_recordDF=team_recordsDF[['TEAM', 'Week_Beginning', 'Winning %', 'Cumulative_Win_Percentage']]

# Display the resulting DataFrame with weekly winning percentages and cumulative season winning percentage
print(weekly_recordDF)

# Save the modified DataFrame to a CSV file
weekly_recordDF.to_csv('data/team_records_output.csv', index=False)

                       TEAM Week_Beginning  Winning %  \
0            Boston Celtics     2023-10-23   1.000000   
1            Boston Celtics     2023-10-23   1.000000   
2            Boston Celtics     2023-10-30   1.000000   
3            Boston Celtics     2023-10-30   1.000000   
4            Boston Celtics     2023-10-30   1.000000   
..                      ...            ...        ...   
405  Minnesota Timberwolves     2024-04-01   0.692308   
406  Minnesota Timberwolves     2024-04-08   0.696203   
407  Minnesota Timberwolves     2024-04-08   0.687500   
408  Minnesota Timberwolves     2024-04-08   0.691358   
409  Minnesota Timberwolves     2024-04-08   0.682927   

     Cumulative_Win_Percentage  
0                     1.000000  
1                     1.000000  
2                     1.000000  
3                     1.000000  
4                     1.000000  
..                         ...  
405                   0.672198  
406                   0.672311  
407               

In [108]:
# Study data files
searched_teamWL_path = "data/searched_teams_results.csv"

team_record_data = pd.read_csv(searched_teamWL_path)

# Read the CSV into a Pandas DataFrame
team_recordsDF = pd.DataFrame(team_record_data)

# Display the data table for preview
team_recordsDF.head()

# Convert the 'Date' column to datetime format
team_recordsDF['Date'] = pd.to_datetime(team_recordsDF['Date'], format='%a, %b %d, %Y')

# Create a new column for the week-beginning date
team_recordsDF['Week'] = team_recordsDF['Date'] - pd.to_timedelta(team_recordsDF['Date'].dt.dayofweek, unit='D')
#print(team_recordsDF['Week'])

# Calculate winning percentage for each game
#team_recordsDF['Winning %'] = team_recordsDF['W'] / (team_recordsDF['W'] + team_recordsDF['L'])

team_stats = {}

for index, row in team_recordsDF.iterrows():
#   weekly_wins = {}
#   weekly_games = {}
    
    team = row['TEAM']
    game_result = row['game_result']
    week = row['Week'] - pd.to_timedelta(row['Week'].dayofweek, unit='D')
    print(week)
    
    # Check if the team is already in the dictionary, if not, initialize their stats
    if team not in team_stats:
        team_stats[team] = {'TEAM': 'team',
                            'total_games': 0,
                            'total_wins': 0,
                            'weeks': [],
                            'weekly_wins': {},
                            'weekly_games': {},
                            'weekly_win%': 0,
                            'cum_win%': 0}

        # Update the team's stats based on the game result
        if game_result == "W":
            # Check if the week is already in the team's dictionary, if not, initialize it
            if week not in team_stats[team]['weekly_wins']:
                team_stats[team]['weeks'].append(week)
                team_stats[team]['weekly_wins'][week] = 0
                team_stats[team]['weekly_games'][week] = 0
                
            team_stats[team]['total_games'] +=1
            team_stats[team]['total_wins'] +=1
            team_stats[team]['weekly_wins'][week] +=1
            team_stats[team]['weekly_games'][week] +=1
            
            # Calculate weekly and cumulative win percentages
            team_stats[team]['weekly_win%'] = team_stats[team]['weekly_wins'][week] / team_stats[team]['weekly_games'][week]
            team_stats[team]['cum_win%'] = team_stats[team]['total_wins'] / team_stats[team]['total_games']
        
        else:
            if week not in team_stats[team]['weekly_wins']:
                team_stats[team]['weeks'].append(week)
                team_stats[team]['weekly_wins'][week] = 0
                team_stats[team]['weekly_games'][week] = 0
            team_stats[team]['total_games'] +=1
            team_stats[team]['weekly_games'][week] +=1
            
            # Calculate weekly and cumulative win percentages
            team_stats[team]['weekly_win%'] = team_stats[team]['weekly_wins'][week] / team_stats[team]['weekly_games'][week]
            team_stats[team]['cum_win%'] = team_stats[team]['total_wins'] / team_stats[team]['total_games']


    # Calculate the cumulative win percentage for the team



 #   print(f"Team: {team}")
 #   print(f"Current week winning percentage: {weekly_win_percentage:.2f}%")
 #   print(f"Cumulative winning percentage: {cumulative_win_percentage:.2f}%")

print(team_stats)
# Calculate the cumulative winning percentage up to that week
team_recordsDF['Cumulative_Win_Percentage'] = team_recordsDF['W'].cumsum() / (team_recordsDF['W'].cumsum() + team_recordsDF['L'].cumsum())

# Reset index and add a column for the week start date
#eekly_recordDF=team_recordsDF[['TEAM', 'Week', 'Winning %', 'Cumulative_Win_Percentage']]

# Display the resulting DataFrame with weekly winning percentages and cumulative season winning percentage
#print(weekly_recordDF)

# Save the modified DataFrame to a CSV file
weekly_recordDF.to_csv('data/team_records_output.csv', index=False)

2023-10-23 00:00:00
2023-10-23 00:00:00
2023-10-30 00:00:00
2023-10-30 00:00:00
2023-10-30 00:00:00
2023-11-06 00:00:00
2023-11-06 00:00:00
2023-11-06 00:00:00
2023-11-06 00:00:00
2023-11-13 00:00:00
2023-11-13 00:00:00
2023-11-13 00:00:00
2023-11-13 00:00:00
2023-11-20 00:00:00
2023-11-20 00:00:00
2023-11-20 00:00:00
2023-11-20 00:00:00
2023-11-27 00:00:00
2023-11-27 00:00:00
2023-12-04 00:00:00
2023-12-04 00:00:00
2023-12-11 00:00:00
2023-12-11 00:00:00
2023-12-11 00:00:00
2023-12-11 00:00:00
2023-12-18 00:00:00
2023-12-18 00:00:00
2023-12-18 00:00:00
2023-12-25 00:00:00
2023-12-25 00:00:00
2023-12-25 00:00:00
2023-12-25 00:00:00
2024-01-01 00:00:00
2024-01-01 00:00:00
2024-01-01 00:00:00
2024-01-08 00:00:00
2024-01-08 00:00:00
2024-01-08 00:00:00
2024-01-08 00:00:00
2024-01-15 00:00:00
2024-01-15 00:00:00
2024-01-15 00:00:00
2024-01-15 00:00:00
2024-01-22 00:00:00
2024-01-22 00:00:00
2024-01-22 00:00:00
2024-01-29 00:00:00
2024-01-29 00:00:00
2024-01-29 00:00:00
2024-01-29 00:00:00


In [46]:
# Convert the 'Date' column to datetime format
team_recordsDF['Date'] = pd.to_datetime(team_recordsDF['Date'], format='%a, %b %d, %Y')

# Create a new column for the week-beginning date
team_recordsDF['Week'] = team_recordsDF['Date'] + pd.to_timedelta(6 - team_recordsDF['Date'].dt.dayofweek, unit='D')

# Calculate the winning percentage for all games in the current "Week"
team_recordsDF['Winning %'] = team_recordsDF['W'] / (team_recordsDF['W'] + team_recordsDF['L'])

# Calculate the winning percentage record after the last game from the previous week
#team_recordsDF['PW Winning %'] = team_recordsDF.groupby('TEAM')['Winning %'].shift(fill_value=0)
#team_recordsDF.rename(columns={'Winning %': 'PW Winning %'}, inplace=True)

# Calculate the cumulative winning percentage up to that week
team_recordsDF['Cumulative_Win_Percentage'] = team_recordsDF.['W'] / (team_recordsDF['W'] + team_recordsDF['L'])
#team_recordsDF['Cumulative_Win_Percentage'] = team_recordsDF.groupby('TEAM')['W'].cumsum() / (team_recordsDF.groupby('TEAM')['W'].cumsum() + team_recordsDF.groupby('TEAM')['L'].cumsum())

# Reset index and add a column for the week start date
weekly_recordsDF=team_recordsDF[['TEAM', 'Week', 'Winning %','Cumulative_Win_Percentage']]

#Drop the records for any weeks in 2023:
#weekly_recordDF=weekly_recordDF[weekly_recordDF['Week'].dt.year != 2023]                   

# Display the resulting DataFrame with weekly winning percentages and cumulative season winning percentage
print(weekly_recordsDF)
print(weekly_recordsDF.tail(82))
weekly_recordsDF.to_csv('data/team_records_output2.csv', index=False)

                       TEAM       Week  Winning %  Cumulative_Win_Percentage
0            Boston Celtics 2023-10-29   1.000000                   1.000000
1            Boston Celtics 2023-10-29   1.000000                   1.000000
2            Boston Celtics 2023-11-05   1.000000                   1.000000
3            Boston Celtics 2023-11-05   1.000000                   1.000000
4            Boston Celtics 2023-11-05   1.000000                   1.000000
..                      ...        ...        ...                        ...
405  Minnesota Timberwolves 2024-04-07   0.692308                   0.692308
406  Minnesota Timberwolves 2024-04-14   0.696203                   0.696203
407  Minnesota Timberwolves 2024-04-14   0.687500                   0.687500
408  Minnesota Timberwolves 2024-04-14   0.691358                   0.691358
409  Minnesota Timberwolves 2024-04-14   0.682927                   0.682927

[410 rows x 4 columns]
                       TEAM       Week  Winning %  C

In [56]:
# Study data files
searched_teamWL_path = "data/searched_teams_results.csv"

team_record_data = pd.read_csv(searched_teamWL_path)

# Read the CSV into a Pandas DataFrame
team_recordsDF = pd.DataFrame(team_record_data)

# Convert the 'Date' column to datetime format
team_recordsDF['Date'] = pd.to_datetime(team_recordsDF['Date'], format='%a, %b %d, %Y')

# Create a new column for the week-beginning date
team_recordsDF['Week'] = team_recordsDF['Date'] + pd.to_timedelta(6 - team_recordsDF['Date'].dt.dayofweek, unit='D')

# Initialize dictionaries to store weekly and cumulative win counts for each team
weekly_wins = {}
cumulative_wins = {}

for team in team_recordsDF['TEAM'].unique():
    team_df = team_recordsDF[team_recordsDF['TEAM'] == team].copy()
    
    # Initialize weekly and cumulative win counts for the team
    weekly_wins[team] = []
    cumulative_wins[team] = []
    
    for week_ending_date in team_df['Week'].unique():
        weekly_df = team_df[team_df['Week'] == week_ending_date]
        
        # Calculate weekly win count and winning percentage for the team
        weekly_win_count = (weekly_df['game_result'] == 'W').sum()
        weekly_game_count = len(weekly_df)
        weekly_win_percentage = weekly_win_count / weekly_game_count if weekly_game_count > 0 else 0
        
        weekly_wins[team].append(weekly_win_percentage)
        
        # Calculate cumulative win count and winning percentage for the team
        cumulative_win_count = (team_df.loc[team_df['Week'] <= week_ending_date, 'game_result'] == 'W').sum()
        cumulative_game_count = len(team_df.loc[team_df['Week'] <= week_ending_date])
        cumulative_win_percentage = cumulative_win_count / cumulative_game_count if cumulative_game_count > 0 else 0
        
        cumulative_wins[team].append(cumulative_win_percentage)

# Add weekly and cumulative win percentages to the DataFrame
#team_recordsDF['Weekly_Win_Percentage'] = team_recordsDF['TEAM'].apply(lambda x: weekly_wins[x].pop(0))
#team_recordsDF['Cumulative_Win_Percentage'] = team_recordsDF['TEAM'].apply(lambda x: cumulative_wins[x].pop(0))

print(team_recordsDF)

      G                    TEAM       Date Start (ET)  Unnamed: 4 Unnamed: 5  \
0     1          Boston Celtics 2023-10-25      7:00p         NaN  Box Score   
1     2          Boston Celtics 2023-10-27      7:30p         NaN  Box Score   
2     3          Boston Celtics 2023-10-30      7:00p         NaN  Box Score   
3     4          Boston Celtics 2023-11-01      7:30p         NaN  Box Score   
4     5          Boston Celtics 2023-11-04      8:00p         NaN  Box Score   
..   ..                     ...        ...        ...         ...        ...   
405  78  Minnesota Timberwolves 2024-04-07     10:00p         NaN  Box Score   
406  79  Minnesota Timberwolves 2024-04-09      8:00p         NaN  Box Score   
407  80  Minnesota Timberwolves 2024-04-10     10:00p         NaN  Box Score   
408  81  Minnesota Timberwolves 2024-04-12      8:00p         NaN  Box Score   
409  82  Minnesota Timberwolves 2024-04-14      3:30p         NaN  Box Score   

    Unnamed: 6            Opponent game