In [72]:
# Libraries to help with reading and manipulating data
import numpy as np
import pandas as pd
import requests
from pathlib import Path

# Libraries to help with data visualization
import matplotlib.pyplot as plt
from scipy.stats import linregress

In [73]:
# Load the weekly team record data file
season_record_path = "data/team_weekly_stats.csv"

# read_csv already returns a DataFrame; the raw load keeps its own name
team_record_data = pd.read_csv(season_record_path)

# Standardise the team column name and parse the "Week" labels into datetimes
team_recordsDF = (
    team_record_data
    .rename(columns={'TEAM': 'Team'})
    .assign(Week=lambda weekly: pd.to_datetime(weekly["Week"]))
)

print(team_recordsDF)

               Team       Week  PW_win_pct  Weekly_Win%  Cum_Win%
0    Boston Celtics 2024-01-07       1.000        0.667     0.800
1    Boston Celtics 2024-01-14       0.667        0.500     0.769
2    Boston Celtics 2024-01-21       0.500        0.750     0.767
3    Boston Celtics 2024-01-28       0.750        0.667     0.761
4    Boston Celtics 2024-02-04       0.667        0.750     0.760
..              ...        ...         ...          ...       ...
70  New York Knicks 2024-03-17       0.333        1.000     0.597
71  New York Knicks 2024-03-24       1.000        0.667     0.600
72  New York Knicks 2024-03-31       0.667        0.500     0.595
73  New York Knicks 2024-04-07       0.500        0.500     0.590
74  New York Knicks 2024-04-14       0.500        1.000     0.610

[75 rows x 5 columns]


In [74]:
# Load the NBA playoff game log and reduce it to one row per (team, week)
# for the teams of interest.
playoff_records_path = "data/NBA Playoff records.csv"

# read_csv already returns a DataFrame; the raw load is kept untouched under its own name
playoff_records_data = pd.read_csv(playoff_records_path)

# Drop the columns this analysis does not use (new frame, no in-place mutation)
playoff_recordsDF = playoff_records_data.drop(
    columns=['Attend.', 'Unnamed: 7', 'Unnamed: 6', 'PTS.1', 'PTS', 'Arena', 'Start (ET)']
)

# Convert the "Date" column to datetime format
playoff_recordsDF["Date"] = pd.to_datetime(playoff_recordsDF["Date"], format="%a %b %d %Y")

# Get the week ending (on a Sunday) for each date to align with the Google search data.
# dt.dayofweek is 0 for Monday through 6 for Sunday, so adding (6 - dayofweek) days
# moves each date forward to the Sunday of its week (Sundays stay unchanged).
playoff_recordsDF['Week'] = playoff_recordsDF['Date'] + pd.to_timedelta(
    6 - playoff_recordsDF['Date'].dt.dayofweek, unit='D'
)

playoff_recordsDF = (
    playoff_recordsDF
    .rename(columns={'Visitor/Neutral': 'Visitor', 'Home/Neutral': 'Home', 'Notes': 'Round'})
    .sort_values(by=['Home', 'Visitor'])
)

# Set of teams used to filter the playoff data
searched_teams = {"Boston Celtics", "Dallas Mavericks", "Denver Nuggets", "Minnesota Timberwolves", "New York Knicks"}

# Reshape to one row per (game, participating team): melt stacks the Visitor and Home
# columns into a single 'Team' column, then only the searched teams are kept.
# The index labels of the result are positions in the melted frame and carry no meaning.
new_df = (
    playoff_recordsDF
    .melt(id_vars=['Date', 'Round', 'Week'], value_vars=['Visitor', 'Home'], value_name='Team')
    .loc[lambda games: games['Team'].isin(searched_teams), ['Team', 'Week', 'Date', 'Round']]
    .sort_values(by=['Team', 'Week', 'Date'])
)

# Group by 'Week' and 'Team', and keep only the row with the highest 'Round' value.
# Rows are already ordered by Date, so a tie on Round should resolve to the earliest
# game of that week (idxmax reports the first maximum).
# NOTE(review): assumes every (Week, Team) group has at least one non-null Round;
# an all-null group would make idxmax/.loc fail — TODO confirm against the data.
filtered_df = new_df.loc[new_df.groupby(['Week', 'Team'])['Round'].idxmax()]
filtered_df = filtered_df.sort_values(by=['Team', 'Week', 'Round'])
print(filtered_df)

                      Team       Week       Date  Round
9           Boston Celtics 2024-04-21 2024-04-21    1.0
10          Boston Celtics 2024-04-28 2024-04-24    1.0
50          Boston Celtics 2024-05-05 2024-04-29    1.0
0           Boston Celtics 2024-05-12 2024-05-07    2.0
13          Boston Celtics 2024-05-19 2024-05-13    2.0
7           Boston Celtics 2024-05-26 2024-05-21    3.0
40          Boston Celtics 2024-06-02 2024-05-27    3.0
4           Boston Celtics 2024-06-09 2024-06-06    4.0
14          Boston Celtics 2024-06-16 2024-06-12    4.0
44        Dallas Mavericks 2024-04-21 2024-04-21    1.0
45        Dallas Mavericks 2024-04-28 2024-04-23    1.0
46        Dallas Mavericks 2024-05-05 2024-05-01    1.0
72        Dallas Mavericks 2024-05-12 2024-05-07    2.0
26        Dallas Mavericks 2024-05-19 2024-05-13    2.0
51        Dallas Mavericks 2024-05-26 2024-05-22    3.0
24        Dallas Mavericks 2024-06-02 2024-05-28    3.0
3         Dallas Mavericks 2024-06-09 2024-06-06

In [75]:
# Combine the weekly team records with the per-week playoff rows.
# This is an OUTER merge on 'Team' and 'Week': every row from both frames is kept,
# and columns that one side does not supply for a given Team/Week are left missing.
merged_df = team_recordsDF.merge(filtered_df, how='outer', on=['Team', 'Week'])

print(merged_df)

                Team       Week  PW_win_pct  Weekly_Win%  Cum_Win%       Date  \
0     Boston Celtics 2024-01-07       1.000        0.667     0.800        NaT   
1     Boston Celtics 2024-01-14       0.667        0.500     0.769        NaT   
2     Boston Celtics 2024-01-21       0.500        0.750     0.767        NaT   
3     Boston Celtics 2024-01-28       0.750        0.667     0.761        NaT   
4     Boston Celtics 2024-02-04       0.667        0.750     0.760        NaT   
..               ...        ...         ...          ...       ...        ...   
105  New York Knicks 2024-04-21         NaN          NaN       NaN 2024-04-20   
106  New York Knicks 2024-04-28         NaN          NaN       NaN 2024-04-22   
107  New York Knicks 2024-05-05         NaN          NaN       NaN 2024-04-30   
108  New York Knicks 2024-05-12         NaN          NaN       NaN 2024-05-08   
109  New York Knicks 2024-05-19         NaN          NaN       NaN 2024-05-14   

     Round  
0      NaN  
1

In [85]:
import csv

# Destination of the exported table
output_file = "data/teams_playoffs.csv"

# Column names written to the CSV, and the merged_df columns that feed them (same order)
header = ["Team", "Week", "PW_win_pct","Weekly_win_pct","Cum_win_pct","Round"]
source_columns = ["Team", "Week", "PW_win_pct", "Weekly_Win%", "Cum_Win%", "Round"]

# Write the header, then one CSV line per merged_df row.
# Only the six columns above are exported; the merged 'Date' column is left out.
with open(output_file, "w", newline='') as datafile:
    writer = csv.writer(datafile)
    writer.writerow(header)
    # name=None yields plain tuples; index=False keeps the row label out of the file
    writer.writerows(merged_df[source_columns].itertuples(index=False, name=None))