In [2]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Function to add year column based on file name
def add_year_to_csv(file_path):
    # Extract the year from the filename
    file_name = os.path.basename(file_path)
    year = file_name.split('_')[1][:4]  # Assumes year is the second element and is 4 digits

    # Read the CSV
    df = pd.read_csv(file_path)

    # Add year column
    df['Year'] = year

    # Save updated CSV
    df.to_csv(file_path, index=False)
    print(f"Year added to {file_path}")

# File paths
eastern_file = "last5seasonsrecords/NBA_2024_Eastern_Conference_Standing.csv"
team_misc_stats_file = "advancedteamstats/Boston_Celtics_2023_24_Team_Misc_Stats.csv"

# Add year to both files
add_year_to_csv(eastern_file)
add_year_to_csv(team_misc_stats_file)

# Load updated datasets
eastern_standing = pd.read_csv(eastern_file)
team_misc_stats = pd.read_csv(team_misc_stats_file)

# Merge datasets on team and year
merged_data = pd.merge(eastern_standing, team_misc_stats, on=["Team", "Year"])

# Select relevant columns for features and target
features = ['W/L%', 'NetRtg', 'ORtg', 'DRtg']  # Example features
target = 'Win Finals'

# Ensure the target is binary
merged_data[target] = (merged_data[target] > 0).astype(int)

# Drop rows with missing values
merged_data = merged_data.dropna(subset=features + [target])

# Train-test split
X = merged_data[features]
y = merged_data[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train RandomForest model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Plot feature importance
importances = model.feature_importances_
plt.figure(figsize=(8, 5))
plt.bar(features, importances, color="skyblue")
plt.title("Feature Importance for Championship Prediction")
plt.xlabel("Features")
plt.ylabel("Importance")
plt.show()


Year added to last5seasonsrecords/NBA_2024_Eastern_Conference_Standing.csv
Year added to advancedteamstats/Boston_Celtics_2023_24_Team_Misc_Stats.csv


KeyError: 'Team'

In [3]:
print(eastern_standing.columns)
print(team_misc_stats.columns)


Index(['Eastern Conference', 'W', 'L', 'W/L%', 'SOS', 'rSOS', 'SRS', 'Current',
       'Remain', 'Best', 'Worst', 'Playoffs', 'Division', '1', '2', '3', '4',
       '5', '6', '7', '8', 'Unnamed: 21', '1-6', '7.1', '8.1', '9', '10',
       'Out', 'Unnamed: 28', 'Win Conf', 'Win Finals', 'Year'],
      dtype='object')
Index(['Category', 'TeamR', 'Lg Rank', 'Year'], dtype='object')


In [8]:
import pandas as pd
import os

# Define the folder for team misc stats
team_misc_stats_folder = "./advancedteamstats"

# Function to extract team name from filename
def extract_team_name(filename):
    return filename.split("_")[0]  # Extract team name before the first underscore

# Add Team and Year columns to the files in the folder
for file in os.listdir(team_misc_stats_folder):
    if file.endswith(".csv"):
        file_path = os.path.join(team_misc_stats_folder, file)
        data = pd.read_csv(file_path)
        team_name = extract_team_name(file)
        year = file.split("_")[-1].split(".")[0]  # Extract year from filename
        data['Team'] = team_name
        data['Year'] = year
        data.to_csv(file_path, index=False)

# Verify by loading one of the updated files
file_to_check = os.path.join(team_misc_stats_folder, "LAL_2020_Team_Misc_Stats.csv")
updated_data = pd.read_csv(file_to_check)
print(updated_data.head())

  Category TeamR  Lg Rank Team   Year
0        W    52      3.0  LAL  Stats
1        L    19     28.0  LAL  Stats
2       PW    48      6.0  LAL  Stats
3       PL    23      5.0  LAL  Stats
4      MOV  5.79      5.0  LAL  Stats


In [10]:
import pandas as pd
import os

# Define the folder for team misc stats
team_misc_stats_folder = "./advancedteamstats"

# Function to extract team name from filename
def extract_team_name(filename):
    return filename.split("_")[0]  # Extract team name before the first underscore

# Function to extract year from filename
def extract_year(filename):
    # Extract the second-to-last segment assuming filenames like `Boston_Celtics_2024_Team_Misc_Stats.csv`
    segments = filename.split("_")
    for segment in segments:
        if segment.isdigit():  # Check if the segment is numeric (e.g., '2024')
            return segment
    return "Unknown"  # Fallback if no year found

# Add Team and Year columns to the files in the folder
for file in os.listdir(team_misc_stats_folder):
    if file.endswith(".csv"):
        file_path = os.path.join(team_misc_stats_folder, file)
        data = pd.read_csv(file_path)
        team_name = extract_team_name(file)
        year = extract_year(file)
        data['Team'] = team_name
        data['Year'] = year
        data.to_csv(file_path, index=False)

# Verify by loading one of the updated files
file_to_check = os.path.join(team_misc_stats_folder, "Boston_Celtics_2024_Team_Misc_Stats.csv")
updated_data = pd.read_csv(file_to_check)
print(updated_data.head())


  Category  TeamR  Lg Rank  Year    Team
0        W     64      1.0  2024  Boston
1        L     18     30.0  2024  Boston
2       PW     66      1.0  2024  Boston
3       PL     16      1.0  2024  Boston
4      MOV  11.34     29.0  2024  Boston
