In [1]:
# check rows for each data file per season.  If totals aren't the same, don't procede - find out why the numbers don't match and fix
import os
import pandas as pd

# List of prefixes
prefixes = ["14-15", "15-16", "16-17", "17-18", "18-19", "19-20", "20-21", "21-22", "22-23"]

for prefix in prefixes:
    # List of file names with the current prefix
    file_names = [f"{prefix}_BasicData.csv", f"{prefix}_AdvancedData.csv", f"{prefix}_MiscData.csv", f"{prefix}_TOIData.csv"]

    # Create an empty list to store DataFrames
    dfs = []

    # Read each CSV file, set 'UID' and 'season' as indices, and store the DataFrame in the list
    for file_name in file_names:
        file_path = os.path.join("HockeyRef_Data_Files", file_name)
        df = pd.read_csv(file_path)
        df.set_index(['UID', 'season'], inplace=True)
        dfs.append(df)

    # Check if all DataFrames have the same number of rows
    row_counts = [len(df) for df in dfs]
    if len(set(row_counts)) == 1:
        print(f"All DataFrames for {prefix} have the same number of rows.")
    else:
        print(f"DataFrames for {prefix} have different numbers of rows. Please check the data.")

    # Continue with the merge only if all DataFrames have the same number of rows
    if len(set(row_counts)) == 1:
        # Create an empty DataFrame to store the combined data
        combined_df = pd.DataFrame()

        # Merge the DataFrames based on their indices
        for df in dfs:
            combined_df = pd.merge(combined_df, df, left_index=True, right_index=True, how="outer")

        # Resetting indices to default (0, 1, 2, ...) after the merge
        combined_df.reset_index(inplace=True)

        # Save the combined DataFrame to a new CSV file
        combined_file_path = os.path.join("HockeyRef_Data_Files", f"{prefix}_CombinedData.csv")
        combined_df.to_csv(combined_file_path, index=False)

        # Display the combined DataFrame
        print(combined_df)





All DataFrames for 14-15 have the same number of rows.


ValueError: cannot join with no overlapping index names