In [6]:
import pandas as pd
from sklearn.utils import resample

# Read the recorded samples from disk
df = pd.read_csv("playerOutput.csv")

# Preview the first rows to confirm the columns loaded as expected
print(df.head())

# Names of the two columns whose value combinations are balanced
col1, col2 = 'leftHeld', 'RightHeld'

# Tally how many rows exist for every observed (col1, col2) pair
combination_counts = (
    df.groupby([col1, col2])
    .size()
    .reset_index(name='counts')
)
print(combination_counts)

# Target size of each combination, expressed as a fraction of the original
# row count (1 means each combination is sized to the full dataset)
desired_proportion = 1

# Convert that fraction into an absolute per-combination row count
total_samples = df.shape[0]
desired_counts = int(desired_proportion * total_samples)

# Resample each combination to the desired count
def resample_combination(df, col1_value, col2_value, n_samples):
    """Return ``n_samples`` rows drawn from a single value combination.

    Rows are selected where the module-level columns ``col1`` and ``col2``
    equal ``col1_value`` and ``col2_value`` respectively. The selection is
    down-sampled without replacement when it holds at least ``n_samples``
    rows, and over-sampled with replacement otherwise.

    Args:
        df: DataFrame containing the ``col1`` and ``col2`` columns.
        col1_value: Value to match in the ``col1`` column.
        col2_value: Value to match in the ``col2`` column.
        n_samples: Number of rows to return.

    Returns:
        A DataFrame with ``n_samples`` rows taken from the matching subset.

    Raises:
        ValueError: If no row of ``df`` matches the requested combination,
            since there is nothing to sample from.
    """
    subset = df[(df[col1] == col1_value) & (df[col2] == col2_value)]
    if subset.empty:
        raise ValueError(
            f"No rows with {col1}={col1_value!r} and {col2}={col2_value!r} "
            "to resample from"
        )
    # Sample with replacement only when the subset is too small. When it is
    # exactly n_samples long, every row is kept once instead of bootstrapped.
    needs_replacement = len(subset) < n_samples
    return resample(
        subset,
        replace=needs_replacement,
        n_samples=n_samples,
        random_state=42,
    )

# Create a balanced dataset.
# Every (col1, col2) combination that occurs in the data is resampled to
# desired_counts rows. Iterating over the observed combinations, rather than
# a hand-written list, keeps any combination from being left out.
observed_combinations = df.groupby([col1, col2]).size().index
balanced_df = pd.concat([
    resample_combination(df, col1_value, col2_value, desired_counts)
    for col1_value, col2_value in observed_combinations
])

# Shuffle the balanced dataset so the combinations are interleaved
balanced_df = balanced_df.sample(frac=1, random_state=42).reset_index(drop=True)

# Check the new proportions
new_combination_counts = balanced_df.groupby([col1, col2]).size().reset_index(name='counts')
print(new_combination_counts)

# Save the balanced dataset to a new CSV file
balanced_df.to_csv("balanced_playerOutput.csv", index=False)

   Unnamed: 0  ball_x  ball_y  ball_dx  ball_dy  paddle_x  leftHeld  RightHeld
0           0     400     300       -5        5       350     False      False
1           1     395     305       -5        5       350     False      False
2           2     390     310       -5        5       350     False      False
3           3     385     315       -5        5       350     False      False
4           4     380     320       -5        5       350     False      False
   leftHeld  RightHeld  counts
0     False      False    2032
1     False       True     586
2      True      False     581
3      True       True       7
   leftHeld  RightHeld  counts
0     False      False    3206
1     False       True    3206
2      True      False    3206
3      True       True    3206
