In [None]:
import pandas as pd
import numpy as np

# Fix the global NumPy seed so the simulated kicks are reproducible.
np.random.seed(42)

n = 10000

# Imbalanced probabilities
directions = ['Left', 'Center', 'Right']
probs = [0.6, 0.25, 0.15]

# Simulate kick directions
kicks = np.random.choice(directions, size=n, p=probs)
df = pd.DataFrame({'kick_direction': kicks})

# Track how many kicks since each direction occurred.
#
# For row i and each direction, the counter is the distance (in kicks) back to
# the most recent earlier kick in that direction. While a direction has not
# occurred yet, the counter is simply the row index i. The counters for row i
# are computed BEFORE row i's own kick is recorded, so they only ever describe
# the past.
last_seen = {direction: -1 for direction in directions}  # -1 == "not seen yet"

# Accumulate into plain int64 arrays and attach them to the frame once at the
# end; writing cell-by-cell through df.loc is far slower for the same result.
since = {direction: np.zeros(n, dtype=np.int64) for direction in directions}

for i, kick in enumerate(kicks.tolist()):
    for direction in directions:
        seen_at = last_seen[direction]
        since[direction][i] = i if seen_at == -1 else i - seen_at
    # Only now register the current kick as the latest one for its direction.
    last_seen[kick] = i

# Creates since_left, since_center, since_right (in that column order).
for direction in directions:
    df[f'since_{direction.lower()}'] = since[direction]



In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix



# Baseline experiment: multinomial logistic regression with NO class weights,
# predicting the kick direction from the three "kicks since" counters.

# Features (since each direction last occurred)
X = df[['since_left', 'since_center', 'since_right']]

# Target (actual kick direction)
y = df['kick_direction']

# Split into training and testing sets (80/20, stratified on the target so
# both sets keep the same class proportions)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Train multinomial logistic regression model WITHOUT weights.
# `multi_class` is intentionally not passed: it is deprecated in recent
# scikit-learn, and with the lbfgs solver on a multiclass target the default
# already fits a multinomial model.
model = LogisticRegression(
    solver='lbfgs',
    max_iter=1000,
    random_state=42
)

model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluation.
# zero_division=0 reports precision/F1 as 0.00 for classes the model never
# predicts (the same numbers as before) without emitting an
# UndefinedMetricWarning for each of them.
print(classification_report(
    y_test, y_pred, labels=['Left', 'Center', 'Right'], zero_division=0
))


              precision    recall  f1-score   support

        Left       0.61      1.00      0.76      1222
      Center       0.00      0.00      0.00       489
       Right       0.00      0.00      0.00       289

    accuracy                           0.61      2000
   macro avg       0.20      0.33      0.25      2000
weighted avg       0.37      0.61      0.46      2000



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix



# Second experiment: same features, target and split as the unweighted run,
# but the classifier is trained with balanced class weights.

# Features (time since each direction was last kicked)
X = df[['since_left', 'since_center', 'since_right']]
y = df['kick_direction']

# Train/test split (80/20, stratified on the target, same seed as before so
# the two experiments are evaluated on the same rows)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Logistic Regression WITH balanced class weights.
# `multi_class` is intentionally not passed: it is deprecated in recent
# scikit-learn, and with the lbfgs solver on a multiclass target the default
# already fits a multinomial model.
model = LogisticRegression(
    solver='lbfgs',
    class_weight='balanced',   # ← key difference
    max_iter=1000,
    random_state=42
)

# Fit the model
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluation.
# zero_division=0 keeps the report warning-free should any class end up with
# no predictions; it does not change the reported numbers otherwise.
print(classification_report(
    y_test, y_pred, labels=['Left', 'Center', 'Right'], zero_division=0
))




              precision    recall  f1-score   support

        Left       0.62      0.24      0.34      1222
      Center       0.25      0.21      0.23       489
       Right       0.14      0.55      0.22       289

    accuracy                           0.27      2000
   macro avg       0.34      0.33      0.26      2000
weighted avg       0.46      0.27      0.30      2000

