In [5]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.base import BaseEstimator, TransformerMixin
from xgboost import XGBRegressor
import itertools
import joblib

from sklearn.utils.class_weight import compute_class_weight


In [7]:


class LabelEncoderWrapper(BaseEstimator, TransformerMixin):
    """Adapter that lets ``LabelEncoder`` encode a single feature column.

    ``LabelEncoder`` works on 1-D input, while transformers used inside a
    ``ColumnTransformer`` receive 2-D input and must return 2-D output. This
    wrapper flattens a single-column input before delegating to the encoder
    and reshapes the encoded result back into a column.

    Note: ``LabelEncoder.transform`` raises ``ValueError`` for labels that
    were not present during ``fit``; this wrapper does not change that.
    """

    def __init__(self):
        self.label_encoder = LabelEncoder()

    @staticmethod
    def _as_1d(X):
        """Return single-column input as a 1-D array with one entry per row.

        ``X.squeeze()`` is deliberately avoided: it collapses *every* axis of
        length one, so a one-row, one-column frame becomes a scalar, which
        ``LabelEncoder`` rejects.

        Raises:
            ValueError: if ``X`` is 2-D with more than one column.
        """
        # pandas DataFrame / Series expose to_numpy(); plain arrays pass through.
        values = X.to_numpy() if hasattr(X, "to_numpy") else X
        if values.ndim == 2 and values.shape[1] != 1:
            raise ValueError(
                "LabelEncoderWrapper expects exactly one column, got {}".format(values.shape[1])
            )
        return values.reshape(-1)

    def fit(self, X, y=None):
        """Learn the label-to-integer mapping from the single column in ``X``.

        ``y`` is accepted for pipeline compatibility and ignored.
        Returns ``self``.
        """
        self.label_encoder.fit(self._as_1d(X))
        return self

    def transform(self, X, y=None):
        """Encode the single column in ``X`` and return it with shape ``(n_rows, 1)``.

        ``y`` is accepted for pipeline compatibility and ignored.
        """
        return self.label_encoder.transform(self._as_1d(X)).reshape(-1, 1)

# Load the ride history; the path is relative to the notebook's working directory.
df = pd.read_csv('../Resources/finalv1.csv')

# Filter out the zero scores
df_non_zero = df[df['score'] != 0]

# Define the features and the target.
# NOTE: only 'rider' and 'bull' are listed in the ColumnTransformer below, and its
# default remainder='drop' discards the other columns, so the model is trained on
# those two features only.
X = df_non_zero[['rider', 'bull', 'vsleft_perc', 'vsright_perc',
       'vsavg_bull_power', 'hand', 'high_score', 'time', 'round', 'bull_power_rating', 'bullscore', 'buckoff_perc_vs_rh_riders',
       'buckoff_perc_vs_lh_riders']]
y = df_non_zero['score']

# Balanced weight per distinct score value. These stay available for inspection but
# are intentionally NOT passed to the regressor: the original code fed
# 1 / class_weights[1] into `scale_pos_weight`, which is a binary-classification
# setting, and index 1 is just whichever score happens to be second in unique()
# order. To actually weight samples, map these weights onto `y` and pass the result
# as `sample_weight` to `model.fit`.
classes = df_non_zero['score'].unique()
class_weights = compute_class_weight('balanced', classes=classes, y=y)

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), ['rider']),
        ('num', LabelEncoderWrapper(), ['bull'])
    ])

# Preprocess the data
X_preprocessed = preprocessor.fit_transform(X)

# Train the XGBoost regressor on the encoded rider/bull features
model = XGBRegressor(objective='reg:squarederror', eval_metric='rmse')
model.fit(X_preprocessed, y)

# Get user input for the riders and bulls
riders = [input("Enter rider {}: ".format(i+1)) for i in range(2)]
bulls = [input("Enter bull {}: ".format(i+1)) for i in range(2)]

# Fail early with a readable message: the label encoder used for 'bull' cannot
# encode names it did not see during fitting. (Unknown riders do not fail; the
# one-hot encoder is configured with handle_unknown='ignore'.)
known_bulls = set(X['bull'])
unknown_bulls = [bull for bull in bulls if bull not in known_bulls]
if unknown_bulls:
    raise ValueError("Unknown bull(s), not present in the training data: {}".format(unknown_bulls))

# Generate unique rider/bull combinations. dict.fromkeys de-duplicates while
# keeping input order; a set would make the row order vary between runs.
combinations = list(itertools.product(riders, bulls))
unique_combinations = list(dict.fromkeys(combinations))

# Prepare the data for prediction
new_data = pd.DataFrame(unique_combinations, columns=['rider', 'bull'])

# Apply the same preprocessing to the new data
new_data_preprocessed = preprocessor.transform(new_data)

# Make predictions for the new data
predictions = model.predict(new_data_preprocessed)

# Create a result DataFrame, best predicted score first. A stable sort keeps
# tied predictions in input order so the table is reproducible.
result_df = pd.DataFrame(unique_combinations, columns=['Rider', 'Bull'])
result_df['Predicted Score'] = predictions
result_df = result_df.sort_values(by='Predicted Score', ascending=False, kind='mergesort').reset_index(drop=True)

# Save the preprocessor and model.
# NOTE(review): the pickled preprocessor references LabelEncoderWrapper, so that
# class presumably has to be defined/importable wherever the file is loaded.
joblib.dump(preprocessor, 'preprocessor.pkl')
joblib.dump(model, 'model.pkl')

# Display the result
print(result_df)

               Rider        Bull  Predicted Score
0    Jose Vitor Leme  Blood Moon        87.567245
1    Jose Vitor Leme  Ivy League        87.331024
2  Austin Richardson  Blood Moon        85.473808
3  Austin Richardson  Ivy League        85.473808
