In [None]:
import pandas as pd 

from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression

# Load datasets
# fighter_stats: later cells look rows up by its 'fighter_id' column and read
#   the columns named in fighter_features.
# fights: iterated row by row later; its 'fighter1_id', 'fighter2_id' and
#   'winner_id' columns are read.
# Both paths are relative, so they resolve against the kernel's working directory.
fighter_stats = pd.read_csv("fighter_stats_cleaned.csv")
fights = pd.read_csv("fights_cleaned.csv") 

In [None]:
# Unfitted linear regression estimator with default settings.
# NOTE(review): the target assembled later in this notebook is a binary 0/1
# label (fighter1 wins vs. fighter2 wins) — confirm a regressor, rather than
# a classifier, is what is intended here.
mod = LinearRegression()

In [None]:
# Per-fighter stat columns pulled from the fighter stats table. Each name is
# used three ways later on: as '<prefix>_<name>' for each of the two fighters
# and as 'diff_<name>' for the fighter1-minus-fighter2 difference.
# NOTE(review): 'stance' looks categorical rather than numeric — confirm how it
# is encoded in the cleaned CSV before differencing or fitting on it.
fighter_features = [
    'wins', 'losses', 'draws', 'height', 'weight', 'reach', 'stance',
    'strikes_landed_per_min', 'striking_accuracy', 'strikes_absorbed_per_min',
    'striking_defense', 'takedown_avg', 'takedown_accuracy', 
    'takedown_defense', 'submission_avg'
]

def get_fighter_features(fighter_stats_data, fighter_id, prefix, features=None):
    """Return one fighter's stats as a flat dict with prefixed keys.

    Parameters
    ----------
    fighter_stats_data : pandas.DataFrame
        Table with a 'fighter_id' column and one column per requested feature.
    fighter_id
        Value to match against the 'fighter_id' column.
    prefix : str
        Prepended to every key as f'{prefix}_{feature}' (e.g. 'f1' or 'f2').
    features : iterable of str, optional
        Column names to extract. When omitted, the module-level
        ``fighter_features`` list is used, which is the original behaviour.

    Returns
    -------
    dict or None
        ``{f'{prefix}_{feature}': value}`` for each requested feature, in the
        order given, or ``None`` when no row has the requested id.

    Raises
    ------
    KeyError
        If 'fighter_id' or a requested feature column is missing from
        ``fighter_stats_data``.
    """
    # Search for the fighter in the dataset
    fighter_row = fighter_stats_data[fighter_stats_data['fighter_id'] == fighter_id]
    if fighter_row.empty:
        return None

    # Resolve the default lazily so callers that pass their own list never
    # depend on the notebook-level global.
    if features is None:
        features = fighter_features

    # If several rows share the id, the first matching row is the one used.
    # Values are read column by column so each keeps its own column dtype.
    return {
        f'{prefix}_{feature}': fighter_row[feature].values[0]
        for feature in features
    }


In [None]:
# Build one training row per decided fight: both fighters' raw stats, their
# pairwise differences, and a binary label for who won.
# Rows are accumulated in plain lists and converted once at the end, so
# X_data / y_data are only ever bound to DataFrames.
feature_rows = []
targets = []

for _, fight in fights.iterrows():

    fighter1_id = fight['fighter1_id']
    fighter2_id = fight['fighter2_id']

    winner_id = fight['winner_id']

    # Skip if winner_id is NaN (no winner)
    if pd.isna(winner_id):
        continue

    # Skip fights whose recorded winner is neither participant; labelling
    # them 0 would wrongly count them as a fighter2 win.
    if winner_id != fighter1_id and winner_id != fighter2_id:
        continue

    # Get individual fighter features for f1 & f2
    f1_stats = get_fighter_features(fighter_stats, fighter1_id, 'f1')
    f2_stats = get_fighter_features(fighter_stats, fighter2_id, 'f2')

    # Skip if either fighter is missing stats
    if f1_stats is None or f2_stats is None:
        continue

    # Combine features for f1 & f2
    fight_features = {**f1_stats, **f2_stats}

    for feature in fighter_features:
        f1_val = fight_features[f'f1_{feature}']
        f2_val = fight_features[f'f2_{feature}']

        if pd.isna(f1_val) or pd.isna(f2_val):
            # A value is missing on at least one side: record "no difference".
            diff = 0
        else:
            try:
                diff = f1_val - f2_val
            except TypeError:
                # Non-numeric feature (e.g. a text-valued 'stance'), where
                # subtraction is undefined: record 1 when the two values
                # differ and 0 when they match.
                diff = int(f1_val != f2_val)
        fight_features[f'diff_{feature}'] = diff

    # Contains features for both fighters (f1 & f2 'fighter_features', difference in features)
    feature_rows.append(fight_features)
    # Target 1 if fighter1 wins, 0 if fighter2 wins
    targets.append(int(winner_id == fighter1_id))

X_data = pd.DataFrame(feature_rows)
y_data = pd.DataFrame(targets)
