<a href="https://colab.research.google.com/github/samityasir2005/ufc_predictor_ML/blob/main/ufc_pytorch_predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install kagglehub pandas numpy torch matplotlib seaborn scikit-learn

In [None]:
import kagglehub
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import os

# Report the PyTorch build and whether a CUDA device is visible to it.
print(
    f"PyTorch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    sep="\n",
)

In [None]:
# Download the dataset through kagglehub; `path` is whatever location the call returns
# and is joined with the CSV filename when the data is loaded.
DATASET_HANDLE = "mdabbert/ultimate-ufc-dataset"
path = kagglehub.dataset_download(DATASET_HANDLE)
print("Dataset path:", path)

In [None]:
# Load dataset and remove odds features
csv_path = os.path.join(path, 'ufc-master.csv')
df = pd.read_csv(csv_path)
print(f"Original shape: {df.shape}")

# Columns excluded from modelling: betting odds / expected values, plus what look like
# fight-outcome details (finish method, round, time) -- presumably only known after the
# bout; confirm against the dataset documentation.
columns_to_remove = [
    'RedOdds', 'BlueOdds', 'RedDecOdds', 'BlueDecOdds',
    'RSubOdds', 'BSubOdds', 'RKOOdds', 'BKOOdds',
    'RedExpectedValue', 'BlueExpectedValue',
    'Finish', 'FinishDetails', 'FinishRound', 'FinishRoundTime',
    'TotalFightTimeSecs', 'BetterRank',
]

# errors='ignore' skips any listed column that is not present in the loaded frame.
df = df.drop(columns=columns_to_remove, errors='ignore')
print(f"Cleaned shape: {df.shape}")

In [None]:
# Create features comparing red vs blue fighters
def create_features(df):
    """Build red-minus-blue difference features for every fight in ``df``.

    Args:
        df: DataFrame holding the ``Red*`` / ``Blue*`` columns read below
            (ReachCms, Wins, Losses, Draws, AvgSigStrLanded).

    Returns:
        DataFrame indexed like ``df`` with the columns ``reach_diff``,
        ``wins_diff``, ``losses_diff``, ``win_rate_diff`` and
        ``sig_str_landed_diff``. Missing inputs propagate as NaN.
    """
    def red_minus_blue(stat):
        # Difference of the same statistic between the two corners.
        return df[f'Red{stat}'] - df[f'Blue{stat}']

    def corner_win_rate(corner):
        # Wins divided by total bouts; a total of 0 is replaced by 1 so that
        # a fighter with no recorded bouts gets a rate of 0 instead of a division by zero.
        wins = df[f'{corner}Wins']
        bouts = wins + df[f'{corner}Losses'] + df[f'{corner}Draws']
        return wins / bouts.replace(0, 1)

    return pd.DataFrame({
        # Physical advantage
        'reach_diff': red_minus_blue('ReachCms'),
        # Experience and record
        'wins_diff': red_minus_blue('Wins'),
        'losses_diff': red_minus_blue('Losses'),
        # Win rate
        'win_rate_diff': corner_win_rate('Red') - corner_win_rate('Blue'),
        # Striking
        'sig_str_landed_diff': red_minus_blue('AvgSigStrLanded'),
    })

X = create_features(df)

# Binary target: 1 where the 'Winner' column equals 'Red', 0 for every other value.
red_won = df['Winner'] == 'Red'
y = red_won.astype(int)

print(f"Features: {X.shape[1]}")
print(f"Samples: {len(X)}")
print(f"Red wins: {y.mean()*100:.1f}%")


In [None]:
# Clean data and split into training and test sets
# Join features and target first so rows with any missing value are dropped from both together.
combined = pd.concat([X, pd.Series(y, name='target')], axis=1).dropna()
y_clean = combined['target']
X_clean = combined.drop(columns=['target'])

# Stratified 80/20 split with a fixed random_state so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_clean,
    y_clean,
    test_size=0.2,
    stratify=y_clean,
    random_state=42,
)

# Normalize features
# The scaler is fitted on the training split only and then applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")
print(f"Number of features: {X_train.shape[1]}")


In [None]:
# Convert to PyTorch tensors
# Features come from the scaled arrays; labels come from the underlying values of the target Series.
X_train_tensor, X_test_tensor = (
    torch.FloatTensor(scaled) for scaled in (X_train_scaled, X_test_scaled)
)
y_train_tensor, y_test_tensor = (
    torch.FloatTensor(labels.values) for labels in (y_train, y_test)
)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
)

In [None]:
# neural network
class UFCPredictor(nn.Module):
    """Feed-forward binary classifier: input_size -> 32 -> 16 -> 1 with a sigmoid output.

    Dropout (p=0.2) follows the first hidden activation. Because the last layer
    is a sigmoid, ``forward`` returns values in (0, 1) with a trailing dimension of 1.
    """

    def __init__(self, input_size):
        """Create the layer stack for inputs with ``input_size`` features."""
        super().__init__()
        wide, narrow = 32, 16
        layers = [
            nn.Linear(input_size, wide),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(wide, narrow),
            nn.ReLU(),
            nn.Linear(narrow, 1),
            nn.Sigmoid(),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the sigmoid outputs."""
        probabilities = self.network(x)
        return probabilities

# Seed PyTorch's random number generator before building the model so that weight
# initialisation (and the later shuffling of training batches, which draws from the
# same generator) is repeatable across Restart & Run All. 42 matches the
# random_state used for the train/test split.
# NOTE(review): some CUDA kernels can still be non-deterministic; exact
# reproducibility on GPU is not guaranteed by the seed alone.
torch.manual_seed(42)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = UFCPredictor(X_train.shape[1]).to(device)

# BCELoss expects probabilities, which matches the model's final Sigmoid layer.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

print(f"Model running on: {device}")
print(f"Total parameters: {sum(p.numel() for p in model.parameters())}")


In [None]:
# Train the model
# Each epoch runs one optimisation pass over train_loader, then evaluates on
# test_loader without gradient tracking. Per-epoch loss and accuracy are appended
# to the four history lists below.
epochs = 30
train_losses = []
test_losses = []
train_accs = []
test_accs = []

print("Training started...")
for epoch in range(epochs):
    # Training phase
    model.train()  # enables dropout
    train_loss = 0
    train_correct = 0
    train_total = 0

    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        # squeeze(1) removes only the trailing output dimension: (batch, 1) -> (batch,).
        # A bare squeeze() would also collapse a final batch of size 1 to a 0-d tensor,
        # and BCELoss raises when the input and target shapes differ.
        outputs = model(X_batch).squeeze(1)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        # Threshold the predicted probability at 0.5 to get a hard class label.
        predictions = (outputs > 0.5).float()
        train_correct += (predictions == y_batch).sum().item()
        train_total += y_batch.size(0)

    # Evaluation phase
    model.eval()  # disables dropout
    test_loss = 0
    test_correct = 0
    test_total = 0

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch).squeeze(1)
            loss = criterion(outputs, y_batch)

            test_loss += loss.item()
            predictions = (outputs > 0.5).float()
            test_correct += (predictions == y_batch).sum().item()
            test_total += y_batch.size(0)

    # Losses are the mean of the per-batch mean losses; accuracies are percentages
    # over all samples seen in the epoch.
    train_losses.append(train_loss / len(train_loader))
    test_losses.append(test_loss / len(test_loader))
    train_accs.append(100 * train_correct / train_total)
    test_accs.append(100 * test_correct / test_total)

    # Report progress every fifth epoch.
    if (epoch + 1) % 5 == 0:
        print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_losses[-1]:.4f}, Train Acc: {train_accs[-1]:.2f}%, Test Acc: {test_accs[-1]:.2f}%")

print(f"\nTraining complete!")
print(f"Final test accuracy: {test_accs[-1]:.2f}%")


# Model Performance Analysis

In [None]:
# Plot training progress
# One accuracy curve per split, taken from the per-epoch history lists.
plt.figure(figsize=(10, 5))
for history, curve_label in (
    (train_accs, 'Training Accuracy'),
    (test_accs, 'Test Accuracy'),
):
    plt.plot(history, label=curve_label, linewidth=2)
plt.title('Model Training Progress')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.grid(True, alpha=0.3)
plt.legend()
plt.show()

print(f"Final Training Accuracy: {train_accs[-1]:.2f}%")
print(f"Final Test Accuracy: {test_accs[-1]:.2f}%")


In [None]:
# Evaluate model predictions
model.eval()
with torch.no_grad():
    X_test_device = X_test_tensor.to(device)
    # squeeze(1) drops only the trailing output dimension, so the result stays
    # 1-D even if the test set holds a single sample (a bare squeeze() would
    # produce a 0-d array that confusion_matrix cannot consume).
    y_pred_proba = model(X_test_device).squeeze(1).cpu().numpy()
    # Threshold the predicted probability at 0.5; 1 corresponds to a red-corner win.
    y_pred = (y_pred_proba > 0.5).astype(int)

# labels=[0, 1] pins the matrix to 2x2 (row/column 0 = Blue, 1 = Red) even when a
# class is absent from both y_test and y_pred, keeping cm[1, 1] and the tick
# labels below valid.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
accuracy = accuracy_score(y_test, y_pred)

# Display confusion matrix
plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.xlabel('Predicted Winner')
plt.ylabel('Actual Winner')
plt.title(f'Confusion Matrix\nAccuracy: {accuracy*100:.2f}%')
# Heatmap cells are centred at 0.5 and 1.5, so the tick labels are placed there.
plt.xticks([0.5, 1.5], ['Blue', 'Red'])
plt.yticks([0.5, 1.5], ['Blue', 'Red'])
plt.show()

print(f"\nModel Accuracy: {accuracy*100:.2f}%")
# The diagonal of the confusion matrix counts the correct predictions.
print(f"Correctly Predicted: {(cm[0,0] + cm[1,1])} out of {len(y_test)} fights")


# Custom Fight Prediction

In [None]:
# Get fighter stats from dataset
def get_fighter_stats(name, df):
    """Look up a fighter by (partial) name and return their stats from one row of ``df``.

    The name is matched as a literal, case-insensitive substring, so characters
    such as ``.`` or ``(`` are not interpreted as regular-expression syntax.
    The red corner is searched first; the blue corner is only consulted when no
    red-corner row matches. Within a corner the first matching row in ``df``
    order is used.
    NOTE(review): whether that first row is the fighter's most recent bout
    depends on how ``df`` is sorted -- confirm against the dataset.

    Args:
        name: Full or partial fighter name.
        df: DataFrame with ``RedFighter`` / ``BlueFighter`` and the per-corner
            stat columns read below.

    Returns:
        dict with keys ``name``, ``height``, ``reach``, ``age``, ``wins``,
        ``losses``, ``draws``, ``win_streak``, ``sig_str`` and ``td``, or
        ``None`` when the name is blank, not a string, or matches no row.
    """
    # A blank query is a substring of (almost) every name and would return an
    # arbitrary fighter, so treat it as "not found".
    if not isinstance(name, str) or not name.strip():
        return None

    # (result key, column suffix shared by the Red* and Blue* columns)
    stat_columns = (
        ('height', 'HeightCms'),
        ('reach', 'ReachCms'),
        ('age', 'Age'),
        ('wins', 'Wins'),
        ('losses', 'Losses'),
        ('draws', 'Draws'),
        ('win_streak', 'CurrentWinStreak'),
        ('sig_str', 'AvgSigStrLanded'),
        ('td', 'AvgTDLanded'),
    )

    # Search in red corner, then in blue corner
    for corner in ('Red', 'Blue'):
        fighter_col = f'{corner}Fighter'
        # regex=False makes this a plain substring test; na=False treats missing names as non-matches.
        is_match = df[fighter_col].str.contains(name, case=False, na=False, regex=False)
        matches = df[is_match]
        if len(matches) > 0:
            f = matches.iloc[0]
            stats = {'name': f[fighter_col]}
            for key, suffix in stat_columns:
                stats[key] = f[f'{corner}{suffix}']
            return stats
    return None

In [None]:
# Example: Predict a custom matchup
# Fighter 1 takes the "red" side of every difference feature and fighter 2 the
# "blue" side, so the model output is read as fighter 1's win probability.
fighter1_name = "Islam Makhachev"
fighter2_name = "Charles Oliveira"

f1 = get_fighter_stats(fighter1_name, df)
f2 = get_fighter_stats(fighter2_name, df)

if f1 and f2:
    # Create features matching our simplified model
    red_fights = f1['wins'] + f1['losses'] + f1['draws']
    blue_fights = f2['wins'] + f2['losses'] + f2['draws']

    # Same columns, in the same order, as the training features; max(..., 1)
    # avoids dividing by zero for a fighter with no recorded bouts.
    matchup = pd.DataFrame([{
        'reach_diff': f1['reach'] - f2['reach'],
        'wins_diff': f1['wins'] - f2['wins'],
        'losses_diff': f1['losses'] - f2['losses'],
        'win_rate_diff': (f1['wins'] / max(red_fights, 1)) - (f2['wins'] / max(blue_fights, 1)),
        'sig_str_landed_diff': f1['sig_str'] - f2['sig_str']
    }])

    # A missing stat makes the feature NaN, the prediction NaN, and `nan > 0.5`
    # False -- which would silently name fighter 2 the winner. Refuse instead.
    missing_features = matchup.columns[matchup.isna().any()].tolist()
    if missing_features:
        print(f"Cannot predict: missing stats for features {missing_features}")
    else:
        # Scale and predict
        matchup_scaled = scaler.transform(matchup)
        matchup_tensor = torch.FloatTensor(matchup_scaled).to(device)

        model.eval()
        with torch.no_grad():
            prediction = model(matchup_tensor).item()

        # Display results
        print("=" * 60)
        print("FIGHT PREDICTION")
        print("=" * 60)
        print(f"\nFighter 1: {f1['name']} ({f1['wins']}-{f1['losses']})")
        print(f"  Win Probability: {prediction*100:.1f}%")
        print(f"\nFighter 2: {f2['name']} ({f2['wins']}-{f2['losses']})")
        print(f"  Win Probability: {(1-prediction)*100:.1f}%")
        print("\n" + "-" * 60)

        # Confidence is the probability assigned to whichever fighter is predicted to win.
        if prediction > 0.5:
            print(f"PREDICTED WINNER: {f1['name']}")
            conf = prediction
        else:
            print(f"PREDICTED WINNER: {f2['name']}")
            conf = 1 - prediction

        print(f"Confidence: {conf*100:.1f}%")
        print("=" * 60)
else:
    print("Fighter not found! Check the names.")
