In [None]:
import numpy as np
import pandas as pd
import json
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn import BCEWithLogitsLoss
from sklearn.model_selection import train_test_split
import os

# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(root, name) for name in names):
        print(full_path)

In [None]:
# Read the raw fight table from the attached dataset and preview its first rows.
csv_path = "/kaggle/input/ufc-dataset/ufc-master.csv"
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Keep only the rows that have a recorded winner.
# The explicit .copy() makes clean_df an independent frame, so the column
# assignments performed on it in later cells cannot trigger
# SettingWithCopyWarning or depend on pandas' view-vs-copy behaviour.
clean_df = df.dropna(subset=['Winner']).copy()
clean_df.shape

In [None]:
# Encode the label: Red -> 1.0, Blue -> 0.0. The mapped Series is aligned to
# clean_df by index, and .assign returns a new frame instead of writing into a
# frame that may be a view of df.
clean_df = clean_df.assign(target=df["Winner"].map({"Red": 1.0, "Blue": 0.0}))
# Any Winner value other than "Red"/"Blue" maps to NaN. Drop those rows here so
# the numeric median fill applied later cannot turn them into made-up labels.
clean_df = clean_df.dropna(subset=["target"])

In [None]:
# Print the encoded label and then the raw winner column to eyeball the mapping.
for column_name in ("target", "Winner"):
    print(clean_df[column_name])

In [None]:
# Columns removed before modelling, listed one per line and grouped by kind.
cols_to_drop = [
    # venue
    "Location",
    "Country",
    # pre-computed "*Dif" columns
    "LoseStreakDif",
    "WinStreakDif",
    "LongestWinStreakDif",
    "WinDif",
    "LossDif",
    "TotalRoundDif",
    "TotalTitleBoutDif",
    "KODif",
    "SubDif",
    "AvgSubAttDif",
    "AvgTDDif",
    "EmptyArena",
    # KO odds
    "BKOOdds",
    "RKOOdds",
    # match weight-class ranks
    "BMatchWCRank",
    "RMatchWCRank",
    # red-side "*Rank" columns
    "RWFlyweightRank",
    "RWFeatherweightRank",
    "RWStrawweightRank",
    "RWBantamweightRank",
    "RHeavyweightRank",
    "RLightHeavyweightRank",
    "RMiddleweightRank",
    "RWelterweightRank",
    "RLightweightRank",
    "RFeatherweightRank",
    "RBantamweightRank",
    "RFlyweightRank",
    "RPFPRank",
    # blue-side "*Rank" columns
    "BWFlyweightRank",
    "BWFeatherweightRank",
    "BWStrawweightRank",
    "BWBantamweightRank",
    "BHeavyweightRank",
    "BLightHeavyweightRank",
    "BMiddleweightRank",
    "BWelterweightRank",
    "BLightweightRank",
    "BFeatherweightRank",
    "BBantamweightRank",
    "BFlyweightRank",
    "BPFPRank",
    "BetterRank",
    # decision / submission odds
    "RedDecOdds",
    "BlueDecOdds",
    "RSubOdds",
    "BSubOdds",
    # date and finish details
    "Date",
    "Finish",
    "FinishDetails",
    "FinishRound",
    "FinishRoundTime",
    # fighter names and the raw winner string
    "RedFighter",
    "BlueFighter",
    "Winner",
]

In [None]:
# Remove the listed columns; errors='ignore' skips any name that is not present.
clean_df = clean_df.drop(columns=cols_to_drop, errors='ignore')

In [None]:
# Show the frame's shape after the drop, then the label column.
print(clean_df.shape, clean_df["target"], sep="\n")

In [None]:
# Fill missing values in every float64/int64 column with that column's median.
# NOTE(review): the medians are computed over the whole frame, before the
# train/validation split performed in a later cell.
numeric_cols = clean_df.select_dtypes(include=['float64', 'int64']).columns
column_medians = clean_df[numeric_cols].median()
clean_df[numeric_cols] = clean_df[numeric_cols].fillna(column_medians)

In [None]:
# Number of numeric columns that were median-filled.
numeric_cols.size

In [None]:
# Replace missing values in the object-dtype columns with the literal "Unknown".
categorical_cols = clean_df.select_dtypes(include='object').columns
object_part = clean_df[categorical_cols]
clean_df[categorical_cols] = object_part.fillna("Unknown")

In [None]:
# Show which columns were detected as object dtype.
print(f"{categorical_cols}")

In [None]:
# Explicit list of columns to one-hot encode; rebinding the name replaces the
# dtype-detected column Index with a plain list.
categorical_cols = [
    "WeightClass",
    "Gender",
    "BlueStance",
    "RedStance",
]

In [None]:
# One-hot encode the categorical columns; drop_first=True omits the first level
# of each column. The source columns are replaced by the indicator columns, so
# this cell cannot be re-run on its own output.
clean_df = pd.get_dummies(
    data=clean_df,
    columns=categorical_cols,
    drop_first=True,
)

In [None]:
# Preview the encoded frame.
clean_df.head(n=5)

In [None]:
class FightersDataset(Dataset):
    """Map-style dataset built from a DataFrame that holds one label column.

    Every column except ``label_col`` is used as a feature. Missing feature
    values are replaced with 0 and both features and labels are stored as
    float32 NumPy arrays (``self.X`` and ``self.y``).
    """

    def __init__(self, dataframe, label_col="target"):
        """Split ``dataframe`` into a feature matrix and a label vector."""
        frame = dataframe.copy()
        feature_frame = frame.drop(columns=[label_col])
        self.X = feature_frame.fillna(0).astype("float32").values
        label_values = frame[label_col].values
        self.y = label_values.astype("float32")

    def __len__(self):
        """Return the number of rows."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return ``(features, label)`` for row ``idx`` as new tensors."""
        features = torch.tensor(self.X[idx])
        label = torch.tensor(self.y[idx])
        return features, label

In [None]:
# Wrap the full cleaned frame and report how many rows it holds.
dataset = FightersDataset(dataframe=clean_df, label_col="target")
n_rows = len(dataset)
print(n_rows)

In [None]:
# 80/20 split of the cleaned frame with a fixed seed.
# NOTE(review): in the visible cells val_df is only referenced from
# commented-out code; the validation loader is built from a second split of
# train_df further down.
split_frames = train_test_split(clean_df, test_size=0.2, random_state=42)
train_df, val_df = split_frames

In [None]:
# Feature columns fed to the model: the same seven per-fighter statistics,
# first for the Blue corner and then for the Red corner.
per_fighter_stats = (
    'AvgSigStrLanded',
    'AvgSigStrPct',
    'AvgSubAtt',
    'AvgTDLanded',
    'AvgTDPct',
    'WinsByKO',
    'WinsBySubmission',
)
available_features = [
    f"{corner}{stat}"
    for corner in ("Blue", "Red")
    for stat in per_fighter_stats
]

In [None]:
# Pull the selected feature columns and the label out of the training frame
# as NumPy arrays.
X = train_df.loc[:, available_features].values
y = train_df.loc[:, 'target'].values

In [None]:
# Second 80/20 split, this time of the arrays taken from train_df; these are
# the arrays the model is actually trained and validated on.
array_splits = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = array_splits

In [None]:
# Wrap the NumPy splits as float32 tensors. torch.as_tensor with an explicit
# dtype replaces the legacy torch.FloatTensor(...) constructor and states the
# intended conversion directly.
train_ds = TensorDataset(
    torch.as_tensor(X_train, dtype=torch.float32),
    torch.as_tensor(y_train, dtype=torch.float32),
)
val_ds = TensorDataset(
    torch.as_tensor(X_val, dtype=torch.float32),
    torch.as_tensor(y_val, dtype=torch.float32),
)

In [None]:
# train_ds = FightersDataset(train_df, label_col="target")
# val_ds = FightersDataset(val_df, label_col="target")

In [None]:
# Batch both datasets in groups of 64; only the training loader shuffles.
loader_options = {"batch_size": 64}
train_loader = DataLoader(train_ds, shuffle=True, **loader_options)
val_loader = DataLoader(val_ds, **loader_options)

In [None]:
class FightPredictor(nn.Module):
    """Small fully connected network producing one raw logit per input row.

    Layout: ``input_dim -> 128 -> 64 -> 1`` with a ReLU after each of the two
    hidden layers. No sigmoid is applied inside the module.
    """

    def __init__(self, input_dim):
        """Build the layer stack for inputs with ``input_dim`` features."""
        super().__init__()
        # Layers are created in forward order and stored under ``net`` so the
        # parameter names stay net.0.*, net.2.*, net.4.*.
        stack = [
            nn.Linear(in_features=input_dim, out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=1),
        ]
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Return the logits computed by the layer stack for ``x``."""
        logits = self.net(x)
        return logits

In [None]:
# Seed torch's global RNG so weight initialisation (and anything else drawn
# from that generator afterwards) is repeatable between runs.
torch.manual_seed(42)
# Size the input layer from the feature list rather than a hard-coded 14, so
# the model always matches the columns selected in available_features and the
# input_dim written to the saved metadata.
model = FightPredictor(input_dim=len(available_features))

In [None]:
# Binary cross-entropy on raw logits, optimised with Adam
# (learning rate 1e-3, weight decay 1e-4).
criterion = BCEWithLogitsLoss()
optimizer = optim.Adam(
    params=model.parameters(),
    lr=1e-3,
    weight_decay=1e-4,
)

In [None]:
# Train for 60 epochs and report the sample-weighted mean loss of each epoch.
# The batch variables are named batch_X / batch_y so they do not overwrite the
# notebook-level X and y arrays, which earlier cells read when re-run.
for epoch in range(60):
    model.train()
    running_loss = 0.0
    samples_seen = 0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        # The model emits shape (batch, 1); squeeze to match the label shape.
        logits = model(batch_X).squeeze(1)
        loss = criterion(logits, batch_y)
        loss.backward()
        optimizer.step()
        batch_size = batch_y.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size
    # Average over the whole epoch rather than printing only the last batch's
    # loss; max(..., 1) keeps an empty loader from raising.
    epoch_loss = running_loss / max(samples_seen, 1)
    print(f"Epoch {epoch+1}: loss={epoch_loss:.4f}")

In [None]:
from torch.nn.functional import sigmoid

# Measure accuracy on the validation loader with gradients disabled.
# Batch variables are named batch_X / batch_y so the notebook-level X and y
# arrays are not overwritten.
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for batch_X, batch_y in val_loader:
        # Convert logits to probabilities and threshold at 0.5.
        probs = torch.sigmoid(model(batch_X)).squeeze(1)
        pred_labels = (probs > 0.5).float()
        correct += (pred_labels == batch_y).sum().item()
        total += batch_y.size(0)

# Report NaN instead of raising ZeroDivisionError when the loader is empty.
val_accuracy = correct / total if total else float("nan")
print("Validation accuracy:", val_accuracy)

In [None]:
# Compare the shape of the training frame with the feature array actually used.
print(train_df.shape, X_train.shape, sep="\n")

In [None]:
# Persist the trained weights plus the metadata needed to rebuild the model
# (input width and the ordered feature names).
save_dir = "/kaggle/working"
# Create the output directory when it is missing so the save also works
# outside the Kaggle runtime.
os.makedirs(save_dir, exist_ok=True)
torch.save({"model_state_dict": model.state_dict()}, os.path.join(save_dir, "predictor.pt"))
metadata = {
    "input_dim": len(available_features),
    "feature_names": available_features
}
with open(os.path.join(save_dir, "predictor_meta.json"), "w", encoding="utf-8") as f:
    json.dump(metadata, f)