# UFC Fight Prediction Model Prototyping

This notebook prototypes several machine learning models for predicting UFC fight outcomes from the scraped fight, result, and fighter data.

## 1. Setup and Data Loading

In [None]:
# Standard imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the four scraped CSV files, in a fixed order, into their DataFrames
fight_stats, fight_results, fighter_details, fighter_tott = [
    pd.read_csv(csv_path)
    for csv_path in (
        '../scrape_ufc_stats/ufc_fight_stats.csv',
        '../scrape_ufc_stats/ufc_fight_results.csv',
        '../scrape_ufc_stats/ufc_fighter_details.csv',
        '../scrape_ufc_stats/ufc_fighter_tott.csv',
    )
]

## 2. Data Preprocessing

In [None]:
# Function to clean and preprocess data
def preprocess_data(fight_stats, fight_results, fighter_details, fighter_tott):
    """Placeholder for cleaning and merging the loaded tables.

    Not implemented yet: the arguments are ignored and ``None`` is returned.

    TODO: Implement data cleaning and preprocessing
        1. Handle missing values
        2. Convert percentages to floats
        3. Extract numerical values from time-based features
        4. Create fighter-specific aggregated statistics
        5. Merge relevant features from all datasets
    """
    return None

## 3. Feature Engineering

In [None]:
# Function to create features
def engineer_features(df):
    """Placeholder for deriving model features from ``df``.

    Not implemented yet: ``df`` is ignored and ``None`` is returned.

    TODO: Implement feature engineering
        1. Calculate win/loss ratios
        2. Create striking efficiency metrics
        3. Generate style matchup indicators
        4. Add time-based features
    """
    return None

## 4. Model Implementation

In [None]:
# Import models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [None]:
# Function to evaluate models
def evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Args:
        model: Estimator exposing ``fit`` and ``predict``. If it also exposes
            ``predict_proba`` or ``decision_function``, that output is used
            for the ROC AUC computation.
        X_train: Training features passed to ``model.fit``.
        X_test: Test features passed to ``model.predict``.
        y_train: Training labels passed to ``model.fit``.
        y_test: True test labels that the predictions are compared against.

    Returns:
        dict: Keys ``'accuracy'``, ``'precision'``, ``'recall'``, ``'f1'``
        and ``'roc_auc'`` mapped to the corresponding metric value.

    Note:
        ``precision_score``, ``recall_score`` and ``f1_score`` are called with
        their defaults, so this function assumes a binary target.
    """
    # Train model
    model.fit(X_train, y_train)

    # Hard class predictions drive the threshold-based metrics
    y_pred = model.predict(X_test)

    # ROC AUC is a ranking metric: it needs continuous scores, not hard
    # labels. Feeding it labels collapses the curve to a single operating
    # point and misreports the AUC.
    if hasattr(model, 'predict_proba'):
        # Column 1 is the probability of model.classes_[1], the greater
        # label, which roc_auc_score treats as the positive class.
        y_score = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, 'decision_function'):
        y_score = model.decision_function(X_test)
    else:
        # No score output available; fall back to the original behavior.
        y_score = y_pred

    # Calculate metrics
    metrics = {
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'f1': f1_score(y_test, y_pred),
        'roc_auc': roc_auc_score(y_test, y_score)
    }

    return metrics

## 5. Model Comparison

In [None]:
# Candidate classifiers to compare, keyed by a short name
models = dict(
    logistic=LogisticRegression(),
    random_forest=RandomForestClassifier(),
    xgboost=XGBClassifier(),
    svm=SVC(probability=True),
)

# Starts empty; nothing in this cell fills it yet
results = dict()

## 6. Results Analysis

In [None]:
# Function to visualize results
def plot_results(results):
    """Placeholder for visualizing the model comparison.

    Not implemented yet: ``results`` is ignored and ``None`` is returned.

    TODO: Implement visualization of model comparisons
        1. Bar plots of metrics
        2. ROC curves
        3. Feature importance plots
    """
    return None

## 7. Model Selection and Tuning

In [None]:
# Hyperparameter tuning for best model
from sklearn.model_selection import GridSearchCV

# TODO: Implement grid search for best performing model