In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

# Load the fight dataset and renumber the rows 0..n-1.
df = pd.read_csv("ufc-master-elo.csv").reset_index(drop=True)  # Change to your actual file

# Basic Details
print("\n Dataset Shape:", df.shape)
# DataFrame.info() writes its report directly to stdout and returns None, so
# it must not be passed to print() (that printed the report first and then
# "Column Info: None"). Print the label, then let info() emit the report.
print("\n Column Info:")
df.info()
print("\n Descriptive Statistics:", df.describe())
print("\n Missing Values:", df.isnull().sum())

# Drop every row that has a missing value in any column other than the ones
# listed in excluded_na_drop; those columns are allowed to stay empty.
excluded_na_drop = ["FinishDetails"]
df = df.dropna(subset=[name for name in df.columns if name not in excluded_na_drop])
print("\n Df without Nan values:", df.shape)

# Feature Distributions
# Grid of 30-bin histograms drawn by DataFrame.hist on a 12x8 inch figure.
df.hist(bins=30, figsize=(12, 8))
plt.tight_layout()  # re-space the subplots before rendering
plt.show()

# Categorical Encoding (preparation for the correlation matrix)
# Text columns are replaced by integer codes so they can take part in df.corr().
label_encoder_winner = LabelEncoder()
label_encoder_stance = LabelEncoder()
label_encoder_finish_method = LabelEncoder()
label_encoder_finish_details = LabelEncoder()

# Fit the shared stance encoder ONCE on the values of both corners, then
# transform each column. Calling fit_transform per column re-fits the encoder,
# so the same stance could receive different codes in RedStance and BlueStance
# whenever the two columns do not contain exactly the same set of values, and
# the encoder would only remember BlueStance's classes afterwards.
label_encoder_stance.fit(pd.concat([df['RedStance'], df['BlueStance']]))
df['RedStance'] = label_encoder_stance.transform(df['RedStance'])
df['BlueStance'] = label_encoder_stance.transform(df['BlueStance'])

df['Finish'] = label_encoder_finish_method.fit_transform(df['Finish'])
# NOTE(review): if FinishDetails was excluded from an earlier NaN drop it may
# still hold missing values here; how LabelEncoder treats NaN mixed with
# strings depends on the scikit-learn version -- TODO confirm.
df['FinishDetails'] = label_encoder_finish_details.fit_transform(df['FinishDetails'])
df['Winner'] = label_encoder_winner.fit_transform(df['Winner'])

# Remove the columns that are not wanted in the correlation analysis.
df = df.drop(
    columns=[
        "Date",
        "Location",
        "Country",
        "TitleBout",
        "WeightClass",
        "Gender",
        "RedFighter",
        "BlueFighter",
        "BetterRank",
        "FinishRoundTime",
        "Unnamed: 0",
    ]
)

# Correlation Heatmap
# Pairwise correlation of the columns left in df, kept in correlation_matrix
# so later plots can reuse it.
correlation_matrix = df.corr()

plt.figure(figsize=(25, 20))
# Diverging colour map centred on 0; every cell is annotated with its value
# rounded to two decimals.
sns.heatmap(
    data=correlation_matrix,
    cmap="coolwarm",
    center=0,
    annot=True,
    fmt=".2f",
    annot_kws={"size": 10},
)
plt.title("Correlation Heatmap of Fight Metrics", fontsize=16)
plt.xticks(rotation=45, ha="right", fontsize=12)  # slanted labels on the x axis
plt.yticks(rotation=0, fontsize=12)
plt.show()

# Correlation of each feature with the encoded 'Winner' column
plt.figure(figsize=(12, 14))
outcome_corr = (
    correlation_matrix['Winner']
    .drop('Winner')  # leave out the column's correlation with itself
    .sort_values(ascending=False)  # signed order: most positive first, most negative last
)

# Horizontal bars: one per feature, bar length is the correlation value.
sns.barplot(y=outcome_corr.index, x=outcome_corr.values, palette='coolwarm')
plt.xlabel("Correlation")
plt.ylabel("Feature")
plt.title("Feature Correlation with Fight Outcome", fontsize=16)
plt.tight_layout()
plt.show()



In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

# Load the fight dataset and renumber the rows 0..n-1.
df = pd.read_csv("april2025 swapped.csv").reset_index(drop=True)  # Change to your actual file
# Drop the four ELO "before"/"peak" columns before any analysis.
df = df.drop(columns=["RED_ELO_BEFORE", "BLUE_ELO_BEFORE", "RED_ELO_PEAK", "BLUE_ELO_PEAK"])

# Basic Details
print("\n Dataset Shape:", df.shape)
# DataFrame.info() writes its report directly to stdout and returns None, so
# it must not be passed to print() (that printed the report first and then
# "Column Info: None"). Print the label, then let info() emit the report.
print("\n Column Info:")
df.info()
print("\n Descriptive Statistics:", df.describe())
print("\n Missing Values:", df.isnull().sum())

# Drop every row that has a missing value in any column other than the ones
# listed in excluded_na_drop; those columns are allowed to stay empty.
excluded_na_drop = ["FinishDetails"]
df = df.dropna(subset=[name for name in df.columns if name not in excluded_na_drop])
print("\n Df without Nan values:", df.shape)

# Feature Distributions
# Grid of 30-bin histograms drawn by DataFrame.hist on a 24x16 inch figure.
df.hist(bins=30, figsize=(24, 16))
plt.tight_layout()  # re-space the subplots before rendering
plt.show()

# Categorical Encoding (preparation for the correlation matrix)
# OUTCOME and METHOD are replaced by integer codes from their own encoders.
label_encoder_outcome = LabelEncoder()
label_encoder_method = LabelEncoder()
df['OUTCOME'] = label_encoder_outcome.fit_transform(df['OUTCOME'])
# NOTE: METHOD is encoded here but removed again by the drop below, so only
# the fitted label_encoder_method object survives this step.
df['METHOD'] = label_encoder_method.fit_transform(df['METHOD'])

# Remove the columns that are not wanted in the correlation analysis.
df = df.drop(
    columns=[
        "METHOD",
        "BOUT",
        "EVENT",
        "WEIGHTCLASS",
        "REFEREE",
        "DETAILS",
        "URL",
        "TIME FORMAT",
        "TIME",
        "Unnamed: 0",
        "Unnamed: 0.1",
        "ROUND",
    ]
)

def to_pascal_case(s):
    """Return *s* rewritten as a single PascalCase token.

    Underscores are treated like spaces, the text is split on whitespace,
    and each piece is passed through ``str.capitalize`` (first character
    upper-cased, the rest lower-cased) before the pieces are joined with
    no separator.

    >>> to_pascal_case("RED_ELO_BEFORE")
    'RedEloBefore'
    """
    pieces = s.strip().replace('_', ' ').split()
    return ''.join(map(str.capitalize, pieces))

# Rename every column to PascalCase so the plot labels read uniformly.
df.columns = list(map(to_pascal_case, df.columns))

# Correlation Heatmap
# Pairwise correlation of the columns left in df, kept in correlation_matrix
# so later plots can reuse it.
correlation_matrix = df.corr()

plt.figure(figsize=(50, 40))
# Diverging colour map centred on 0; every cell is annotated with its value
# rounded to two decimals.
sns.heatmap(
    data=correlation_matrix,
    cmap="coolwarm",
    center=0,
    annot=True,
    fmt=".2f",
    annot_kws={"size": 10},
)
plt.title("Correlation Heatmap of Fight Metrics", fontsize=16)
plt.xticks(rotation=45, ha="right", fontsize=12)  # slanted labels on the x axis
plt.yticks(rotation=0, fontsize=12)
plt.show()

# Correlation of each feature with the encoded 'Outcome' column
plt.figure(figsize=(12, 14))
outcome_corr = (
    correlation_matrix['Outcome']
    .drop('Outcome')  # leave out the column's correlation with itself
    .sort_values(ascending=False)  # signed order: most positive first, most negative last
)

# Horizontal bars: one per feature, bar length is the correlation value.
sns.barplot(y=outcome_corr.index, x=outcome_corr.values, palette='coolwarm_r')
plt.xlabel("Correlation")
plt.ylabel("Feature")
plt.title("Feature Correlation with Fight Outcome", fontsize=16)
plt.tight_layout()
plt.show()
