In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Silence only pandas' SettingWithCopyWarning; every other warning category is still shown.
# NOTE(review): confirm this class is still exposed by the pandas version the notebook runs on.
warnings.simplefilter(action='ignore', category=pd.errors.SettingWithCopyWarning)

## Importing Datasets

In [None]:
# Read the raw UFC fight table from disk and preview its first five rows.
ufc_masterdf = pd.read_csv(filepath_or_buffer='../datasets/raw/ufc-master.csv')
ufc_masterdf.head(n=5)

## Data Preparation and Cleaning



In [None]:
# Call info() so the column / dtype / non-null summary is printed.
# The bare attribute `ufc_masterdf.info` only displays the bound-method repr.
ufc_masterdf.info()

In [None]:
# Null count per column, ordered from most to least incomplete.
missing_values = (
    ufc_masterdf
    .isna()
    .sum()
    .sort_values(ascending=False)
)

# Show only the columns that actually contain nulls.
missing_values.loc[missing_values.gt(0)]


In [None]:
# Columns with a high percentage of missing values that are not essential for the analysis.
columns_to_drop = ['BWFeatherweightRank', 'RWFeatherweightRank', 'BPFPRank', 'BWFlyweightRank', 'RWFlyweightRank',
                   'BWStrawweightRank', 'BWBantamweightRank', 'BBantamweightRank', 'BWelterweightRank',
                   'BLightHeavyweightRank', 'BLightweightRank', 'BFeatherweightRank', 'BFlyweightRank',
                   'BMiddleweightRank', 'RWStrawweightRank', 'BHeavyweightRank', 'RWBantamweightRank', 'RFeatherweightRank',
                   'RLightHeavyweightRank', 'RMiddleweightRank', 'RBantamweightRank', 'RHeavyweightRank', 'RFlyweightRank',
                   'RLightweightRank', 'RWelterweightRank', 'RPFPRank', 'BMatchWCRank', 'RMatchWCRank', 'FinishDetails']

# errors='ignore' keeps this cell re-runnable: on a second execution the columns are
# already gone from ufc_masterdf and a plain drop() would raise KeyError.
ufc_masterdf = ufc_masterdf.drop(columns=columns_to_drop, errors='ignore')

# Rows must have a value in every one of these columns to be kept.
essential_columns = ['RedOdds', 'BlueOdds', 'TotalFightTimeSecs', 'RedAvgSigStrLanded', 'BlueAvgSigStrLanded',
                     'RedAvgTDLanded', 'BlueAvgTDLanded', 'RedAvgSubAtt', 'BlueAvgSubAtt', 'RedAvgSigStrPct',
                     'BlueAvgSigStrPct', 'RedAvgTDPct', 'BlueAvgTDPct', 'Winner', 'WeightClass']

# .copy() makes ufc_cleaned an independent frame, so the column assignments made in
# later cells write to it directly instead of to a slice derived from ufc_masterdf
# (the situation SettingWithCopyWarning is meant to flag).
ufc_cleaned = ufc_masterdf.dropna(subset=essential_columns).copy()

# Only the last expression of a cell is displayed, so print these checks explicitly.
print('Remaining missing values:', ufc_cleaned.isnull().sum().sum())
print('Shape after cleaning:', ufc_cleaned.shape)

# Verifying the cleaned data
ufc_cleaned.info()

## UFC Winning Visualizations

In [None]:
# Derived per-corner columns:
#   <Corner>Experience    = AvgSigStrLanded + AvgTDLanded + AvgSubAtt
#   <Corner>TotalAccuracy = (AvgSigStrPct + AvgTDPct) / 2
ufc_cleaned['RedExperience'] = (
    ufc_cleaned['RedAvgSigStrLanded']
    .add(ufc_cleaned['RedAvgTDLanded'])
    .add(ufc_cleaned['RedAvgSubAtt'])
)
ufc_cleaned['BlueExperience'] = (
    ufc_cleaned['BlueAvgSigStrLanded']
    .add(ufc_cleaned['BlueAvgTDLanded'])
    .add(ufc_cleaned['BlueAvgSubAtt'])
)
ufc_cleaned['RedTotalAccuracy'] = (
    ufc_cleaned['RedAvgSigStrPct'].add(ufc_cleaned['RedAvgTDPct']).div(2)
)
ufc_cleaned['BlueTotalAccuracy'] = (
    ufc_cleaned['BlueAvgSigStrPct'].add(ufc_cleaned['BlueAvgTDPct']).div(2)
)


### Win Rate by Weight Class

In [None]:
# Share of fights won by the Red corner within each weight class.
# The quantity computed is specifically the Red-corner win rate (mean of Winner == 'Red'),
# so the title and y-axis name it explicitly instead of a generic "Win Rate".
weight_class_win_rate = (
    ufc_cleaned['Winner']
    .eq('Red')
    .groupby(ufc_cleaned['WeightClass'])
    .mean()
)

# Plotting Red-corner win rate by weight class
plt.figure(figsize=(12, 6))
weight_class_win_rate.plot(kind='bar', color='skyblue')
plt.title('Red Corner Win Rate by Weight Class')
plt.xlabel('Weight Class')
plt.ylabel('Red Corner Win Rate')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

This bar chart displays the share of fights won by the Red-corner fighter in each weight class, indicating in which categories the Red corner wins more or less often. Certain weight classes might showcase different dynamics, like endurance and strength, which could impact win likelihood.

### Effect of Strike Accuracy on Winning

In [None]:
# Strike accuracy of the fighter who actually won each fight: the Red-corner value
# for Red wins and the Blue-corner value for Blue wins. Plotting RedAvgSigStrPct for
# every row would describe the Red corner, not the winners named in the legend.
red_won = ufc_cleaned['Winner'] == 'Red'
blue_won = ufc_cleaned['Winner'] == 'Blue'
winner_strike_accuracy = pd.concat([
    ufc_cleaned.loc[red_won, 'RedAvgSigStrPct'],
    ufc_cleaned.loc[blue_won, 'BlueAvgSigStrPct'],
])

plt.figure(figsize=(10, 6))
sns.histplot(x=winner_strike_accuracy, kde=True, color='orange', label='Strike Accuracy of Winners')
plt.title('Strike Accuracy and Win Rate')
plt.xlabel('Strike Accuracy (%)')
plt.ylabel('Frequency')
plt.legend()
plt.tight_layout()
plt.show()


The histogram shows the distribution of strike accuracy among winners, suggesting that higher accuracy generally correlates with winning outcomes. Confirming this would require comparing it with the same distribution for losing fighters. This finding emphasizes the importance of landing precise strikes in UFC success.

### Takedown Accuracy and Win Rate

In [None]:
# Takedown accuracy of the fighter who actually won each fight: the Red-corner value
# for Red wins and the Blue-corner value for Blue wins. Plotting RedAvgTDPct for every
# row would describe the Red corner, not the winners named in the legend.
red_won = ufc_cleaned['Winner'] == 'Red'
blue_won = ufc_cleaned['Winner'] == 'Blue'
winner_takedown_accuracy = pd.concat([
    ufc_cleaned.loc[red_won, 'RedAvgTDPct'],
    ufc_cleaned.loc[blue_won, 'BlueAvgTDPct'],
])

plt.figure(figsize=(10, 6))
sns.histplot(x=winner_takedown_accuracy, kde=True, color='purple', label='Takedown Accuracy of Winners')
plt.title('Takedown Accuracy and Win Rate')
plt.xlabel('Takedown Accuracy (%)')
plt.ylabel('Frequency')
plt.legend()
plt.tight_layout()
plt.show()

This histogram shows the distribution of takedown accuracy among winners, highlighting that fighters with greater takedown accuracy tend to win more often. Confirming this would require comparing it with the same distribution for losing fighters. This suggests that controlling opponents on the ground can provide a competitive edge.

### Fighting Style Distribution Among Winners

In [None]:
# Stance of the fighter who actually won each fight. Counting RedStance over every row
# would describe the Red corner regardless of outcome, not the winners named in the title.
# NOTE(review): assumes the dataset has a 'BlueStance' column mirroring 'RedStance' — confirm.
red_won = ufc_cleaned['Winner'] == 'Red'
blue_won = ufc_cleaned['Winner'] == 'Blue'
winner_stance = pd.concat([
    ufc_cleaned.loc[red_won, 'RedStance'],
    ufc_cleaned.loc[blue_won, 'BlueStance'],
])

plt.figure(figsize=(12, 6))
# Bars are ordered from the most to the least common stance among winners.
sns.countplot(x=winner_stance, color='cyan', order=winner_stance.value_counts().index)
plt.title('Fighting Style Distribution Among Winners')
plt.xlabel('Fighting Style')
plt.ylabel('Count')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

This bar chart shows the distribution of fighting styles (stances) among winners, with a focus on traditional stances like orthodox and southpaw. Popularity of certain stances among winners may suggest strategic advantages in fight tactics or defense mechanisms.

### Age and Win Rate

In [None]:
# Age of the fighter who actually won each fight. Plotting RedAge for every row would
# describe the Red corner regardless of outcome, not the winners named in the legend.
# NOTE(review): assumes the dataset has a 'BlueAge' column mirroring 'RedAge' — confirm.
red_won = ufc_cleaned['Winner'] == 'Red'
blue_won = ufc_cleaned['Winner'] == 'Blue'
winner_age = pd.concat([
    ufc_cleaned.loc[red_won, 'RedAge'],
    ufc_cleaned.loc[blue_won, 'BlueAge'],
])

plt.figure(figsize=(12, 6))
sns.histplot(x=winner_age, kde=True, color='green', label='Age of Winners')
plt.title('Age and Win Rate')
plt.xlabel('Age')
plt.ylabel('Frequency')
plt.legend()
plt.tight_layout()
plt.show()

The histogram illustrates the age distribution of winners, indicating whether younger or older fighters tend to win more frequently. Observing any peak in certain age ranges can provide insights into the potential advantage of experience and physical condition at different ages.

## UFC Fight Data Visualizations

In [None]:
# Per fight, the lower of the two odds values is stored as the favorite's odds
# and the higher one as the underdog's odds.
ufc_cleaned['FavoriteOdds'] = ufc_cleaned[['RedOdds', 'BlueOdds']].min(axis=1)
ufc_cleaned['UnderdogOdds'] = ufc_cleaned[['RedOdds', 'BlueOdds']].max(axis=1)

# FavoriteWin is True when the corner with the strictly lower odds is also the winner.
# Fights where both corners have equal odds therefore evaluate to False.
red_favorite_won = (
    ufc_cleaned['RedOdds'].lt(ufc_cleaned['BlueOdds'])
    & ufc_cleaned['Winner'].eq('Red')
)
blue_favorite_won = (
    ufc_cleaned['BlueOdds'].lt(ufc_cleaned['RedOdds'])
    & ufc_cleaned['Winner'].eq('Blue')
)
ufc_cleaned['FavoriteWin'] = red_favorite_won | blue_favorite_won

In [None]:
# Favorite vs. Underdog Win Probability
# normalize=True turns counts into shares so the chart matches its "Probability" title.
# reindex([True, False]) pins the bar order, so green always marks "favorite won" and
# red "favorite did not win" regardless of which outcome is more frequent.
favorite_win_share = (
    ufc_cleaned['FavoriteWin']
    .value_counts(normalize=True)
    .reindex([True, False], fill_value=0)
)

plt.figure(figsize=(8, 6))
favorite_win_share.plot(kind='bar', color=['green', 'red'])
plt.title('Favorite vs Underdog Win Probability', fontsize=14)
# Tick labels are the boolean values True / False, so the axis label refers to those.
plt.xlabel('Favorite Win (True = Yes, False = No)', fontsize=12)
plt.ylabel('Share of Fights', fontsize=12)
plt.xticks(rotation=0)
plt.show()


In [None]:
# Distribution of odds for favorite and underdog.
# DataFrame.plot opens a new figure unless it is handed an Axes, so a preceding
# plt.figure(figsize=...) would be left empty and its size ignored. Create the
# figure/axes explicitly and pass ax= so the histogram is drawn at the intended size.
fig, ax = plt.subplots(figsize=(8, 6))
ufc_cleaned[['FavoriteOdds', 'UnderdogOdds']].plot(
    kind='hist', alpha=0.5, bins=50, color=['green', 'red'], ax=ax
)
ax.set_title('Distribution of Betting Odds (Favorite vs Underdog)', fontsize=14)
ax.set_xlabel('Betting Odds', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)
plt.show()

In [None]:
# Number of fights that ended in each round, with rounds in ascending order.
finish_round_counts = ufc_cleaned['FinishRound'].value_counts().sort_index()

fig, ax = plt.subplots(figsize=(8, 6))
finish_round_counts.plot(kind='bar', color='skyblue', rot=0, ax=ax)
ax.set_title('Distribution of Finish Rounds', fontsize=14)
ax.set_xlabel('Finish Round', fontsize=12)
ax.set_ylabel('Number of Fights', fontsize=12)
plt.show()

In [None]:
# Mean TotalFightTimeSecs per Finish value, shortest first.
mean_time_by_finish = (
    ufc_cleaned
    .groupby('Finish')['TotalFightTimeSecs']
    .mean()
    .sort_values()
)

fig, ax = plt.subplots(figsize=(8, 6))
mean_time_by_finish.plot(kind='bar', color='orange', rot=45, ax=ax)
ax.set_title('Average Fight Time by Finish Method', fontsize=14)
ax.set_xlabel('Finish Method', fontsize=12)
ax.set_ylabel('Average Fight Time (seconds)', fontsize=12)
plt.show()


In [None]:
# Every appearance of a fighter, in either corner, stacked into one Series;
# the ten most frequent names are the fighters with the most fights in ufc_cleaned.
top_fighters = pd.concat(
    [ufc_cleaned['RedFighter'], ufc_cleaned['BlueFighter']],
    axis=0,
)
top_fighter_counts = top_fighters.value_counts().nlargest(10)

fig, ax = plt.subplots(figsize=(10, 6))
top_fighter_counts.plot(kind='barh', color='teal', ax=ax)
ax.set_title('Top 10 Fighters by Number of Fights', fontsize=14)
ax.set_xlabel('Number of Fights', fontsize=12)
ax.set_ylabel('Fighter', fontsize=12)
plt.show()

In [None]:
# Ten fighters with the highest mean odds across all their appearances (Red + Blue combined).
# Both corners are reshaped to the same two columns (Fighter, Odds) before stacking.
red_corner_odds = ufc_cleaned[['RedFighter', 'RedOdds']].set_axis(['Fighter', 'Odds'], axis=1)
blue_corner_odds = ufc_cleaned[['BlueFighter', 'BlueOdds']].set_axis(['Fighter', 'Odds'], axis=1)
fighter_odds_long = pd.concat([red_corner_odds, blue_corner_odds])

avg_odds_fighter = fighter_odds_long.groupby('Fighter')['Odds'].mean().nlargest(10)

fig, ax = plt.subplots(figsize=(10, 6))
avg_odds_fighter.plot(kind='barh', color='purple', ax=ax)
ax.set_title('Top 10 UnderDogs by Average Betting Odds', fontsize=14)
ax.set_xlabel('Average Odds', fontsize=12)
ax.set_ylabel('Fighter', fontsize=12)
plt.show()

In [None]:
# Ten fighters with the lowest mean odds across all their appearances (Red + Blue combined).
# Both corners are reshaped to the same two columns (Fighter, Odds) before stacking.
avg_odds_fighter_favorite = pd.concat([
    ufc_cleaned[['RedFighter', 'RedOdds']].set_axis(['Fighter', 'Odds'], axis=1),
    ufc_cleaned[['BlueFighter', 'BlueOdds']].set_axis(['Fighter', 'Odds'], axis=1),
])
mean_odds_per_fighter = avg_odds_fighter_favorite.groupby('Fighter')['Odds'].mean()
avg_odds_fighterfav = mean_odds_per_fighter.nsmallest(10)

fig, ax = plt.subplots(figsize=(10, 6))
avg_odds_fighterfav.plot(kind='barh', color='purple', ax=ax)
ax.set_title('Top 10 Favorites by Average Betting Odds', fontsize=14)
ax.set_xlabel('Average Odds', fontsize=12)
ax.set_ylabel('Fighter', fontsize=12)
plt.show()

In [None]:
# Number of fights per fighter, counting appearances in either corner.
total_fights = pd.concat([ufc_cleaned['RedFighter'], ufc_cleaned['BlueFighter']]).value_counts()

# Counting wins for each fighter (whether they won as Red or Blue).
# A fighter with no wins never appears in the concatenated winners, so reindex onto
# every fighter with fill_value=0; otherwise their win percentage would be NaN, not 0.
fighter_wins = (
    pd.concat([
        ufc_cleaned.loc[ufc_cleaned['Winner'] == 'Red', 'RedFighter'],
        ufc_cleaned.loc[ufc_cleaned['Winner'] == 'Blue', 'BlueFighter'],
    ])
    .value_counts()
    .reindex(total_fights.index, fill_value=0)
)

# Calculating win percentage for each fighter
win_percentage = (fighter_wins / total_fights) * 100

# Selecting fighters with at least a certain number of fights to avoid small sample sizes
min_fights = 10
top_fighters_win_percentage = win_percentage[total_fights >= min_fights].nlargest(10)

# Plotting top fighters by win percentage
plt.figure(figsize=(10, 6))
top_fighters_win_percentage.plot(kind='barh', color='goldenrod')
# The title is built from min_fights so it cannot drift from the threshold actually applied.
plt.title(f'Top 10 Fighters by Win Percentage (Minimum {min_fights} Fights)', fontsize=14)
plt.xlabel('Win Percentage (%)', fontsize=12)
plt.ylabel('Fighter', fontsize=12)
plt.show()
