<a href="https://colab.research.google.com/github/suraj4502/Explaratory_Data_Analysis/blob/main/UFC_Fighters_ANALYSIS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import requests
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

In [2]:
# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Give every Plotly figure in this notebook a fixed size.
# Plotly has no `pio.templates.default_config` setting, so assigning to it is
# silently ignored. Defaults are applied by registering a template and making
# it part of `pio.templates.default`.
pio.templates['notebook_size'] = go.layout.Template(
    layout={
        'autosize': False,  # Disable automatic sizing
        'width': 600,       # Set the desired width
        'height': 550       # Set the desired height
    }
)
# Layer the size settings on top of Plotly's built-in default theme.
pio.templates.default = 'plotly+notebook_size'




# DATA Collection.

In [3]:

# The RapidAPI key is read from the environment so the credential is never
# stored in the notebook. NOTE(review): the key that used to be hard-coded here
# has been published with the notebook and should be revoked/rotated.
api_key = os.environ.get("RAPIDAPI_KEY")
if not api_key:
    raise RuntimeError("Set the RAPIDAPI_KEY environment variable before running this cell.")

url = "https://mma-stats.p.rapidapi.com/search"
headers = {
    "X-RapidAPI-Key": api_key,
    "X-RapidAPI-Host": "mma-stats.p.rapidapi.com"
}

age_range = range(20, 61)  # Age range from 20 to 60 (inclusive)
data = []


def flatten_tree(tree, parent_key='', flat=None):
    """Flatten nested dicts into a single dict.

    Nested keys are joined with '_' (e.g. {'Bio Data': {'Age': 1}} becomes
    {'Bio Data_Age': 1}). Non-dict values are stored under the key built so far.

    Args:
        tree: The (possibly nested) value to flatten.
        parent_key: Key prefix accumulated from the enclosing dicts.
        flat: Dict that receives the flattened entries; created when omitted.

    Returns:
        The dict holding the flattened entries.
    """
    if flat is None:
        flat = {}
    if isinstance(tree, dict):
        for key, value in tree.items():
            new_key = parent_key + '_' + key if parent_key else key
            flatten_tree(value, new_key, flat)
    else:
        flat[parent_key] = tree
    return flat


# The search endpoint is queried once per age; each result becomes one row.
for age in age_range:
    querystring = {"age": str(age)}
    # A timeout keeps one stalled request from hanging the whole notebook.
    response = requests.get(url, headers=headers, params=querystring, timeout=30)

    if response.status_code != requests.codes.ok:
        # Keep going (best effort) but make the gap in the data visible.
        print("Skipping age", age, "- HTTP status", response.status_code)
        continue

    # 'results' may be missing or null; treat that as "no fighters".
    fighters = response.json().get('results') or []
    for fighter in fighters:
        flattened_fighter = flatten_tree(fighter)
        data.append(flattened_fighter)

df = pd.DataFrame(data)  # Create a DataFrame from the retrieved data

# Store the DataFrame in an Excel file
output_file = "fighter_details.xlsx"
df.to_excel(output_file, index=False)

print("Data has been stored in", output_file)


Data has been stored in fighter_details.xlsx


# Data Preprocessing.

In [4]:
def find_fighter(desired_name, fighters, threshold=80):
    """Look up fighters whose name fuzzily matches ``desired_name``.

    Args:
        desired_name: Name to search for; it does not need to be spelled exactly.
        fighters: DataFrame with a 'Name' column. It is not modified.
        threshold: Minimum ``fuzz.ratio`` score (0-100) for a row to be kept.

    Returns:
        A DataFrame of the matching rows, best match first, restricted to a
        fixed selection of columns plus the computed 'match_score'.

    Note:
        The columns are picked by position, so this assumes the column layout
        the notebook's DataFrame has after preprocessing (53 columns, with
        'match_score' becoming position 53) - TODO confirm if the API schema
        changes.
    """
    # assign() builds a new frame, so the caller's DataFrame does not silently
    # gain a 'match_score' column as a side effect of a lookup.
    scored = fighters.assign(
        match_score=fighters['Name'].apply(lambda x: fuzz.ratio(desired_name, x))
    )

    # Filter the DataFrame based on the match score threshold
    similar_fighters = scored[scored['match_score'] >= threshold]

    # Sort the DataFrame by match score in descending order
    similar_fighters = similar_fighters.sort_values('match_score', ascending=False)

    result = similar_fighters.iloc[:,[0,1,2,3,4,5,6,7,8,9,10,11,12,13,20,21,22,23,
                                      24,25,26,27,28,37,40,41,44,45,46,47,48,49,50,51,52,53]]

    return result

In [5]:
def get_country(text):
    """Extract the country from a comma-separated hometown string.

    The country is taken to be the last comma-separated part, so
    "City, Country", "City, State, Country" and a bare "Country" all work.

    Args:
        text: Hometown string, or a missing value (NaN / None).

    Returns:
        The country with surrounding whitespace removed, or None when the
        input is not a string or the last part is blank.
    """
    # Missing hometowns show up as NaN (a float) or None; neither has a country.
    if not isinstance(text, str):
        return None

    # Using the last part (rather than the second) keeps three-part hometowns
    # from returning the state/region instead of the country.
    country = text.split(",")[-1].strip()
    return country or None

# Derive each fighter's country from the hometown string.
hometowns = df['Bio Data_Hometown']
df['country'] = hometowns.apply(get_country)

In [6]:
# Columns that must be numeric for the statistics and charts below.
numeric_columns = [
    'Division Body_Wins',
    'Division Body_Losses',
    'Division Body_Draws',
    'Bio Data_Age',
    'Bio Data_Height',
    'Bio Data_Weight',
    'Bio Data_Reach',
    'Bio Data_Leg reach',
    'Sig. Strikes Landed',
    'Takedowns Landed',
    'Records_Wins by Submission',
    'Records_Wins by Knockout',
]

# errors='coerce' turns any value that cannot be parsed into NaN.
df[numeric_columns] = df[numeric_columns].apply(
    lambda column: pd.to_numeric(column, errors='coerce')
)

In [7]:
# Merge labels that describe the same discipline into one canonical name.
value_mapping = {
    'Boxer': "Boxing",
    'Brazilian Jiu-Jitsu': "Jiu-Jitsu",
    'Grappler': "Jiu-Jitsu",
    'Kung-Fu': "Kung Fu",
    'Wrestler': "Wrestling",
}

style_column = 'Bio Data_Fighting style'
df[style_column] = df[style_column].replace(value_mapping)

In [8]:
# Drop the last space-separated word of every division title.
# `n` is passed by keyword: passing it positionally is deprecated in pandas 1.x
# (FutureWarning) and rejected by pandas 2.x.
# NOTE: this cell is not idempotent - running it again strips another word.
df['Division Title'] = df['Division Title'].str.rsplit(' ', n=1).str[0]

  df['Division Title'] = df['Division Title'].str.rsplit(' ', 1).str[0]


In [9]:
# Record components used by both derived columns.
wins = df['Division Body_Wins']
losses = df['Division Body_Losses']
draws = df['Division Body_Draws']

# Total fights = wins + losses + draws.
df['Total_fights'] = wins + losses + draws

# Fighter score = share of fights that were won (NaN when there are no fights).
df['Fighter_score'] = wins / (wins + losses + draws)

## Data Analysis.

In [10]:
# Shape of the data as (rows, columns), including the columns derived above
df.shape

(2564, 53)

In [11]:
# Count missing values per column and show the ten columns with the most.
null_counts = df.isna().sum()
null_counts.sort_values(ascending=False).head(10)

Records_Former Champion         2563
Records_Title Defenses          2552
Records_Fight Win Streak        2482
Records_Wins by Decision        2275
Records_First Round Finishes    1967
Takedowns Landed                1753
Last Fight_Date                 1747
Last Fight_Event                1747
Last Fight_Fight Number         1747
Last Fight_Matchup              1747
dtype: int64

In [12]:
# A fighter counts as a duplicate when both name and nickname repeat an earlier row.
duplicate_mask = df.duplicated(subset=['Name', 'Nickname'])
duplicates = df[duplicate_mask]
len(duplicates)

0

In [13]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numerical columns
df.describe()

Unnamed: 0,Division Body_Wins,Division Body_Losses,Division Body_Draws,Bio Data_Age,Bio Data_Height,Bio Data_Weight,Bio Data_Reach,Bio Data_Leg reach,Sig. Strikes Landed,Takedowns Landed,Records_Wins by Knockout,Records_Wins by Submission,Total_fights,Fighter_score
count,2522.0,2522.0,2522.0,2564.0,2331.0,2489.0,1692.0,1432.0,2345.0,811.0,988.0,871.0,2522.0,2351.0
mean,12.013481,4.501983,0.171293,36.00312,68.181896,164.705424,71.577074,39.924532,233.260554,7.934649,6.211538,4.887486,16.686757,0.689625
std,8.350739,3.635325,0.57379,7.120445,12.060913,41.89191,4.594404,2.605887,295.848462,10.819937,4.083821,3.90063,11.518306,0.199184
min,0.0,0.0,0.0,20.0,0.0,0.0,-0.02,32.0,0.0,1.0,1.0,0.0,0.0,0.0
25%,7.0,2.0,0.0,31.0,67.0,141.0,69.0,38.0,43.0,2.0,3.0,2.0,9.0,0.643462
50%,11.0,4.0,0.0,35.0,70.0,156.0,72.0,40.0,122.0,4.0,5.0,4.0,15.0,0.714286
75%,17.0,6.0,0.0,40.0,73.0,185.0,74.5,41.5,305.0,9.0,8.0,7.0,23.0,0.8
max,88.0,22.0,10.0,60.0,84.0,415.0,84.5,74.0,3122.0,90.0,28.0,47.0,112.0,1.0


## Individual Stats.

To get details about a specific fighter, use the `find_fighter` function, which takes the fighter's name and the DataFrame as input. Matching is fuzzy, so a slightly misspelled name still works.
For example:

```
find_fighter("Charles Olieveira",df)
```


In [14]:
# The query is misspelled (presumably on purpose): fuzzy matching still returns "Charles Oliveira".
find_fighter("Charles Olieveira",df)

Unnamed: 0,Name,Nickname,Division Title,Division Body_Wins,Division Body_Losses,Division Body_Draws,Bio Data_Status,Bio Data_Hometown,Bio Data_Age,Bio Data_Height,Bio Data_Weight,Bio Data_Octagon Debut,Bio Data_Reach,Bio Data_Leg reach,Sig. Strikes Landed,Sig. Strikes Attempted,Takedowns Landed,Takedowns Attempted,Striking accuracy,Takedown Accuracy,Records_Wins by Knockout,Records_Wins by Submission,Records_First Round Finishes,Last Fight_Event,Last Fight_Date,Fighter Facts,Bio Data_Fighting style,Bio Data_Trains at,Records_Wins by Decision,Records_Fight Win Streak,Records_Former Champion,Records_Title Defenses,country,Total_fights,Fighter_score,match_score
913,Charles Oliveira,Do Bronx,Lightweight,33.0,9.0,0.0,Active,"State of So Paulo, Brazil",33,70.0,154.5,"Aug. 01, 2010",74.0,41.0,751.0,1405,26.0,84,53%,40%,9.0,21.0,,UFC 280,"Oct. 22, 2022",[14fights back at lightweight (12-2) after ele...,Jiu-Jitsu,Bronx's Gold Team,,,,,Brazil,42.0,0.785714,97


## Count Of Fighters from Each Country.

In [15]:
# Tally fighters per country; value_counts() orders the result largest first.
country_counts = df['country'].value_counts().reset_index()
country_counts.columns = ['Country', 'Fighter Count']

# Only the ten most represented countries are plotted.
top_countries = country_counts.head(10)
fig = px.bar(
    top_countries,
    x='Country',
    y='Fighter Count',
    color_discrete_sequence=['#61105E'],
)

country_layout = dict(
    title="Countries with most MMA Fighters",
    xaxis_title="Country",
    yaxis_title="Fighter Count",
    title_x=0.5,
    yaxis=dict(showgrid=False),
    plot_bgcolor='#FFFFF5',
)
fig.update_layout(**country_layout)

fig.show()

---

## Fighters with Most wins in the octagon.

In [16]:
# Rank every fighter by number of wins, highest first.
# (The earlier column-subset assignment was dead code: it was overwritten by
# this same sort on the next line, so only the full sorted frame is kept.)
most_wins = df.sort_values('Division Body_Wins', ascending=False)

# Overall top 50, plus the top 20 within each fighter status.
# Named `top_50` rather than `all` so the builtin all() is not shadowed for
# the rest of the notebook.
top_50 = most_wins[:50]
top_20_active = most_wins[most_wins['Bio Data_Status'] == 'Active'][:20]
top_20_inactive = most_wins[most_wins['Bio Data_Status'] == 'Not Fighting'][:20]
top_20_Retired = most_wins[most_wins['Bio Data_Status'] == 'Retired'][:20]

fig = go.Figure(data=[go.Table(
    header=dict(values=['Name', 'Division', 'Wins', 'Status'],
                fill_color='#F7E6C4',
                align='left'),
    cells=dict(values=[top_50['Name'], top_50['Division Title'],
                       top_50['Division Body_Wins'], top_50['Bio Data_Status']],
               fill_color='#FFF4F4',
               align='left'))
])

fig.update_layout(
    title="Top 50 Fighters with Most Wins",
    title_x=0.5
)

fig.show()

---

In [17]:
def _wins_bar_chart(fighters, title, bar_color, background_color):
    """Build a bar chart of wins per fighter.

    Args:
        fighters: DataFrame with 'Name' and 'Division Body_Wins' columns.
        title: Chart title.
        bar_color: Colour of the bars.
        background_color: Colour of the plot background.

    Returns:
        The configured plotly Figure (not yet shown).
    """
    chart = go.Figure(data=[go.Bar(
        x=fighters['Name'],
        y=fighters['Division Body_Wins'],
        marker_color=bar_color
    )])
    chart.update_layout(
        title=title,
        xaxis_title="Fighter Name",
        yaxis_title="Number of Wins",
        title_x=0.5,
        yaxis=dict(showgrid=False),
        plot_bgcolor=background_color
    )
    return chart


# Top 20 fighters whose status is 'Active'
fig_active = _wins_bar_chart(
    top_20_active, "Top 20 Active Fighters with Most Wins", '#19A7CE', '#F2FFE9')

# Top 20 fighters whose status is 'Not Fighting'
fig_inactive = _wins_bar_chart(
    top_20_inactive, "Top 20 Inactive Fighters with Most Wins", '#A555EC', '#FAEDF0')

# Top 20 fighters whose status is 'Retired' (despite the variable name)
fig_not_fighting = _wins_bar_chart(
    top_20_Retired, "Top 20 Retired Fighters with Most Wins", '#E23E57', '#FAEDF0')

fig_active.show()

In [18]:
# Show the chart built above from the top 20 fighters with status 'Not Fighting'.
fig_inactive.show()

In [19]:
# Show the chart built above from the top 20 'Retired' fighters (despite the variable name).
fig_not_fighting.show()

---

## Fighters By style.

In [20]:
# Number of fighters (non-null names) recorded for each fighting style.
count_values = (
    df.groupby('Bio Data_Fighting style')["Name"]
    .count()
    .reset_index()
)

# One slice per fighting style, sized by its fighter count.
style_pie = go.Pie(
    labels=count_values['Bio Data_Fighting style'],
    values=count_values['Name'],
)
fig = go.Figure(data=[style_pie])

# Title, legend and background settings
pie_layout = dict(
    title=" Fighting Style Distribution",
    showlegend=True,
    title_x=0.5,
    plot_bgcolor='#FAEDF0',
)
fig.update_layout(**pie_layout)

# Render the chart
fig.show()


---

## Fighters in Divisions.

In [21]:
by_division = df.groupby('Division Title')

# Fighters per division, largest division first.
divisions = (
    by_division['Name']
    .count()
    .reset_index()
    .sort_values('Name', ascending=False)
)

# Total knockout wins per division, highest first.
knockout_by_division = (
    by_division['Records_Wins by Knockout']
    .sum()
    .reset_index()
    .sort_values('Records_Wins by Knockout', ascending=False)
)

fighters_bar = go.Bar(
    x=divisions['Division Title'],
    y=divisions['Name'],
    name='Fighters',
)
knockouts_bar = go.Bar(
    x=knockout_by_division['Division Title'],
    y=knockout_by_division['Records_Wins by Knockout'],
    name='Knockouts',
)

# Both series share one figure and are drawn side by side per division.
fig = go.Figure(data=[fighters_bar, knockouts_bar])

fig.update_layout(
    title="Fighters and Knockouts by Division",
    xaxis_title="Division",
    yaxis_title="Count",
    barmode='group',
    title_x=0.5,
    plot_bgcolor='#FAEDF0',
    yaxis=dict(showgrid=False),
)
fig.show()

---

In [22]:
# Column positions kept in both rankings.
# NOTE(review): positions 26 and 27 look like the knockout and submission win
# columns - confirm against df.columns.
record_columns = [0, 2, 3, 4, 5, 6, 8, 26, 27]

# Fighters ordered by knockout wins, most first.
most_knockouts = (
    df.sort_values('Records_Wins by Knockout', ascending=False)
    .iloc[:, record_columns]
)

# Fighters ordered by submission wins, most first.
most_submissions = (
    df.sort_values('Records_Wins by Submission', ascending=False)
    .iloc[:, record_columns]
)

In [23]:
# Data for scatter plot
y = df['Records_Wins by Knockout']
x = df['Records_Wins by Submission']
names = df['Name']

# Create scatter plot
fig = go.Figure(data=go.Scatter(
    x=x,
    y=y,
    mode='markers',
    text=names,
    marker=dict(
        size=7,
        line=dict(width=1, color='black'),
        symbol='circle',
        color = '#9336B4'
    )
))

fig.update_layout(
    title='Fighters: Wins by Submission vs. Wins by Knockout',
    xaxis_title='Wins by Submission',
    yaxis_title='Wins by Knockout',
    hovermode='closest',
    title_x=0.5,
    plot_bgcolor='#FFF6F4',
    xaxis=dict(showgrid=False),
    yaxis=dict(showgrid=False)
)

fig.show()


---

## Best Fighters of all time.

This metric is calculated by dividing the total number of wins by the total number of fights.



```
fighter_Score = wins/Total_Fights
```



*A score of 1 means the fighter is undefeated.*

**NOTE: Only fighters with more than 20 fights are included.**


---







In [24]:
# Columns shown in the ranking.
ranking_columns = ['Name', "Fighter_score", 'Total_fights', 'Division Title', 'country']

# Order by score, breaking ties by number of fights (both descending).
ranked_fighters = df.sort_values(
    ['Fighter_score', 'Total_fights'], ascending=False
).loc[:, ranking_columns]

# Keep fighters with more than 20 fights, then the first 100 of those.
experienced = ranked_fighters['Total_fights'] > 20
Best_fighters = ranked_fighters[experienced][:100]

In [25]:
# 1-based rank in the current (already sorted) order.
rank_numbers = range(1, len(Best_fighters) + 1)
Best_fighters['Index'] = rank_numbers

# (header label, DataFrame column) for each table column, left to right.
table_columns = [
    ('Index', 'Index'),
    ('Name', 'Name'),
    ('Fighter_score', 'Fighter_score'),
    ('Total_fights', 'Total_fights'),
    ('Division', 'Division Title'),
    ('Country', 'country'),
]
header_labels = [label for label, _ in table_columns]
cell_values = [Best_fighters[column] for _, column in table_columns]

best_table = go.Table(
    header=dict(values=header_labels, fill_color='#F7E6C4', align='left'),
    cells=dict(values=cell_values, fill_color='#FFF4F4', align='left'),
)
fig = go.Figure(data=[best_table])

# Bold, centred title
title_style = dict(
    text="<b>Best Fighters</b>",
    font=dict(size=18, family="Arial", color="black"),
    x=0.5,
)
fig.update_layout(title=title_style)

fig.show()


In [26]:
# Score on the x-axis, number of fights on the y-axis, names as hover text.
x = Best_fighters['Fighter_score']
y = Best_fighters['Total_fights']
names = Best_fighters['Name']

marker_style = {
    'size': 10,
    'line': {'width': 1, 'color': 'black'},
    'symbol': 'circle',
    'color': '#9336B4',
}

# One marker per ranked fighter
fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=y, mode='markers', text=names, marker=marker_style))

best_layout = dict(
    title="Best Fighters Of all time.",
    xaxis_title="Fighter Score",
    yaxis_title="Total Fights",
    title_x=0.5,
    plot_bgcolor='#FAEDF0',
    yaxis=dict(showgrid=False),
)
fig.update_layout(**best_layout)

fig.show()

---


# Some Insights 📜 ⇨


1.   The majority of fighters in the UFC hail from the United States, with Brazil having the second-largest representation.

2.   The most commonly practiced fighting style among UFC fighters is mixed martial arts (MMA), followed by freestyle, jiu-jitsu, and kickboxing.

3.  The welterweight division stands out for having the highest number of knockouts, indicating that it is an action-packed category with a substantial roster of talented fighters.

4.   Among the current active fighters, Shayilan Nuerdanbieke holds an impressive record with 36 wins, highlighting a remarkable achievement. However, it should be noted that Matt Hughes, who is retired, holds the all-time record with 46 wins.

5. **Ben Rothwell** deserves recognition for his exceptional striking abilities, as he holds the record for the most knockouts (28) in the history of the UFC.

6. **Aleksei Oleinik's** submission skills are noteworthy, as he has successfully executed an impressive total of 47 submissions in his UFC career, showcasing his proficiency in grappling techniques.

7. When it comes to discussing the greatest fighters in MMA history, **Khabib Nurmagomedov's** undefeated record of 29 wins and 0 losses cannot be ignored, solidifying his claim as one of the sport's all-time greats.

8. It is worth mentioning that **Anshul Jubli** proudly represents India as the sole active UFC fighter from the country, contributing to the global diversity of the organization.



###                                   Thank You!!😎
---
