<a href="https://colab.research.google.com/github/nateq13f/Data-Analysis/blob/master/ESPN_fighters.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import folium
import os
from folium.plugins import HeatMap
from folium.plugins import FastMarkerCluster

In [None]:
# Load the raw fighter profiles from the CSV file into a DataFrame.
profile_csv_path = '/content/Profile.csv'
fighters = pd.read_csv(profile_csv_path)

In [None]:
# Quick overview of the frame: the column summary is printed by info(),
# and the row labels are shown as the cell's output value.
fighters.info()
row_labels = fighters.index.values
row_labels

In [None]:
# Build a single full-name column: first name, one space, then last name.
first_names = fighters['firstname']
last_names = fighters['lastname']
fighters['name'] = first_names + ' ' + last_names

In [None]:
# Split the record field into three new columns: win, loss and draw.
# The separator pattern matches either '-' or '/', and at most two splits are
# made, so every record yields at most three pieces.
#
# expand=True returns the pieces as DataFrame columns. Unpacking the `.str`
# accessor directly (the previous approach) is not supported by current pandas,
# and `n` must be passed by keyword.
record_parts = (
    fighters['record']
    .str.split('-|/', n=2, expand=True)
    # Guarantee columns 0, 1 and 2 exist even if no record has three pieces;
    # missing pieces become NaN.
    .reindex(columns=range(3))
)

for position, column in enumerate(('win', 'loss', 'draw')):
    fighters[column] = record_parts[position]

In [None]:
# Show the frame as it stands before any columns are removed.
fighters.info()
print(fighters[:20])

# Remove the record, draw, first name and last name fields because only the
# wins and losses are analyzed.
fighters = fighters.drop(columns=['record', 'draw', 'firstname', 'lastname'])
print(fighters.columns.values)

# Clean the empty names and teams by dropping those records.
has_name = fighters['name'] != 'none none'
has_team = fighters['team'] != 'team unknown'
# .copy() makes the filtered result an independent frame, so assigning new
# columns to it later does not raise SettingWithCopyWarning.
fighters = fighters[has_name & has_team].copy()

print(fighters)

In [None]:
# DATA analysis: win/loss ratio for every fighter.
# A loss count of "0" is swapped for "1" first, because the ratio divides by
# the loss count and division by zero is not possible.
fighters['loss'] = fighters['loss'].replace({'0': '1'})

print(fighters[39:42])

# Cast the win and loss counts from strings to floats so they can be divided.
for count_column in ('win', 'loss'):
    fighters[count_column] = fighters[count_column].astype(float)

print(fighters.dtypes)

# Number of decimal places kept in the ratio.
precision = 2
fighters['ratio'] = (fighters['win'] / fighters['loss']).round(decimals=precision)

print(fighters)

# Optional export of the cleaned frame (left disabled):
#fighters.to_csv('/content/CleanedFighters.csv',
#          encoding='utf-8', index=False)

In [None]:
# Get the sum of every team's wins, losses and win ratio, because some teams
# have multiple fighters.
# numeric_only=True restricts the aggregation to numeric columns; without it,
# text columns such as the fighter name are summed (concatenated) as well.
# as_index=False keeps 'team' as a regular column of the result.
teamgroups = fighters.groupby('team', as_index=False).sum(numeric_only=True)
print(teamgroups)
# info() prints its own report and returns None, so it is not wrapped in print().
teamgroups.info()

In [None]:
# Environment setup via shell commands (IPython `!` escapes).
# Pin plotly to version 4.7.1.
# NOTE(review): plotly is imported in an earlier cell; presumably the kernel must be
# restarted for this pinned version to take effect — confirm.
!pip install plotly==4.7.1
# Download the orca 1.2.1 AppImage and save it as /usr/local/bin/orca.
# Presumably this is the backend used by pio.write_image for PNG export — TODO confirm.
!wget https://github.com/plotly/orca/releases/download/v1.2.1/orca-1.2.1-x86_64.AppImage -O /usr/local/bin/orca
# Mark the downloaded file as executable.
!chmod +x /usr/local/bin/orca
# Install the system packages xvfb, libgtk2.0-0 and libgconf-2-4 (presumably runtime
# requirements of orca on a headless machine — TODO confirm).
!apt-get install xvfb libgtk2.0-0 libgconf-2-4

In [None]:
# Grouped bar chart to visualize the ratio, wins and losses together in one
# place for the first 20 rows of teamgroups.
# Both axes are taken from the same 20-row frame so x and y always have the
# same length (previously only x was limited to 20 rows).
top_teams = teamgroups.head(20)

fig = go.Figure()
# One bar trace per metric: (column, legend label, bar colour).
for column, label, colour in (
    ('ratio', 'win/loss ratio', 'red'),
    ('win', 'wins', 'darkblue'),
    ('loss', 'losses', 'magenta'),
):
    fig.add_trace(go.Bar(
        x=top_teams['team'],
        y=top_teams[column],
        name=label,
        marker_color=colour,
    ))

# Rotate the x-axis tick labels, group the bars per team, and order the teams
# by their total bar height, largest first.
fig.update_layout(
    title='Ratio, wins, and losses of top MMA gyms',
    barmode='group',
    xaxis_tickangle=-45,
    xaxis={'categoryorder': 'total descending'},
)

# Save a static PNG copy, then render the interactive figure.
pio.write_image(fig, '/content/fig6.png', width=1400, height=700)
fig.show()

In [None]:
# Bar chart of the summed win/loss ratio for the first 20 rows of teamgroups,
# with the bars ordered from the highest total to the lowest.
# The 20-row frame is passed as the data source and the axes are referenced by
# column name, so the frame and both axes always cover the same rows.
top_teams = teamgroups.head(20)

fig = px.bar(
    top_teams,
    x='team',
    y='ratio',
    color_discrete_sequence=px.colors.qualitative.Set1,
)
fig.update_xaxes(categoryorder="total descending")
fig.update_layout(
    title='Combined win/loss ratios of fighters from MMA gyms with top 10 UFC fighters',
    xaxis_title="Team names",
    yaxis_title='Ratio')

# Save a static PNG copy, then render the interactive figure.
pio.write_image(fig, '/content/TotalRatio.png', width=1400, height=700)
fig.show()

# My initial prediction was that AKA (American Kickboxing Academy) would have the best ratio, but the data proved me wrong.