# Backend Data Generation
---
This notebook generates the backend data that the app needs to fetch each fighter's latest stats.

In [1]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', 100)

# Load the processed fight table and parse the bout date as a real datetime.
df = pd.read_csv("data/UFC_processed.csv")
df["date"] = pd.to_datetime(df["date"])

# Column names shared by both corners: "date", "fighter", then the stat names.
# The stat names come from the first stat block (column positions 4-24,
# zero-based) with the first two characters — the corner prefix, e.g. "R_" —
# stripped off.
features = ["date", "fighter"] + [name[2:] for name in df.columns[4:25]]

# One frame per corner: column 0, that corner's name column (1 or 2), and that
# corner's block of stat columns (positions 4-24, or 25 to the end).
# NOTE(review): this relies on the CSV's positional layout — confirm that
# column 1 pairs with columns 4-24 and column 2 with columns 25 onward.
blueFighter = pd.concat([df.iloc[:, [0, 1]], df.iloc[:, 4:25]], axis=1)
redFighter = pd.concat([df.iloc[:, [0, 2]], df.iloc[:, 25:]], axis=1)

# Give both frames the same prefix-free header so they can be stacked.
for corner_frame in (blueFighter, redFighter):
    corner_frame.columns = features

# Stack the two corners into one long table (one row per fighter per bout)
# with a fresh 0..n-1 index.
fighters = pd.concat([redFighter, blueFighter], axis=0, ignore_index=True)

In [2]:
# Each fighter appears once per bout in `fighters`, so to get a fighter's latest
# stats we keep only the row of their most recent bout:
#    1- drop rows that have no fighter name (groupby ignored those keys anyway,
#       and looking one up with get_group raised a KeyError)
#    2- sort every row by date, newest to oldest; mergesort is stable, so rows
#       sharing a date keep their original order and the pick is deterministic
#       (rows with a missing date sort last)
#    3- keep the first row seen for each fighter, i.e. the newest one
# This is a single vectorized pass instead of one get_group + sort per fighter.
# The result is a DataFrame that keeps the original row labels; any downstream
# `pd.DataFrame(fighters_detail)` call accepts it unchanged.
fighters_detail = (
    fighters
    .dropna(subset=["fighter"])
    .sort_values(by="date", ascending=False, kind="mergesort")
    .drop_duplicates(subset="fighter", keep="first")
)

In [3]:
# Assemble the final table: one row per fighter, ordered alphabetically by
# fighter name, with a clean 0..n-1 positional index.
fighter_stat = (
    pd.DataFrame(fighters_detail)
    .sort_values(by="fighter")
    .reset_index(drop=True)
)

# Prepend a 1-based "ID" column that follows the alphabetical order; the array
# is assigned positionally, so it is independent of the frame's index.
fighter_stat.insert(0, "ID", np.arange(1, len(fighter_stat.index) + 1))

In [4]:
# Write the per-fighter table to disk as CSV. The DataFrame index is left out
# of the file (index=False).
fighter_stat.to_csv(
    "data/FIGHTER_STAT.csv",
    index=False,
)