In [573]:
import pandas as pd
import statistics
import heapq


mens_data = pd.read_csv("mens_gym_data_fil.csv")
womens_data = pd.read_csv("womens_gym_data_fil.csv")
mens_data["Month"] = mens_data["Date"].apply(lambda x: x.split(" ")[-2])
mens_data['Month'] = mens_data['Month'].replace({'Jan': 1, 'Feb': 2, 'Mar': 3, 
                                   'Apr': 4, 'May': 5, 'June': 6, 'July': 7, 'Aug': 8, 
                                   'Sept': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12})
womens_data["Month"] = womens_data["Date"].apply(lambda x: x.split(" ")[-2])
womens_data['Month'] = womens_data['Month'].replace({'Jan': 1, 'Feb': 2, 'Mar': 3, 
                                   'Apr': 4, 'May': 5, 'June': 6, 'July': 7, 'Aug': 8, 
                                   'Sept': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12})

mens_data['name'] = mens_data['name'].replace('Fred RICHARD', "Frederick Nathaniel RICHARD")
mens_data['name'] = mens_data['name'].replace('Frederick RICHARD', "Frederick Nathaniel RICHARD")
mens_data['name'] = mens_data['name'].replace('Joshua KARNES', "Joshua Andrew KARNES")
mens_data['name'] = mens_data['name'].replace("Curran Michael PHILLIPS", "Curran PHILLIPS")
mens_data['name'] = mens_data['name'].replace("Taylor Troy CHRISTOPULOS", "Taylor CHRISTOPULOS")
mens_data['name'] = mens_data['name'].replace('Khoi Alexander YOUNG', 'Khoi YOUNG')
mens_data['name'] = mens_data['name'].replace('Ian Hunter SKIRKEY', 'Ian SKIRKEY')
mens_data['name'] = mens_data['name'].replace('Shane Michael WISKUS', 'Shane WISKUS')
mens_data['name'] = mens_data['name'].replace('Yul Kyung Tae MOLDAUER', 'Yul MOLDAUER')

womens_data['name'] = womens_data['name'].replace('Joscelyn Michelle ROBERSON', 'Joscelyn ROBERSON')
womens_data['name'] = womens_data['name'].replace('Nola Rhianne MATTHEWS', 'Nola MATTHEWS')







In [574]:
def weight_adjustment(month, year, roundtype):
    weight = 1
    if roundtype in ["AAfinal", "TeamFinal", "final"]:
        weight += 0.5
    if year == 2022:
        weight = 1 + 0.0625 * (month - 1)
    else:
        weight = 1.75 + 0.0625 * (month - 1)
        
    return weight

def adjusted_score_c(data_point):
    adjusted_score = 14
    rank = data_point["Rank"]
    if rank == 1:
        adjusted_score *= 10
    elif rank == 2:
        adjusted_score *= 7.5
    elif rank == 3:
        adjusted_score *= 5
    elif rank <= 5:
        adjusted_score *= 4
    elif rank <= 10:
        adjusted_score *= 2.5
    elif pd.isna(rank):
        adjusted_score *= 1
    
    return adjusted_score

In [575]:
mens_apps = set(mens_data['Apparatus'])

## Starting with the male performances
## 1. I would like to put recent performances with more weights.
## 2. I would like to weight top medals with higher weights.
## 3. I would like to weight final round performances higher.
## 4. I would like to weight consistency more. -> This'll come in for determining our best guys OR as a smell check
## 5. If they don't rank in a particular event, I heavily weigh against.


## Structure of the Dict
## {Abe: [(AppA: WeightedScore), (AppB: WeightedScore), ...]

## WeightedScore calculated by ListOfScores: [AdjustedScore, WeightGiven]

## WeightGiven
## Recent Performances = 1 - 1.75 if 2022, 1.75 - 2.5 if 2023
## Round Type = 1 if not final, 1.5 if final

## AdjustedScore
## TopMedal = 2 if Gold, 1.75 if Silver, 1.5 if Bronze, 1.25 if 4th or 5th
## RankingBad = 0.6x if <30th, 0.85x if <20th

## Measure Consistency at the End
def create_performances(mens_data):
    male_performances = {}
    males_sw = {}
    for i in set(mens_data['name']):
        person_data = mens_data[mens_data["name"] == i].copy()
        performances = []
        score_and_weights = []
        for j in set(mens_data['Apparatus']):
            rel_data = person_data[person_data["Apparatus"] == j].copy()
            total_score = 0
            total_weights = 0
            for k in range(0, len(rel_data)):
                rel_point = rel_data.iloc[k]
                weightgiven = weight_adjustment(rel_point["Month"], rel_point["Year"], rel_point["Round"])
                adjusted_score = adjusted_score_c(rel_point)
                score_and_weights.append((weightgiven, adjusted_score, j, rel_point['Score'], rel_point['Rank']))
                total_score += weightgiven * adjusted_score
                total_weights += weightgiven
            if len(rel_data) == 0:
                performances.append((j, 0))
            else:
                performances.append((j, total_score / total_weights))
        males_sw[i] = score_and_weights
        male_performances[i] = performances
    return male_performances, males_sw
            
    
def create_sum_avgs(mens_data):
    male_performances, males_swe = create_performances(mens_data)
    male_sum_scores = []
    male_avg_scores = []
    male_best1_scores = []
    male_best2_scores = []
    male_best3_scores = []
    for m in set(mens_data['name']):
        personsscores = male_performances[m]
        sc = [s[1] for s in personsscores]
        male_sum_scores.append((m, sum(sc)))
        stripped = []
        for i in sc:
            if i > 8:
                stripped.append(i)
        if len(stripped) == 0:
            continue
        ## Best 1
        male_best1_scores.append((m, max(stripped)))
        
        ## Best 2
        if len(stripped) >= 2:
            male_best2_scores.append((m, statistics.mean(heapq.nlargest(2, stripped))))
        
        ## Best 3
        if len(stripped) >= 3:
            male_best3_scores.append((m, statistics.mean(heapq.nlargest(3, stripped))))
    
    return male_sum_scores, males_swe, male_performances, [male_best1_scores, 
                                                                            male_best2_scores, male_best3_scores]          
            
        

In [576]:
male_sum_scores, males_swe, male_performances, best_scores = create_sum_avgs(mens_data)

In [577]:
sorted(male_sum_scores, key=lambda x: x[1], reverse = True)[0:10]

[('Brody MALONE', 249.80154659560017),
 ('Donnell WHITTENBURG', 195.52011560555243),
 ('Frederick Nathaniel RICHARD', 190.83141796337506),
 ('Asher HONG', 189.26565927742791),
 ('Paul JUDA', 178.88345434011973),
 ('Taylor BURKHART', 174.98751609709757),
 ('Joshua Andrew KARNES', 171.18041336681475),
 ('Khoi YOUNG', 169.02882167446435),
 ('Colt WALKER', 165.77200819602803),
 ('Cameron BOCK', 158.15374820576932)]

In [578]:
sorted(best_scores[0], key=lambda x: x[1], reverse = True)[0:10]

[('Brody MALONE', 84.75796178343948),
 ('Curran PHILLIPS', 77.2883295194508),
 ('Donnell WHITTENBURG', 67.7936507936508),
 ('Stephen NEDOROSCIK', 60.963276836158194),
 ('Frederick Nathaniel RICHARD', 49.170731707317074),
 ('Ian SKIRKEY', 48.38659793814433),
 ('Khoi YOUNG', 47.75471698113208),
 ('Taylor BURKHART', 47.43283582089552),
 ('Paul JUDA', 47.26315789473684),
 ('Colt WALKER', 47.09493670886076)]

In [579]:
sorted(best_scores[1], key=lambda x: x[1], reverse = True)[0:10]

[('Brody MALONE', 70.17941948821097),
 ('Curran PHILLIPS', 58.97936029045165),
 ('Donnell WHITTENBURG', 52.345674501684734),
 ('Khoi YOUNG', 46.47486886401002),
 ('Taylor BURKHART', 45.373932361314814),
 ('Paul JUDA', 44.39546783625731),
 ('Frederick Nathaniel RICHARD', 44.30745887691435),
 ('Asher HONG', 43.302864420729975),
 ('Riley LOOS', 39.993769470404985),
 ('Joshua Andrew KARNES', 38.70359281437126)]

In [580]:
sorted(best_scores[2], key=lambda x: x[1], reverse = True)[0:10]

[('Brody MALONE', 58.93157880410646),
 ('Curran PHILLIPS', 48.81771479090829),
 ('Donnell WHITTENBURG', 43.92909665425969),
 ('Paul JUDA', 41.57825196574458),
 ('Frederick Nathaniel RICHARD', 40.183796114021334),
 ('Khoi YOUNG', 40.17516510125921),
 ('Asher HONG', 38.94363146812462),
 ('Taylor BURKHART', 37.883954907543206),
 ('Joshua Andrew KARNES', 35.83433133732535),
 ('Colt WALKER', 35.0037544972353)]

In [None]:
### Men's Finalists for Team:
# Brody Malone (X)
# Donnell Whittenburg (X)
# Frederick Nathaniel Richard
# Asher Hong
# Paul Juda
# Taylor Burkhart
# Joshua Karnes
# Khoi Young
# Colt Walker
# Curran Phillips
# Stephen Nedoroscik

In [None]:
### Women's Finalists for Team:
# Simone Biles (X)
# Jordan Chiles (X)
# Joscelyn Roberson
# Kayla Dicello
# Jada Carey
# Shilese Jones
# Zoe Miller
# Kaliya Lincoln
# Skye Blakely 
# Ashlee Sullivan

In [581]:
womens_apps = set(womens_data['Apparatus'])
womens_sum_scores, womens_swe, womens_performances, womens_best_scores = create_sum_avgs(womens_data)

In [582]:
sorted(womens_sum_scores, key=lambda x: x[1], reverse = True)[0:10]

[('Simone BILES', 287.8896060726934),
 ('Jordan CHILES', 198.08152106266806),
 ('Joscelyn ROBERSON', 183.10938550379834),
 ('Kayla DICELLO', 172.3985411365564),
 ('Jade CAREY', 168.59192581868828),
 ('Shilese JONES', 167.67622030422),
 ('Zoe MILLER', 158.59610351846467),
 ('Kaliya LINCOLN', 154.59863053613054),
 ('Skye BLAKELY', 114.4503574692254),
 ('Ashlee SULLIVAN', 109.26167496886674)]

In [583]:
sorted(womens_best_scores[0], key=lambda x: x[1], reverse = True)[0:10]

[('Zoe MILLER', 102.78947368421052),
 ('Kaliya LINCOLN', 91.70673076923077),
 ('Jade CAREY', 89.96954314720813),
 ('Simone BILES', 86.71146245059289),
 ('Jordan CHILES', 82.32911392405063),
 ('Joscelyn ROBERSON', 64.12978369384359),
 ('Kayla DICELLO', 60.30152671755725),
 ('Shilese JONES', 59.72374429223744),
 ('Jordis EICHMAN', 56.0),
 ('Ashlee SULLIVAN', 53.69318181818182)]

In [584]:
sorted(womens_best_scores[1], key=lambda x: x[1], reverse = True)[0:10]

[('Simone BILES', 86.71146245059289),
 ('Jordan CHILES', 68.43354430379748),
 ('Joscelyn ROBERSON', 63.77869275189917),
 ('Zoe MILLER', 61.87539982553068),
 ('Jade CAREY', 61.19596290934413),
 ('Kaliya LINCOLN', 60.22355769230769),
 ('Kayla DICELLO', 50.96409669211196),
 ('Shilese JONES', 50.580784063217166),
 ('Ashlee SULLIVAN', 40.63083748443338),
 ('Skye BLAKELY', 38.36039886039886)]

In [585]:
sorted(womens_best_scores[2], key=lambda x: x[1], reverse = True)[0:10]

[('Simone BILES', 83.59561309142482),
 ('Jordan CHILES', 56.373744084667926),
 ('Joscelyn ROBERSON', 56.36979516793278),
 ('Jade CAREY', 51.530641939562756),
 ('Zoe MILLER', 48.19870117282156),
 ('Kaliya LINCOLN', 46.86621017871018),
 ('Shilese JONES', 46.83702756318038),
 ('Kayla DICELLO', 45.93310149844501),
 ('Skye BLAKELY', 33.4834524897418),
 ('Ashlee SULLIVAN', 31.753891656288918)]

In [586]:
set(womens_performances.keys())

{'Addison FATTA',
 'Alicia ZHOU',
 'Amelia DISIDORE',
 'Ashlee SULLIVAN',
 'Avery MOLL',
 'Brooke PIERSON',
 'Charlotte BOOTH',
 'Dulcy CAYLOR',
 'Elle MUELLER',
 'Eveylynn LOWE',
 'Jade CAREY',
 'Jordan CHILES',
 'Jordis EICHMAN',
 'Joscelyn ROBERSON',
 'Kaliya LINCOLN',
 'Katelyn JONG',
 'Katelyn ROSEN',
 'Kayla DICELLO',
 'Kelise WOOLFORD',
 'Leanne WONG',
 'Levi JUNG-RUIVIVAR',
 'Lexi ZEISS',
 'Madray JOHNSON',
 'Marissa NEAL',
 'Michelle PINEDA',
 'Myli LEW',
 'Nola MATTHEWS',
 'Shilese JONES',
 'Simone BILES',
 'Skye BLAKELY',
 'Sunisa LEE',
 'Tiana SUMANASEKERA',
 'Zoe MILLER'}

In [591]:
womens_performances["Leanne WONG"]

[('UB', 19.072033898305083),
 ('FX', 27.442786069651742),
 ('BB', 25.222672064777328),
 ('VT', 23.83606557377049)]

In [590]:
womens_data[womens_data['name'] == "Leanne WONG"]

Unnamed: 0,LastName,FirstName,Gender,Country,Date,Competition,Round,Location,Apparatus,Rank,D_Score,E_Score,Penalty,Score,Nation,name,Year,Month
775,WONG,Leanne,w,USA,29 Oct 2022 - 6 Nov 2022,2022 51st FIG Artistic Gymnastics World Champi...,TeamFinal,"Liverpool, England",UB,,5.7,8.066,,13.766,USA,Leanne WONG,2022,11
776,WONG,Leanne,w,USA,29 Oct 2022 - 6 Nov 2022,2022 51st FIG Artistic Gymnastics World Champi...,qual,"Liverpool, England",VT,21.0,5.0,8.766,,13.766,USA,Leanne WONG,2022,11
777,WONG,Leanne,w,USA,29 Oct 2022 - 6 Nov 2022,2022 51st FIG Artistic Gymnastics World Champi...,qual,"Liverpool, England",VT,9.0,5.0,8.766,,13.766,USA,Leanne WONG,2022,11
778,WONG,Leanne,w,USA,29 Oct 2022 - 6 Nov 2022,2022 51st FIG Artistic Gymnastics World Champi...,qual,"Liverpool, England",VT,9.0,4.2,8.9,,13.1,USA,Leanne WONG,2022,11
779,WONG,Leanne,w,USA,19-20 Aug 2022,2022 U.S. Championships,qual,"Tampa, FL",BB,,5.5,7.65,,13.15,USA,Leanne WONG,2022,8
780,WONG,Leanne,w,USA,19-20 Aug 2022,2022 U.S. Championships,final,"Tampa, FL",BB,,5.6,7.8,,13.4,USA,Leanne WONG,2022,8
781,WONG,Leanne,w,USA,19-20 Aug 2022,2022 U.S. Championships,qual,"Tampa, FL",UB,,5.7,8.5,,14.2,USA,Leanne WONG,2022,8
782,WONG,Leanne,w,USA,19-20 Aug 2022,2022 U.S. Championships,final,"Tampa, FL",UB,,5.7,8.55,,14.25,USA,Leanne WONG,2022,8
783,WONG,Leanne,w,USA,28-31 July 2022,2022 U.S. Classic,AAfinal,"West Valley City, Utah",BB,1.0,5.4,8.25,0.1,13.55,USA,Leanne WONG,2022,7
784,WONG,Leanne,w,USA,28-31 July 2022,2022 U.S. Classic,AAfinal,"West Valley City, Utah",FX,3.0,5.7,7.9,,13.6,USA,Leanne WONG,2022,7
