# Aviary Competition Scoring Procedure 2023

## Imports

Make sure to install pandas in your Python environment.

In [None]:
import pandas as pd

# Import data

In [None]:
# Load the raw submission export into a DataFrame.
sends = pd.read_csv(filepath_or_buffer='sample_data.csv')

In [None]:
# Shorten the verbose form headers to short snake_case column names.
# errors='raise' makes a missing or reworded header fail right here, instead
# of surfacing later as a KeyError in the cleaning or merge cells (every
# renamed column is used further down the notebook).
sends = sends.rename(
    columns={
        'Email Address': 'email',
        'Full Name': 'name',
        'Category': 'category',
        'Anchor?': 'anchor',
        'Colour?': 'colour',
        'Number of Attempts': 'num_attempts',
        'Belayer (Full Name)': 'belayer',
        'Witness #2 (Full Name)': 'witness',
    },
    errors='raise',
)


In [None]:
# Preview the first five rows to confirm the renamed columns.
sends.head(n=5)

## Data Cleaning

The scoring section assumes:
1. there are no duplicate entries (i.e. the same route is not submitted twice)
2. people have entered the same name for all their entries (i.e. an individual has submitted all their entries under the same name)
3. no two people have the same name

We should probably make sure this is true.

The challenge is that fixing these errors will likely have to be done manually.

In [None]:
# Normalise the free-text identity fields so the same person always compares
# equal: trim surrounding whitespace (a stray trailing space would otherwise
# split one climber into two) and convert to lower case.
for text_col in ('email', 'name', 'belayer', 'witness'):
    sends[text_col] = sends[text_col].str.strip().str.lower()

In [None]:
# 1 - Show every row that shares (name, email, anchor, colour) with another row.
# keep=False flags all copies; which ones to keep/drop must be confirmed manually.
sends.loc[
    sends.duplicated(subset=['name', 'email', 'anchor', 'colour'], keep=False)
]

In [None]:
# 2/3 - List the unique (name, email) pairs in alphabetical order so that
# variations of the same name can be spotted manually.
(
    sends.loc[:, ['name', 'email']]
    .drop_duplicates()
    .sort_values(by=['name', 'email'])
)

In [None]:
# Emails submitted under more than one name hint that someone mistyped their name.
# Only the offending emails are listed (most distinct names first), so they
# cannot be hidden by notebook display truncation; an empty result means
# there is nothing to review.
(
    sends[['name', 'email']]
    .groupby(by='email')
    .nunique()
    .loc[lambda counts: counts['name'] > 1]
    .sort_values(by='name', ascending=False)
)

In [None]:
# Names submitted with more than one email hint that two people share a name
# (or that someone mistyped their email).
# Only the offending names are listed (most distinct emails first), so they
# cannot be hidden by notebook display truncation; an empty result means
# there is nothing to review.
(
    sends[['name', 'email']]
    .groupby(by='name')
    .nunique()
    .loc[lambda counts: counts['email'] > 1]
    .sort_values(by='email', ascending=False)
)

We can now proceed with these assumptions

## Bind Points to Sends

Points per route

In [None]:
# Load the points-per-route table and preview it ordered by anchor, then colour.
points_per_route = pd.read_csv(filepath_or_buffer='points_per_route.csv')
points_per_route.sort_values(by=['anchor', 'colour']).head(n=5)

Points for number of attempts

In [None]:
# Load the points-per-attempt table and preview its first five rows.
points_per_attempt = pd.read_csv(filepath_or_buffer='points_per_attempt.csv')
points_per_attempt.head(n=5)

Bind points (by route) to sends

In [None]:
# Attach the route columns to each send. how='left' keeps every send; a send
# whose (anchor, colour) is missing from points_per_route gets NaN instead.
# validate='many_to_one' raises a MergeError if a route appears more than once
# in the points table, which would otherwise duplicate sends and inflate scores.
sends_points = pd.merge(
    sends,
    points_per_route,
    on=['anchor', 'colour'],
    how='left',
    validate='many_to_one',
)

In [None]:
# Attach the attempt columns to each send. how='left' keeps every send; a send
# whose num_attempts is missing from points_per_attempt gets NaN instead.
# validate='many_to_one' raises a MergeError if a num_attempts value appears
# more than once in the table, which would otherwise duplicate sends and
# inflate scores.
sends_points = pd.merge(
    sends_points,
    points_per_attempt,
    on='num_attempts',
    how='left',
    validate='many_to_one',
)

In [None]:
# Preview the merged table to check the points columns were attached.
sends_points.head(n=5)

### Calculate total points

In [None]:
# Points for a send are the product of its route_points and attempts_points.
sends_points['points'] = sends_points['route_points'].mul(
    sends_points['attempts_points']
)
sends_points.head(n=5)

# Scores

### Recreational - men

In [None]:
# Keep only the sends entered in the 'Recreational - men' category.
rec_men = sends_points[sends_points['category'] == 'Recreational - men']

# Leaderboard: total points per name, highest first.
(
    rec_men.groupby('name')[['points']]
    .sum()
    .sort_values(by='points', ascending=False)
)

### Open - men

In [None]:
# Keep only the sends entered in the 'Open - men' category.
open_men = sends_points[sends_points['category'] == 'Open - men']

# Leaderboard: total points per name, highest first.
(
    open_men.groupby('name')[['points']]
    .sum()
    .sort_values(by='points', ascending=False)
)

### Recreational - anyone but men

In [None]:
# Keep only the sends entered in the 'Recreational - anyone but men' category.
rec_abm = sends_points[sends_points['category'] == 'Recreational - anyone but men']

# Leaderboard: total points per name, highest first.
(
    rec_abm.groupby('name')[['points']]
    .sum()
    .sort_values(by='points', ascending=False)
)

### Open - anyone but men

In [None]:
# Keep only the sends entered in the 'Open - anyone but men' category.
open_abm = sends_points[sends_points['category'] == 'Open - anyone but men']

# Leaderboard: total points per name, highest first.
(
    open_abm.groupby('name')[['points']]
    .sum()
    .sort_values(by='points', ascending=False)
)

### Top Belayer

In [None]:
# Count how many submitted sends list each belayer, most frequent first.
sends_points['belayer'].value_counts(sort=True, ascending=False)

### Most (total) attempts

on sent routes

In [None]:
# Top five names by summed 'attempts' across their submitted sends.
# NOTE(review): no visible cell creates an 'attempts' column (the form field is
# renamed to 'num_attempts'); presumably it comes from points_per_attempt.csv
# via the merge - confirm.
(
    sends_points.groupby('name')[['attempts']]
    .sum()
    .sort_values(by='attempts', ascending=False)
    .head(5)
)

### Most routes climbed

Just counts the number of submissions by an individual

In [None]:
# Number of submitted sends per name, largest first.
sends_points.groupby('name').size().sort_values(ascending=False)