# Aviary Competition Scoring Procedure 2023

## Imports

Make sure pandas is installed in your Python environment.

In [None]:
import pandas as pd

# Import data

In [None]:
# Load the competition responses from the CSV export.
responses_path = 'sample_data.csv'
sends = pd.read_csv(responses_path)

In [None]:
# Map the CSV column headers onto the short names used by the rest of the notebook.
column_aliases = {
    'Email Address': 'email',
    'Full Name': 'name',
    'Category': 'category',
    'Anchor?': 'anchor',
    'Colour?': 'colour',
    'Number of Attempts': 'num_attempts',
    'Belayer (Full Name)': 'belayer',
    'Witness #2 (Full Name)': 'witness',
}
sends = sends.rename(columns=column_aliases)


In [None]:
# Preview the first five rows to confirm the renamed columns.
sends.head(n=5)

## Data Cleaning

In [None]:
# Normalise the free-text columns: lower-case every value and trim leading
# and trailing spaces so the same person/address compares equal across rows.
for text_column in ('email', 'name', 'belayer', 'witness'):
    sends[text_column] = sends[text_column].str.lower().str.strip()

In [None]:
# Fix route mismatch and commonly entered wrong routes.
# Each entry: (anchor to match, or None for any anchor; colour to match;
#              column to overwrite; corrected value)
route_corrections = [
    (None, 'Hand Crack', 'anchor', 2),  # The hand crack is on anchor 2
    (1, 'Red', 'anchor', 2),            # 1 Red -> 2 Red
    (5, 'Green', 'colour', 'Lime'),     # 5 Green -> 5 Lime
    (11, 'Pink', 'anchor', 12),         # 11 Pink -> 12 Pink
]

# Applied in the order listed above.
for wrong_anchor, wrong_colour, target_column, corrected_value in route_corrections:
    needs_fix = sends['colour'] == wrong_colour
    if wrong_anchor is not None:
        needs_fix = needs_fix & (sends['anchor'] == wrong_anchor)
    sends.loc[needs_fix, target_column] = corrected_value


### Fill in missing values for name and category
We made the name and category questions optional because user feedback indicated there were too many questions to fill out for each response. 
For responses without a name and category, we expect that at least one other response from the same email contains a name and category, and we use those values for all of that email's responses that lack them. 


In [None]:
# Fill missing name/category values from the same email's other responses.
#
# Only the optional identity answers are propagated. Every other column
# (route, attempts, belayer, witness, ...) describes one specific send, so
# copying it from a neighbouring row would fabricate data.
identity_columns = ['name', 'category']

# Forward- then backward-fill within each email so that a single answered
# response supplies the value to all of that email's other responses.
filled_identity = sends.groupby('email')[identity_columns].ffill()
filled_identity = filled_identity.groupby(sends['email']).bfill()

sends_filled = sends.copy()
# Keep whatever the climber actually typed and only plug the gaps; rows whose
# email is missing belong to no group and are therefore left untouched.
sends_filled[identity_columns] = sends[identity_columns].fillna(filled_identity)

## Check Assumptions
The scoring section assumes:
1. there are no duplicate entries (ie the same route is submitted twice)
2. people have entered the same name for all their entries (check : one name per email address)
3. no two people share the same name, and nobody used more than one email address (check: one email address per name)
4. people only entered one category

We should probably make sure this is true.

The challenge would be that solving these errors would likely have to be done manually.

In [None]:
# Assumption 1
# Identify anchor/colour duplicates: the same person logging the same route twice.
send_key = ['name', 'email', 'anchor', 'colour']

# Only the last expression of a cell is rendered, so print the offending rows
# explicitly; otherwise they would be removed below without ever being shown.
duplicate_sends = sends_filled[sends_filled.duplicated(subset=send_key, keep=False)]
print(f"{len(duplicate_sends)} row(s) belong to a duplicated send:")
print(duplicate_sends)

# The following will remove the duplicates and keep the first instance
sends_filled = sends_filled.drop_duplicates(subset=send_key, keep='first')

# Re-run the check on the de-duplicated frame; this should now be empty.
sends_filled[sends_filled.duplicated(subset=send_key, keep=False)]

In [None]:
# Assumption 2
# An email with more than one distinct name hints that someone mistyped their name.
# An email with zero names hints at a mistyped email address or a name never provided.
names_by_email = sends_filled[['name', 'email']].groupby(by='email')
email_counts = names_by_email.nunique()

# Show only the emails that do not map to exactly one name.
not_exactly_one_name = email_counts['name'] != 1
email_counts.loc[not_exactly_one_name]

In [None]:
# Assumption 3
# A name with more than one distinct email hints that two people share the name
# (or that someone mistyped their email).
emails_by_name = sends_filled[['name', 'email']].groupby(by='name')
name_counts = emails_by_name.nunique()

# Show only the names that do not map to exactly one email.
not_exactly_one_email = name_counts['email'] != 1
name_counts.loc[not_exactly_one_email]

In [None]:
# Assumption 4
# Check that each person is entered in exactly one category.
categories_by_name = sends_filled[['name', 'category']].groupby(by='name')
name_counts = categories_by_name.nunique()

# Show only the names that do not map to exactly one category.
not_exactly_one_category = name_counts['category'] != 1
name_counts.loc[not_exactly_one_category]

In [None]:
# List every distinct (name, email) pair in sorted order so that near-duplicates
# sit next to each other and problems can be spotted by eye.
name_email_pairs = sends_filled[['name', 'email']].drop_duplicates()
name_email_pairs.sort_values(by=['name', 'email'])

We can now proceed with these assumptions

## Determine Points per Send

### Bind Points to Sends

Points per route

In [None]:
# Load the per-route points table and preview its first five rows.
route_points_path = 'points_per_route.csv'
points_per_route = pd.read_csv(route_points_path)
points_per_route.head(n=5)

Points for number of attempts

In [None]:
# Load the per-attempt-count points table and preview its first five rows.
attempt_points_path = 'points_per_attempt.csv'
points_per_attempt = pd.read_csv(attempt_points_path)
points_per_attempt.head(n=5)

Bind points (by route) to sends

In [None]:
# Attach the route's points to every send. The left join keeps sends whose
# route is unknown (they get NaN and are reviewed in the check further down).
# `validate` makes pandas raise a MergeError if the route table lists the same
# (anchor, colour) twice, instead of silently duplicating the send and its points.
sends_points = pd.merge(
    sends_filled,
    points_per_route,
    on=['anchor', 'colour'],
    how='left',
    validate='many_to_one',
)

In [None]:
# Attach the attempts-based points to every send. The left join keeps sends
# whose attempt count has no entry in the table (they get NaN).
# `validate` makes pandas raise a MergeError if the attempts table lists the
# same `num_attempts` twice, instead of silently duplicating sends.
sends_points = pd.merge(
    sends_points,
    points_per_attempt,
    on='num_attempts',
    how='left',
    validate='many_to_one',
)

In [None]:
# Preview the first five rows of the merged sends/points table.
sends_points.head(n=5)

### Check

In [None]:
# Check to see if anyone claimed to climb a route I didn't know about, or
# reported a number of attempts that has no entry in the attempts table.
# Either case leaves a NaN from the left joins, which turns the send's points
# into NaN, so list both kinds of row for manual review.
points_columns = ['route_points', 'attempts_points']
sends_points[sends_points[points_columns].isna().any(axis=1)]

### Calculate total points per send

In [None]:
# A send's score is its route points multiplied by its attempts points.
route_component = sends_points['route_points']
attempts_component = sends_points['attempts_points']
sends_points['points'] = route_component.mul(attempts_component)

sends_points.head(n=5)

# Scores per Climber per Category

#### Recreational - men

In [None]:
# Keep only the sends logged under this category.
in_category = sends_points['category'] == 'Recreational - men'
rec_men = sends_points.loc[in_category]

# Total the points of each climber and rank from highest to lowest.
rec_men_totals = rec_men[['name', 'points']].groupby(by=['name']).sum()
rec_men_totals.sort_values(by='points', ascending=False)

#### Open - men

In [None]:
# Keep only the sends logged under this category.
in_category = sends_points['category'] == 'Open - men'
open_men = sends_points.loc[in_category]

# Total the points of each climber and rank from highest to lowest.
open_men_totals = open_men[['name', 'points']].groupby(by=['name']).sum()
open_men_totals.sort_values(by='points', ascending=False)

#### Recreational - anyone but men

In [None]:
# Keep only the sends logged under this category.
in_category = sends_points['category'] == 'Recreational - anyone but men'
rec_abm = sends_points.loc[in_category]

# Total the points of each climber and rank from highest to lowest.
rec_abm_totals = rec_abm[['name', 'points']].groupby(by=['name']).sum()
rec_abm_totals.sort_values(by='points', ascending=False)

#### Open - anyone but men

In [None]:
# Keep only the sends logged under this category.
in_category = sends_points['category'] == 'Open - anyone but men'
open_abm = sends_points.loc[in_category]

# Total the points of each climber and rank from highest to lowest.
open_abm_totals = open_abm[['name', 'points']].groupby(by=['name']).sum()
open_abm_totals.sort_values(by='points', ascending=False)

#### Top Belayer

In [None]:
# Count how many recorded sends name each belayer (most frequent first).
belayer_column = sends_points['belayer']
belayer_column.value_counts()

#### Most (total) attempts

on sent routes

In [None]:
# Sum the attempts of each climber over their recorded sends and show the top five.
# NOTE(review): no 'attempts' column is created in this notebook (the form field
# is renamed to 'num_attempts'); presumably it comes from points_per_attempt.csv.
# Confirm, otherwise this lookup raises a KeyError.
attempts_by_climber = sends_points[['name', 'attempts']].groupby(by='name').sum()
attempts_by_climber.sort_values(by='attempts', ascending=False).head(n=5)

#### Most routes climbed

This simply counts the number of submissions by each individual.

In [None]:
# Number of recorded rows per climber name, largest first.
sends_per_climber = sends_points.groupby(by=['name']).size()
sends_per_climber.sort_values(ascending=False)

## Send Status Update to Climbers

It would be cool to send everyone an email letting them know what routes we have recorded, their score, and the score cutoff for 5th place in their category. 

The Aviary already uses sendgrid so we're going to use that here too.

### Generate Emails 

First we need to make a data frame with what we want to email. 

In [None]:
import sendgrid
import os
from sendgrid.helpers.mail import Mail, Email, To, Content

In [None]:
# One row per (name, email, category) combination with that combination's total points.
email_df = (
    sends_points[['name', 'email', 'category', 'points']]
    .groupby(by=['name', 'email', 'category'])
    .sum()
    .reset_index()
)

# CHECK
# There must be exactly one row per distinct name. A mismatch means a name maps
# to several emails/categories, or a name/email/category is missing (groupby
# drops rows with a missing key). Raised explicitly rather than asserted so the
# message says what to look for.
expected_climbers = len(sends_points['name'].unique())
if len(email_df) != expected_climbers:
    raise ValueError(
        f"Expected one email row per climber name ({expected_climbers}) but built "
        f"{len(email_df)}; look for a missing name, email or category, or a name "
        "with more than one email or category."
    )

In [None]:
# Make the content of the emails to send
from html import escape

# Maps each climber's email address to the HTML body of their status email.
emails = {}

# Columns of the per-send table included in every email.
send_columns = ['Timestamp','anchor','colour','num_attempts','belayer','witness','route_points','attempts_points','points']

for _, row in email_df.iterrows() :
    # Name and category are typed into the form by the climber, so escape them
    # before embedding them in HTML.
    climber_name = escape(row['name'].title())
    climber_category = escape(str(row['category']))

    message = f"""Hi {climber_name}! <br>
    <br>
    We're super stoked you signed up for The Aviary's Let's Get Sendy Competition this year! <br>
    There are just three weeks left before the <b> finals on Friday, March 31st </b> and incase you weren't excited enough, we've got a ton of prizes from our sponsors (The Hive, Climb On Squamish, Backcountry Brewing, Ground Up Climbing Centre and more) for both climbers and audience members. <br>
    It's going to be a super fun event, so bring your friends - we hope to see you there! <br>
    <br>
    In the mean time, we wanted to let you know how you're doing so far. <br>
    <br>
    Your score is: {row['points']} <br>
    You are signed up for "{climber_category}" <br>
    <br>
    Here is a list of the climbs we have recorded for you: <br>
    """
    # Match on email as well as name: the score above is summed per
    # (name, email, category), so the listed climbs should be selected by the
    # same identity rather than by name alone.
    is_this_climber = (sends_points['name'] == row['name']) & (sends_points['email'] == row['email'])
    climber_sends = sends_points[is_this_climber]
    # to_html escapes cell values by default, so the table needs no extra escaping.
    message += climber_sends[send_columns].to_html(index = False)

    message += """Your score is just the sum of the points column. <br>
    <br>
    Be sure to record all your climbing before Wednesday, March 29th 2023 @ 6pm and don't forget we're putting up a ton of new climbs every week until then! <br> 
    If you have any questions, feel free to reach out to us at info@ubc-aviary.com or on Instagram @ubcaviary.<br>
    <br>
    Happy Climbing, <br>
    The Aviary <br>
    """

    emails[row['email']] = message

In [None]:
# function to send all the emails
def send_emails(emails):
    """Send one HTML email per entry of ``emails`` through SendGrid.

    Args:
        emails: dict mapping a recipient address to the HTML body to send.

    Returns:
        A ``(responses_status, responses_headers)`` tuple of dicts keyed by
        recipient address. For a request that completed they hold the
        response's ``status_code`` and ``headers``. For a request that raised
        they hold the exception's ``status_code`` / ``headers`` attributes
        when it has them, otherwise ``None``.

    Raises:
        RuntimeError: if there is something to send but the
            ``SENDGRID_API_KEY`` environment variable is unset or empty.
    """
    responses_status = {}
    responses_headers = {}

    # Nothing to send: return before touching the API client or credentials.
    if not emails:
        return responses_status, responses_headers

    # Fail before the first request rather than collecting one failure per
    # recipient when the key is missing.
    api_key = os.environ.get('SENDGRID_API_KEY')
    if not api_key:
        raise RuntimeError("SENDGRID_API_KEY is not set; no emails were sent")

    sg = sendgrid.SendGridAPIClient(api_key=api_key)

    from_email = Email("info@ubc-aviary.com")  # Change to your verified sender
    subject = "UBC Aviary - Let's Get Sendy! Scorecard Status"

    for email, body in emails.items():
        mail = Mail(from_email, To(email), subject, Content("text/html", body))

        try:
            # Send an HTTP POST request to /mail/send with the JSON-ready
            # representation of the Mail object
            response = sg.client.mail.send.post(request_body=mail.get())
        except Exception as err:
            # Deliberately broad: one failed recipient must not abort the
            # batch and throw away the record of who has already been
            # emailed. The failure is recorded instead, so it shows up in
            # any check for statuses other than 202.
            responses_status[email] = getattr(err, 'status_code', None)
            responses_headers[email] = getattr(err, 'headers', None)
            continue

        responses_status[email] = response.status_code
        responses_headers[email] = response.headers

    return responses_status, responses_headers

In [None]:
# Guard the bulk send behind a typed confirmation so that re-running this cell
# by accident does not email everyone.
expected_string = "Yes, I want to send all the emails"  # exact phrase the user must type
user_string = input("Please enter the word 'Yes, I want to send all the emails' to send all the emails: ")

if user_string != expected_string:
    # Anything other than the exact phrase cancels the send.
    print("Incorrect. Emails were not sent")
else:
    responses_status, responses_headers = send_emails(emails)

In [None]:
# List every recipient whose request did not come back with status 202;
# an empty dict means all emails were sent ok.
dict(
    (recipient, status)
    for recipient, status in responses_status.items()
    if status != 202
)