In [2]:
import pandas as pd
import numpy as np

# v1.1 - adjusted code to calculate Mobility (% JV to V) as a function of JV athletes (not Varsity),
# since in some years varsity programs did not have a corresponding JV

# Load the cleaned AP export (v3) from the notebook-relative data folder
AP_CSV_PATH = 'data/AP cleaned (v3).csv'
aps = pd.read_csv(AP_CSV_PATH)

In [3]:
# Keep only JV rows. A missing level never compares equal to "JV",
# so the equality test alone also drops rows with no level recorded.
is_jv = aps["Program (Level)"].eq("JV")

# Cross Country and Wrestling only had JV programs for a few years, so leave them out
SPARSE_JV_PROGRAMS = ["Cross Country", "Wrestling"]
is_sparse_jv_program = aps["Program (name)"].isin(SPARSE_JV_PROGRAMS)

jv_df = aps[is_jv & ~is_sparse_jv_program]
jv_df

Unnamed: 0,Person ID,Gender code,Race/ethnicity,FA,Acad Yr (start),Code,Program (name),Program (Gender),Program (Level),Program (Season),Grade at Time of Activity,Grad year
44,19211,M,African American,0,2000,S,Baseball,Boys,JV,Spring,10,2003
46,19105,M,European American (Caucasian),0,2000,S,Baseball,Boys,JV,Spring,9,2004
55,22837,M,European American (Caucasian),0,2000,S,Baseball,Boys,JV,Spring,9,2004
57,13515,M,Latino/Hispanic American,0,2000,S,Baseball,Boys,JV,Spring,9,2004
66,18681,M,0,0,2000,S,Baseball,Boys,JV,Spring,11,2002
...,...,...,...,...,...,...,...,...,...,...,...,...
38566,267226,F,Multiracial American,0,2022,S,Volleyball,Girls,JV,Fall,9,2026
38567,267166,F,Asian American,0,2022,S,Volleyball,Girls,JV,Fall,10,2025
38569,254454,F,Asian American,0,2022,S,Volleyball,Girls,JV,Fall,10,2025
38570,268365,F,European American (Caucasian),0,2022,S,Volleyball,Girls,JV,Fall,9,2026


In [4]:
# jv_df was produced by boolean-filtering `aps`; take an independent copy before
# adding a column so pandas does not raise SettingWithCopyWarning and the new
# column is guaranteed to land on this frame only.
jv_df = jv_df.copy()

# Add a "full name" column: program gender + ' ' + program name (e.g. "Boys Baseball")
jv_df['full name'] = jv_df["Program (Gender)"] + ' ' + jv_df["Program (name)"]

# Array of the distinct JV program full names
jv_prog_names = jv_df["full name"].unique()

# Array of every distinct athlete (by Person ID) appearing in the JV rows;
# the cell displays how many there are
jv_athletes = jv_df["Person ID"].unique()
len(jv_athletes)

2396

In [5]:
# Keep only varsity rows. A missing level never compares equal to "Varsity",
# so the equality test alone also drops rows with no level recorded.
is_varsity = aps["Program (Level)"].eq("Varsity")

# Leave out the programs treated as having no sub-varsity level
NO_SUB_VARSITY_PROGRAMS = ["Alpine Skiing", "Cross Country", "Golf", "Wrestling", "Sailing"]
lacks_sub_varsity = aps["Program (name)"].isin(NO_SUB_VARSITY_PROGRAMS)

v_df = aps[is_varsity & ~lacks_sub_varsity]

In [6]:
# v_df was produced by boolean-filtering `aps`; take an independent copy before
# adding a column so pandas does not raise SettingWithCopyWarning and the new
# column is guaranteed to land on this frame only.
v_df = v_df.copy()

# Add a "full name" column: program gender + ' ' + program name (e.g. "Girls Volleyball")
v_df['full name'] = v_df["Program (Gender)"] + ' ' + v_df["Program (name)"]

# Array of the distinct varsity program full names
v_prog_names = v_df["full name"].unique()

# Array of every distinct athlete (by Person ID) appearing in the varsity rows;
# the cell displays how many there are
v_athletes = v_df["Person ID"].unique()
len(v_athletes)

2206

In [9]:
# Per-program JV -> varsity mobility table.
#
# For every varsity program that also has JV rows, count the distinct JV
# athletes, the distinct varsity athletes, and the athletes who appear on both,
# then express the overlap as a percentage of the JV athletes.
MOBILITY_COLUMNS = ['Program', '# of JV athletes', '# of Varsity athletes', 'Overlap', 'Mobility (% JV to V)']

# Collect one record per program and build the DataFrame once at the end,
# rather than growing it row by row with .loc
mobility_records = []

for program in v_prog_names:

    # distinct JV athletes for this program; loop-local names are used so the
    # notebook-level jv_athletes / v_athletes arrays are not overwritten
    program_jv_ids = jv_df.loc[jv_df['full name'] == program, "Person ID"].unique()

    # only continue if JV has a corresponding program (this also guarantees a
    # non-zero denominator below)
    if len(program_jv_ids) == 0:
        continue

    # distinct varsity athletes for this program
    program_v_ids = v_df.loc[v_df['full name'] == program, "Person ID"].unique()

    # find players that were in both varsity and JV
    moved_up_ids = np.intersect1d(program_v_ids, program_jv_ids)

    # calculate what percentage of this program's JV athletes also played on
    # its varsity team (denominator is JV athletes, not varsity)
    mobility_percent = round(len(moved_up_ids) / len(program_jv_ids) * 100, 1)

    mobility_records.append(
        [program, len(program_jv_ids), len(program_v_ids), len(moved_up_ids), mobility_percent]
    )

# Passing the columns explicitly keeps the headers even if no program matched
mobility_df = pd.DataFrame(mobility_records, columns=MOBILITY_COLUMNS)

In [10]:
# Show programs ordered from lowest to highest JV-to-varsity mobility
mobility_df.sort_values(by='Mobility (% JV to V)', ascending=True)

Unnamed: 0,Program,# of JV athletes,# of Varsity athletes,Overlap,Mobility (% JV to V)
16,Boys Tennis,225,81,31,13.8
1,Girls Basketball,206,133,31,15.0
8,Girls Hockey,215,168,37,17.2
17,Girls Tennis,198,84,40,20.2
15,Girls Squash,201,90,42,20.9
12,Girls Soccer,282,180,65,23.0
14,Boys Squash,221,113,56,25.3
10,Girls Lacrosse,318,223,86,27.0
13,Girls Softball,127,105,36,28.3
18,Girls Volleyball,128,79,39,30.5
