In [1]:
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
# Load the recruiting data; the path is relative to the notebook's working directory.
df = pd.read_csv('../data/texas_recruiting.csv')

In [3]:
# Preview the first rows to sanity-check the load.
df.head()

Unnamed: 0,id,athleteId,recruitType,year,ranking,name,school,committedTo,position,height,weight,stars,rating,city,stateProvince,country,hometownInfo.latitude,hometownInfo.longitude,hometownInfo.fipsCode
0,92318,,HighSchool,2023,1.0,Arch Manning,Isidore Newman,Texas,QB,76.0,215,5,1.0,New Orleans,LA,USA,29.949932,-90.070116,22071.0
1,121,,HighSchool,2002,1.0,Vince Young,Madison,Texas,DUAL,77.0,200,5,1.0,Houston,TX,USA,29.758938,-95.367697,48201.0
2,8266,,HighSchool,2006,4.0,Sergio Kindle,Woodrow Wilson,Texas,ILB,76.0,225,5,0.998,Dallas,TX,USA,32.776272,-96.796856,48113.0
3,17097,501940.0,HighSchool,2010,5.0,Jackson Jeffcoat,Plano West,Texas,SDE,75.0,230,5,0.9976,Plano,TX,USA,33.013676,-96.69251,48085.0
4,127,,HighSchool,2002,6.0,Rodrique Wright,Alief Hastings,Texas,DT,77.0,330,5,0.997,Houston,TX,USA,29.758938,-95.367697,48201.0


In [4]:
# List the distinct raw position labels present in the data.
df.position.unique()

array(['QB', 'DUAL', 'ILB', 'SDE', 'DT', 'RB', 'OLB', 'WR', 'ATH', 'IOL',
       'OG', 'PRO', 'OT', 'S', 'LB', 'WDE', 'CB', 'TE', 'EDGE', 'FB',
       'OC', 'DL', 'APB', 'K', 'P', 'LS'], dtype=object)

In [5]:
# Collapse the raw position labels into a smaller set of position groups.
# Written as "group -> raw labels" so the members of each group sit together;
# the comprehension inverts it into the raw-label -> group lookup.
position_map = {
    raw_label: group
    for group, raw_labels in {
        'QB': ['QB', 'DUAL', 'PRO'],
        'RB': ['RB', 'APB', 'FB'],
        'WR': ['WR'],
        'TE': ['TE'],
        'IOL': ['IOL', 'OG', 'OC'],
        'OT': ['OT'],
        'EDGE': ['EDGE', 'SDE', 'WDE'],
        'IDL': ['DT', 'DL'],
        'ILB': ['LB', 'ILB'],
        'OLB': ['OLB'],
        'CB': ['CB'],
        'S': ['S'],
        'ATH': ['ATH'],
        'K': ['K'],
        'LS': ['LS'],
        'P': ['P'],
    }.items()
    for raw_label in raw_labels
}

In [6]:
# Labels that appear in the data or in position_map but not in both;
# an empty set means the mapping covers the data exactly.
set(df['position'].unique()).symmetric_difference(position_map)

set()

In [7]:
# Translate each raw label to its position group; an unmapped label raises KeyError.
df['position_simple'] = df['position'].map(position_map.__getitem__)

In [8]:
# Inspect column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 496 entries, 0 to 495
Data columns (total 20 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   id                      496 non-null    int64  
 1   athleteId               302 non-null    float64
 2   recruitType             496 non-null    object 
 3   year                    496 non-null    int64  
 4   ranking                 495 non-null    float64
 5   name                    496 non-null    object 
 6   school                  496 non-null    object 
 7   committedTo             496 non-null    object 
 8   position                496 non-null    object 
 9   height                  496 non-null    float64
 10  weight                  496 non-null    int64  
 11  stars                   496 non-null    int64  
 12  rating                  496 non-null    float64
 13  city                    493 non-null    object 
 14  stateProvince           493 non-null    ob

In [9]:
# Number of distinct recruit ids (compare with the row count to spot repeated ids).
len(df.id.unique())

496

In [10]:
# Count how many rows share each (name, school) pair.
dupe_counts = (
    df.groupby(['name', 'school'])['id']
    .count()
    .reset_index(name='count_dupes')
)

In [11]:
# Attach the per-(name, school) row count to every recruit row.
df = df.merge(dupe_counts, how='left', on=['name', 'school'])

In [12]:
# Keep one row for every (name, school) pair that appears more than once.
# `> 1` rather than `== 2`: a recruit listed three or more times would otherwise
# match neither this filter nor the `count_dupes == 1` filter used in the
# concat that follows, and would silently vanish from the data.
df_dupes = df[df['count_dupes'] > 1].drop_duplicates(subset=['name', 'school'], keep='first')

In [13]:
# Recombine the recruits listed once with the de-duplicated rows.
# The original index labels are kept (no reset), so row order changes.
df = pd.concat([df.loc[df['count_dupes'] == 1], df_dupes])

In [14]:
# Position groups belonging to each unit. The order inside each list is reused
# later as the display order of the positions.
side = {
    'offense': ['QB', 'RB', 'WR', 'TE', 'OT', 'IOL'],
    'defense': ['ILB', 'OLB', 'CB', 'S', 'EDGE', 'IDL'],
    'special_teams_or_athlete': ['K', 'P', 'LS', 'ATH'],
}


def classify_side(simple_position, side):
    """Return the unit name whose position list contains ``simple_position``.

    Args:
        simple_position: Position group label to look up.
        side: Mapping of unit name -> list of position group labels.

    Returns:
        The first matching unit name, or ``None`` when no unit lists the position.
    """
    return next(
        (unit for unit, positions in side.items() if simple_position in positions),
        None,
    )

In [15]:
# Tag each recruit with the unit their position group belongs to.
df['side'] = df['position_simple'].map(lambda position: classify_side(position, side))

In [16]:
# Recruit counts per (position group, side) row and class-year column;
# combinations with no recruits are filled with 0.
composition = df.pivot_table(
    index=['position_simple', 'side'],
    columns=['year'],
    values='id',
    aggfunc='count',
    fill_value=0,
).reset_index()

In [17]:
# Summed recruit rating per (position group, side) row and class-year column.
# No fill_value is given, so combinations with no recruits stay NaN.
composition_score = df.pivot_table(
    index=['position_simple', 'side'],
    columns=['year'],
    values='rating',
    aggfunc='sum',
).reset_index()

In [18]:
# Display the recruit counts per position group and class year.
composition

year,position_simple,side,2000,2001,2002,2003,2004,2005,2006,2007,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,ATH,special_teams_or_athlete,0,1,0,3,1,0,0,1,...,1,3,2,1,1,2,2,2,2,0
1,CB,defense,0,0,2,2,1,0,2,2,...,1,3,1,2,3,2,1,2,2,1
2,EDGE,defense,1,0,2,1,1,2,3,2,...,2,2,2,5,2,2,1,4,4,3
3,IDL,defense,1,1,5,1,2,1,1,3,...,2,1,5,0,2,1,2,1,4,1
4,ILB,defense,0,0,2,1,1,0,3,0,...,1,1,1,0,1,2,0,0,1,4
5,IOL,offense,0,0,4,2,1,1,1,3,...,2,2,4,1,0,0,2,0,4,2
6,K,special_teams_or_athlete,0,0,0,0,0,0,1,0,...,0,0,0,0,1,0,0,0,1,0
7,LS,special_teams_or_athlete,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0
8,OLB,defense,0,1,3,1,2,3,0,1,...,2,3,2,0,2,1,2,2,0,0
9,OT,offense,0,0,0,0,2,1,2,1,...,1,2,2,1,4,3,2,2,3,3


In [19]:
def get_comp_pct_by_class(composition):
    """Convert per-class recruit counts into each position's share of its class.

    The first two columns are treated as labels and left untouched; every
    remaining column is treated as one class and rescaled so its values are
    whole-number percentages of that column's total.

    Args:
        composition: DataFrame whose first two columns are labels and whose
            remaining columns hold numeric counts. It is not modified.

    Returns:
        A copy of ``composition`` with each count column replaced by integer
        percentages (rounded with ``np.round``). A column whose total is 0
        becomes all zeros, and missing cells are reported as 0.
    """
    composition_pct = composition.copy()
    # Only the value columns are summed: totalling the label columns is
    # meaningless (string concatenation) and not needed for the percentages.
    for col in composition_pct.columns[2:]:
        class_size = composition_pct[col].sum()
        if class_size == 0:
            # An empty class would give 0/0 = NaN and fail the integer cast.
            composition_pct[col] = 0
            continue
        share = (composition_pct[col] / class_size) * 100
        composition_pct[col] = np.round(share).fillna(0).astype('int')
    return composition_pct

In [20]:
def get_total_comp_pct(composition):
    """Add each position's combined count and share across all class columns.

    The first two columns are treated as labels; every remaining column
    (other than an existing ``'total'``) is treated as one class of counts.

    Args:
        composition: DataFrame whose first two columns are labels and whose
            remaining columns hold numeric counts. It is not modified.

    Returns:
        A copy of ``composition`` with two extra columns: ``'total'`` (row sum
        over the class columns, missing cells counted as 0) and ``'total_pct'``
        (``'total'`` as a whole-number percentage of the grand total, rounded
        with ``np.round``). When the grand total is 0, ``'total_pct'`` is 0.
    """
    composition_pct = composition.copy()
    # Use the same set of value columns for the row totals and the grand total,
    # and never sum the two leading label columns.
    value_cols = [col for col in composition_pct.columns[2:] if col != 'total']
    composition_pct['total'] = composition_pct[value_cols].sum(axis=1)
    class_sizes = composition_pct['total'].sum()
    if class_sizes == 0:
        # No recruits at all: 0/0 would give NaN and fail the integer cast.
        composition_pct['total_pct'] = 0
    else:
        composition_pct['total_pct'] = np.round(
            (composition_pct['total'] / class_sizes) * 100
        ).astype('int')
    return composition_pct

In [47]:
# Radial-axis range passed to every polar chart below so the charts share one scale.
# NOTE(review): a total_pct above 15 would fall outside this range — confirm that is intended.
range_r = [0,15]

## Charlie Strong

In [48]:
# Restrict the composition table to the 2015 and 2016 classes and compute each
# position group's share of the combined recruits.
strong = get_total_comp_pct(composition[['position_simple', 'side', 2015, 2016]])

In [49]:
def classify_order_side(simple_position, side):
    """Return the index of ``simple_position`` within its unit's position list.

    Args:
        simple_position: Position group label to look up.
        side: Mapping of unit name -> ordered list of position group labels.

    Returns:
        Zero-based index inside the first unit list that contains the position,
        or ``None`` when no unit lists it.
    """
    for positions in side.values():
        if simple_position in positions:
            return positions.index(simple_position)
    return None
        
# Rank of each position within its unit; used to order the chart axes.
strong['order_side'] = strong['position_simple'].map(lambda position: classify_order_side(position, side))

In [50]:
# Polar chart of each offensive position group's total_pct, saved as a PNG.
fig = px.line_polar(
    strong.loc[strong['side'] == 'offense'].sort_values(by='order_side'),
    r='total_pct',
    theta='position_simple',
    line_close=True,
    range_r=range_r,
    color_discrete_sequence=['#bf5700'],
)

fig.update_traces(fill='toself')
# Transparent backgrounds and a fixed square canvas.
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    autosize=False,
    width=650,
    height=650,
    font_size=24,
    font_family='Helvetica',
)
fig.write_image('../images/texas_recruiting/strong_offense.png')
fig.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [51]:
# Polar chart of each defensive position group's total_pct, saved as a PNG.
fig = px.line_polar(
    strong.loc[strong['side'] == 'defense'].sort_values(by='order_side'),
    r='total_pct',
    theta='position_simple',
    line_close=True,
    range_r=range_r,
    color_discrete_sequence=['#bf5700'],
)

fig.update_traces(fill='toself')
# Transparent backgrounds and a fixed square canvas.
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    autosize=False,
    width=650,
    height=650,
    font_size=24,
    font_family='Helvetica',
)
fig.write_image('../images/texas_recruiting/strong_defense.png')
fig.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [25]:
# Combined total_pct of the special-teams / athlete groups, which the charts above omit.
strong.loc[strong['side'] == 'special_teams_or_athlete', 'total_pct'].sum()

9

## Tom Herman

In [26]:
# Restrict the composition table to the 2018 and 2019 classes and compute each
# position group's share of the combined recruits.
herman = get_total_comp_pct(composition[['position_simple', 'side', 2018, 2019]])

In [27]:
# classify_order_side is already defined in the Charlie Strong section above;
# reuse it rather than redefining an identical copy that shadows the first.
# The result is each position's rank within its unit, used to order the chart axes.
herman['order_side'] = herman['position_simple'].apply(lambda x: classify_order_side(x, side))

In [52]:
# Polar chart of each offensive position group's total_pct, saved as a PNG.
fig = px.line_polar(
    herman.loc[herman['side'] == 'offense'].sort_values(by='order_side'),
    r='total_pct',
    theta='position_simple',
    line_close=True,
    range_r=range_r,
    color_discrete_sequence=['#bf5700'],
)

fig.update_traces(fill='toself')
# Transparent backgrounds and a fixed square canvas.
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    autosize=False,
    width=650,
    height=650,
    font_size=24,
    font_family='Helvetica',
)
fig.write_image('../images/texas_recruiting/herman_offense.png')
fig.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [53]:
# Polar chart of each defensive position group's total_pct, saved as a PNG.
fig = px.line_polar(
    herman.loc[herman['side'] == 'defense'].sort_values(by='order_side'),
    r='total_pct',
    theta='position_simple',
    line_close=True,
    range_r=range_r,
    color_discrete_sequence=['#bf5700'],
)

fig.update_traces(fill='toself')
# Transparent backgrounds and a fixed square canvas.
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    autosize=False,
    width=650,
    height=650,
    font_size=24,
    font_family='Helvetica',
)
fig.write_image('../images/texas_recruiting/herman_defense.png')
fig.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [54]:
# Combined total_pct of the special-teams / athlete groups, which the charts above omit.
herman.loc[herman['side'] == 'special_teams_or_athlete', 'total_pct'].sum()

12

## Steve Sarkisian (Sark)

In [31]:
# Restrict the composition table to the 2022 and 2023 classes and compute each
# position group's share of the combined recruits.
sark = get_total_comp_pct(composition[['position_simple', 'side', 2022, 2023]])

In [32]:
# classify_order_side is already defined in the Charlie Strong section above;
# reuse it rather than redefining an identical copy that shadows the first.
# The result is each position's rank within its unit, used to order the chart axes.
sark['order_side'] = sark['position_simple'].apply(lambda x: classify_order_side(x, side))

In [55]:
# Polar chart of each offensive position group's total_pct, saved as a PNG.
fig = px.line_polar(
    sark.loc[sark['side'] == 'offense'].sort_values(by='order_side'),
    r='total_pct',
    theta='position_simple',
    line_close=True,
    range_r=range_r,
    color_discrete_sequence=['#bf5700'],
)

fig.update_traces(fill='toself')
# Transparent backgrounds and a fixed square canvas.
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    autosize=False,
    width=650,
    height=650,
    font_size=24,
    font_family='Helvetica',
)
fig.write_image('../images/texas_recruiting/sark_offense.png')
fig.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [56]:
# Polar chart of each defensive position group's total_pct, saved as a PNG.
fig = px.line_polar(
    sark.loc[sark['side'] == 'defense'].sort_values(by='order_side'),
    r='total_pct',
    theta='position_simple',
    line_close=True,
    range_r=range_r,
    color_discrete_sequence=['#bf5700'],
)

fig.update_traces(fill='toself')
# Transparent backgrounds and a fixed square canvas.
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    autosize=False,
    width=650,
    height=650,
    font_size=24,
    font_family='Helvetica',
)
fig.write_image('../images/texas_recruiting/sark_defense.png')
fig.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [35]:
# Combined total_pct of the special-teams / athlete groups, which the charts above omit.
sark.loc[sark['side'] == 'special_teams_or_athlete', 'total_pct'].sum()

8