In [1]:
import pandas as pd
import numpy as np
from transformers import LongformerModel, AutoTokenizer
import torch
import lightgbm
import multiprocessing
import os
from tqdm import tqdm

  torch.utils._pytree._register_pytree_node(
  torch.utils._pytree._register_pytree_node(


In [2]:
def relevance_score(pick: int):
    """Convert a draft pick number into a graded relevance value.

    Args:
        pick: Value of the "Pick" column. ``-1`` is a sentinel (it appears to
            mark prospects who were not selected -- confirm against the data).

    Returns:
        ``0`` for the ``-1`` sentinel, otherwise ``abs(pick - 61) / 6``, so
        pick 1 maps to 10.0 and pick 60 maps to 1/6.
    """
    if pick != -1:
        # Distance from 61 scaled by 1/6; earlier picks get larger scores.
        return abs(pick - 61) / 6
    return 0

In [3]:
def ndcg(predicted: list[float], actual: list[float], picks: int = 60) -> float:
    """Compute NDCG at cutoff ``picks`` for a ranked list of relevance scores.

    Args:
        predicted: Relevance scores in the order produced by the ranking under
            evaluation (top-ranked item first).
        actual: Relevance scores of the candidates. They are sorted in
            descending order here to form the ideal ranking, so the caller
            does not have to pre-sort them.
        picks: Cutoff rank. Clamped to ``len(predicted)``; non-positive values
            evaluate no positions.

    Returns:
        DCG / IDCG as a float. Returns 0.0 when IDCG is zero (empty input,
        all-zero relevance, or a non-positive cutoff) instead of dividing by
        zero.
    """
    predicted = list(predicted)
    # IDCG must come from the best possible ordering, not whatever order the
    # caller happened to pass in.
    ideal = sorted(actual, reverse=True)
    cutoff = max(0, min(picks, len(predicted)))

    dcg = 0.0
    idcg = 0.0
    for rank in range(1, cutoff + 1):
        discount = np.log2(rank + 1)
        dcg += predicted[rank - 1] / discount
        # ``actual`` may be shorter than ``predicted``; missing ideal entries
        # simply contribute nothing.
        if rank <= len(ideal):
            idcg += ideal[rank - 1] / discount

    if idcg == 0:
        return 0.0
    return float(dcg / idcg)

In [4]:
def num_aspects(report: str) -> int:
    """Count the non-blank segments of ``report`` separated by "…".

    Segments that are empty or whitespace-only after splitting are ignored.
    """
    return sum(1 for segment in report.split("…") if segment.strip() != "")

# Model

In [5]:
class L2RFeatureExtractor():
    """Builds the per-prospect feature vector used by the ranker.

    The vector is ``[height, weight, age]`` followed by the Longformer pooled
    embedding of the strengths text and then of the weaknesses text.
    """

    # Upper bound on tokens fed to the encoder; longer reports are truncated.
    # Chosen to match the 4096 in the checkpoint name.
    MAX_INPUT_TOKENS = 4096

    def __init__(self):
        """Load the pretrained Longformer encoder and its tokenizer."""
        self.longformer_model = LongformerModel.from_pretrained("allenai/longformer-base-4096")
        self.longformer_tokenizer = AutoTokenizer.from_pretrained("allenai/longformer-base-4096")
        # The encoder is only used for inference; keep dropout disabled so the
        # embeddings are deterministic.
        self.longformer_model.eval()

    def _embed_text(self, text: str) -> list:
        """Encode ``text`` with Longformer and return its pooled output as a list of floats."""
        token_ids = self.longformer_tokenizer.encode(
            text, truncation=True, max_length=self.MAX_INPUT_TOKENS
        )
        # Add a batch dimension of size 1.
        input_ids = torch.tensor(token_ids).unsqueeze(0)
        # No gradients are needed for feature extraction; skipping the autograd
        # graph saves memory and time on every report.
        with torch.no_grad():
            output = self.longformer_model(input_ids)
        return output.pooler_output[0, :].tolist()

    def get_height(self, height: int):
        """Return the height feature unchanged."""
        return height

    def get_weight(self, weight: int):
        """Return the weight feature unchanged."""
        return weight

    def get_age(self, age: float):
        """Return the age feature unchanged."""
        return age

    def get_strengths(self, strengths: str):
        """Return the pooled Longformer embedding of the strengths text."""
        return self._embed_text(strengths)

    def get_weaknesses(self, weaknesses: str):
        """Return the pooled Longformer embedding of the weaknesses text."""
        return self._embed_text(weaknesses)

    def get_features(self, strengths: str, weaknesses: str, height: int, weight: int, age: float):
        """Assemble the full feature vector for one prospect.

        Returns:
            A flat list: height, weight, age, then the strengths embedding,
            then the weaknesses embedding.
        """
        features = [
            self.get_height(height),
            self.get_weight(weight),
            self.get_age(age),
        ]
        features.extend(self.get_strengths(strengths))
        features.extend(self.get_weaknesses(weaknesses))
        return features

In [6]:
class L2RRanker:
    """Learning-to-rank wrapper around ``lightgbm.LGBMRanker`` for draft CSVs.

    Each CSV is one ranking query: every row is a prospect, and the label is
    derived from the "Pick" column via ``relevance_score``.
    """

    # Columns handed to the feature extractor, selected by name so the code
    # does not depend on the CSV's column count or order.
    _FEATURE_COLUMNS = ["Name", "Strengths", "Weaknesses", "Height", "Weight", "Age"]
    # relevance_score() divides by 6; multiplying back yields the integer
    # labels 0..60 that index into ``label_gain``.
    _LABEL_SCALE = 6

    def __init__(self, feature_extractor: 'L2RFeatureExtractor',
                 lgbmranker_params: dict | None = None) -> None:
        """Create the ranker.

        Args:
            feature_extractor: Object whose ``get_features`` builds one
                feature vector per prospect.
            lgbmranker_params: Optional overrides merged on top of the
                default LightGBM parameters defined below.
        """
        self.feature_extractor = feature_extractor
        initial_lgbmranker_params = {'num_leaves': 10, 'learning_rate': 0.005, 'n_estimators': 50,
                                     'n_jobs': multiprocessing.cpu_count(), 'importance_type': 'gain',
                                     'metric': 'ndcg', 'verbosity': 1, 'label_gain': [2**i for i in range(0, 61)]}
        if lgbmranker_params:
            initial_lgbmranker_params.update(lgbmranker_params)
        self.lightgbm_ranker = lightgbm.LGBMRanker().set_params(**initial_lgbmranker_params)

    def _load_draft(self, csv_path: str) -> pd.DataFrame:
        """Read one draft CSV, blank out missing values and add the "Relevance" column."""
        draft_df = pd.read_csv(csv_path)
        draft_df = draft_df.fillna("")
        draft_df["Relevance"] = draft_df["Pick"].apply(relevance_score)
        return draft_df

    def _extract_features(self, draft_df: pd.DataFrame) -> list:
        """Build one feature vector per row of ``draft_df``, in row order."""
        features = []
        rows = draft_df[self._FEATURE_COLUMNS].itertuples(index=False, name=None)
        for name, strengths, weaknesses, height, weight, age in tqdm(rows, total=len(draft_df)):
            # Remove every whitespace-separated part of the player's name from
            # both reports before embedding (presumably so the text features
            # cannot key on player identity -- confirm intent).
            for name_part in name.split():
                strengths = strengths.replace(name_part, "")
                weaknesses = weaknesses.replace(name_part, "")
            features.append(self.feature_extractor.get_features(strengths, weaknesses, height, weight, age))
        return features

    def train(self, train_draft_data: list[str]):
        """Fit the ranker on a list of draft CSV paths.

        Args:
            train_draft_data: Paths to CSV files. Each file becomes one
                LightGBM query group whose size is its row count.
        """
        train_features = []
        train_relevance_scores = []
        train_num_draft_examples = []

        for draft_year in train_draft_data:
            draft_df = self._load_draft(draft_year)
            train_num_draft_examples.append(draft_df.shape[0])
            train_features.extend(self._extract_features(draft_df))
            # round() rather than bare int() so a float that lands just below
            # an integer cannot be truncated to the wrong label.
            train_relevance_scores.extend(
                int(round(rel * self._LABEL_SCALE)) for rel in draft_df["Relevance"]
            )

        self.lightgbm_ranker.fit(train_features, train_relevance_scores, group = train_num_draft_examples)

    def predict(self, test_draft_data: str):
        """Rank the prospects in one draft CSV.

        Args:
            test_draft_data: Path to a CSV with the same columns as the
                training files (including "Pick").

        Returns:
            The loaded DataFrame (with its "Relevance" column) reordered by
            descending predicted score.
        """
        draft_df = self._load_draft(test_draft_data)
        test_features = self._extract_features(draft_df)
        scores = self.lightgbm_ranker.predict(test_features)
        # argsort is ascending; reverse it to put the highest score first.
        return draft_df.iloc[list(np.argsort(scores)[::-1])]

In [7]:
# Load the pretrained Longformer encoder/tokenizer once, then wrap it in a
# ranker that uses the default LightGBM parameters from L2RRanker.__init__.
l2r_feature_extractor = L2RFeatureExtractor()
l2r_ranker = L2RRanker(l2r_feature_extractor)

In [8]:
# Fit on the 2009-2019 draft classes. Each CSV is one ranking group, and every
# row is embedded with Longformer first, so this cell is slow (the recorded
# run took roughly 8 minutes per file).
l2r_ranker.train(["data/2009_nba_draft_prospects.csv", 
                  "data/2010_nba_draft_prospects.csv",
                  "data/2011_nba_draft_prospects.csv",
                  "data/2012_nba_draft_prospects.csv",
                  "data/2013_nba_draft_prospects.csv",
                  "data/2014_nba_draft_prospects.csv",
                  "data/2015_nba_draft_prospects.csv",
                  "data/2016_nba_draft_prospects.csv",
                  "data/2017_nba_draft_prospects.csv",
                  "data/2018_nba_draft_prospects.csv",
                  "data/2019_nba_draft_prospects.csv"])

103it [07:37,  4.44s/it]
103it [07:34,  4.41s/it]
107it [07:59,  4.48s/it]
102it [07:44,  4.55s/it]
101it [07:37,  4.53s/it]
104it [07:49,  4.51s/it]
102it [08:05,  4.76s/it]
102it [07:57,  4.68s/it]
100it [08:50,  5.30s/it]
101it [07:56,  4.72s/it]
102it [07:57,  4.68s/it]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.098619 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 391503
[LightGBM] [Info] Number of data points in the train set: 1127, number of used features: 1539


# 2020

In [9]:
draft_2020_df = pd.read_csv("data/2020_nba_draft_prospects.csv")
draft_2020_df = draft_2020_df.fillna("")
draft_2020_df[:14]

Unnamed: 0,Name,Pick,Strengths,Weaknesses,Height,Weight,Age
0,Anthony Edwards,1,"A rangy, 6’5 225 guard prospect with a truly f...",Edwards has to improve his assertiveness on bo...,77,225,19.3
1,James Wiseman,2,Wiseman has physical tools that really stand o...,The obvious elephant in the room is that he go...,85,235,19.65
2,LaMelo Ball,3,A physically gifted lead guard prospect with p...,His overall jump shot lacks any real consisten...,78,180,19.25
3,Patrick Williams,4,"An explosive, “freakish” combo forward with in...",There are question marks about his want to as ...,79,215,19.24
4,Isaac Okoro,5,Fits the prototype physically of an NBA small ...,Okoro is a work in progress when it comes to h...,78,215,19.82
5,Onyeka Okongwu,6,Exceptional rim protector who can block shots ...,Must polish his offensive game as he can at ti...,81,235,19.95
6,Killian Hayes,7,"Tall, left handed point guard (6-5), with very...","Good, but not an elite athlete … Lacks an expl...",77,195,19.33
7,Obi Toppin,8,Toppin is a late blooming frontcourt prospect ...,"Though a standout athlete with nice size, Topp...",81,220,22.73
8,Deni Avdija,9,All around talented wing with great size … Ver...,Has a below average wingspan (6-9) for his siz...,81,210,19.89
9,Jalen Smith,10,"A long, versatile power forward with solid all...","While he is bouncy, he is not the most fluid o...",82,220,20.69


In [10]:
draft_2020_df["Relevance"] = draft_2020_df["Pick"].apply(relevance_score)
draft_2020_df[:14]

Unnamed: 0,Name,Pick,Strengths,Weaknesses,Height,Weight,Age,Relevance
0,Anthony Edwards,1,"A rangy, 6’5 225 guard prospect with a truly f...",Edwards has to improve his assertiveness on bo...,77,225,19.3,10.0
1,James Wiseman,2,Wiseman has physical tools that really stand o...,The obvious elephant in the room is that he go...,85,235,19.65,9.833333
2,LaMelo Ball,3,A physically gifted lead guard prospect with p...,His overall jump shot lacks any real consisten...,78,180,19.25,9.666667
3,Patrick Williams,4,"An explosive, “freakish” combo forward with in...",There are question marks about his want to as ...,79,215,19.24,9.5
4,Isaac Okoro,5,Fits the prototype physically of an NBA small ...,Okoro is a work in progress when it comes to h...,78,215,19.82,9.333333
5,Onyeka Okongwu,6,Exceptional rim protector who can block shots ...,Must polish his offensive game as he can at ti...,81,235,19.95,9.166667
6,Killian Hayes,7,"Tall, left handed point guard (6-5), with very...","Good, but not an elite athlete … Lacks an expl...",77,195,19.33,9.0
7,Obi Toppin,8,Toppin is a late blooming frontcourt prospect ...,"Though a standout athlete with nice size, Topp...",81,220,22.73,8.833333
8,Deni Avdija,9,All around talented wing with great size … Ver...,Has a below average wingspan (6-9) for his siz...,81,210,19.89,8.666667
9,Jalen Smith,10,"A long, versatile power forward with solid all...","While he is bouncy, he is not the most fluid o...",82,220,20.69,8.5


## First Baseline

In [11]:
draft_2020_df_first_baseline = draft_2020_df.sort_values(["Age", "Height"], ascending = [True, False])
draft_2020_df_first_baseline[:14]

Unnamed: 0,Name,Pick,Strengths,Weaknesses,Height,Weight,Age,Relevance
16,Aleksej Pokusevski,17,Versatile forward with a guard skill set … Exc...,"Has a really thin frame … Narrow shoulders, ma...",84,190,18.91,7.333333
3,Patrick Williams,4,"An explosive, “freakish” combo forward with in...",There are question marks about his want to as ...,79,215,19.24,9.5
85,Malcolm Cazalon,-1,"Athletic, lefty, guard/forward … Nice, wide sh...","Has a thin frame, he needs to bulk up his uppe...",78,185,19.24,0.0
2,LaMelo Ball,3,A physically gifted lead guard prospect with p...,His overall jump shot lacks any real consisten...,78,180,19.25,9.666667
0,Anthony Edwards,1,"A rangy, 6’5 225 guard prospect with a truly f...",Edwards has to improve his assertiveness on bo...,77,225,19.3,10.0
6,Killian Hayes,7,"Tall, left handed point guard (6-5), with very...","Good, but not an elite athlete … Lacks an expl...",77,195,19.33,9.0
33,Theo Maledon,34,Smart point guard with great size for his posi...,Not an overwhelming athlete but solid and uses...,77,185,19.45,4.5
42,Jahmius Ramsey,43,"A mature, tough nosed freshman combo with intr...",Undersized at the 2G position at 6’4 … More o...,76,195,19.46,3.0
15,Isaiah Stewart,16,Rugged big man with a tireless motor and youth...,Below the rim athlete … Very long with decent ...,80,245,19.51,7.5
12,Kira Lewis,13,"Long, quick, athletic point guard with good sc...",Can improve in his ability to score from the p...,75,170,19.63,8.0


In [12]:
ndcg(draft_2020_df_first_baseline["Relevance"], draft_2020_df["Relevance"])

0.8086882425729351

In [13]:
ndcg(draft_2020_df_first_baseline["Relevance"], draft_2020_df["Relevance"], 30)

0.751018618741818

In [14]:
ndcg(draft_2020_df_first_baseline["Relevance"], draft_2020_df["Relevance"], 14)

0.733230940213838

## Second Baseline

In [15]:
# Second baseline: rank by (strength count - weakness count), breaking ties by
# more strengths and then by fewer weaknesses.
# Copy first: a plain assignment would alias draft_2020_df, so the helper
# columns added below would silently appear on the original frame too.
draft_2020_df_second_baseline = draft_2020_df.copy()
draft_2020_df_second_baseline["Number_Strengths"] = draft_2020_df_second_baseline["Strengths"].apply(num_aspects)
draft_2020_df_second_baseline["Number_Weaknesses"] = draft_2020_df_second_baseline["Weaknesses"].apply(num_aspects)
draft_2020_df_second_baseline["Net_Strengths"] = draft_2020_df_second_baseline["Number_Strengths"] - draft_2020_df_second_baseline["Number_Weaknesses"]
draft_2020_df_second_baseline = draft_2020_df_second_baseline.sort_values(["Net_Strengths", "Number_Strengths", "Number_Weaknesses"], ascending = [False, False, True])
draft_2020_df_second_baseline[:14]

Unnamed: 0,Name,Pick,Strengths,Weaknesses,Height,Weight,Age,Relevance,Number_Strengths,Number_Weaknesses,Net_Strengths
30,Tyrell Terry,31,Sharp shooting point guard with a slight frame...,"At just 160 lbs, he has extremely thin legs an...",75,170,20.15,5.0,30,13,17
31,Vernon Carey,32,"Steady, smooth bigman with scoring ability and...",Lacks elite speed and athleticism … Lack of fo...,82,270,19.74,4.833333,27,11,16
38,Elijah Hughes,39,Hughes is a smooth athlete with nice explosive...,Lacks ideal height for the wing position … Ath...,78,230,22.71,3.666667,23,8,15
22,Leandro Bolmaro,23,"Versatile guard, who can be used in every peri...","He looks longer than he actually is, since he ...",78,180,20.2,6.333333,32,18,14
43,Marko Simonovic,44,"Fluid 7-footer, with acceptable level of athle...","Decent athlete, but not overly athletic by NBA...",84,215,21.11,2.833333,34,21,13
33,Theo Maledon,34,Smart point guard with great size for his posi...,Not an overwhelming athlete but solid and uses...,77,185,19.45,4.5,32,19,13
63,Abdoulaye N'Doye,-1,"Versatile combo guard, with length, who can pl...","Late bloomer and is shown at times, he is stil...",79,200,22.71,0.0,30,17,13
2,LaMelo Ball,3,A physically gifted lead guard prospect with p...,His overall jump shot lacks any real consisten...,78,180,19.25,9.666667,25,12,13
12,Kira Lewis,13,"Long, quick, athletic point guard with good sc...",Can improve in his ability to score from the p...,75,170,19.63,8.0,24,11,13
56,Reggie Perry,57,Physical specimen type of athlete with great f...,Not a big shot blocker or rim protector … All ...,81,250,20.68,0.666667,21,8,13


In [16]:
ndcg(draft_2020_df_second_baseline["Relevance"], draft_2020_df["Relevance"])

0.7273686948784405

In [17]:
ndcg(draft_2020_df_second_baseline["Relevance"], draft_2020_df["Relevance"], 30)

0.5506800651555249

In [18]:
ndcg(draft_2020_df_second_baseline["Relevance"], draft_2020_df["Relevance"], 14)

0.48093486939349095

## L2R Model

In [19]:
draft_2020_df_model = l2r_ranker.predict("data/2020_nba_draft_prospects.csv")
draft_2020_df_model[:14]

0it [00:00, ?it/s]

103it [07:55,  4.62s/it]


Unnamed: 0,Name,Pick,Strengths,Weaknesses,Height,Weight,Age,Relevance
12,Kira Lewis,13,"Long, quick, athletic point guard with good sc...",Can improve in his ability to score from the p...,75,170,19.63,8.0
3,Patrick Williams,4,"An explosive, “freakish” combo forward with in...",There are question marks about his want to as ...,79,215,19.24,9.5
2,LaMelo Ball,3,A physically gifted lead guard prospect with p...,His overall jump shot lacks any real consisten...,78,180,19.25,9.666667
47,Nico Mannion,48,Mannion is a crafty 6’3 guard who has a nice b...,Though on the surface Mannion’s prospect check...,75,190,19.7,2.166667
10,Devin Vassell,11,One of the most athletic wings in this year’s ...,Can improve on his ability to create offense f...,79,200,20.25,8.333333
1,James Wiseman,2,Wiseman has physical tools that really stand o...,The obvious elephant in the room is that he go...,85,235,19.65,9.833333
23,RJ Hampton,24,A wiry 6’4 guard (6’7 wingspan) with intriguin...,Shows raw fundamentals and polish in general o...,76,175,19.79,6.166667
8,Deni Avdija,9,All around talented wing with great size … Ver...,Has a below average wingspan (6-9) for his siz...,81,210,19.89,8.666667
0,Anthony Edwards,1,"A rangy, 6’5 225 guard prospect with a truly f...",Edwards has to improve his assertiveness on bo...,77,225,19.3,10.0
86,Khalil Whitney,-1,6’6 athletic forward … Good size and length wi...,Can get stronger in both the upper and lower b...,79,205,19.87,0.0


In [20]:
ndcg(draft_2020_df_model["Relevance"], draft_2020_df["Relevance"])

0.8240587421324435

In [21]:
ndcg(draft_2020_df_model["Relevance"], draft_2020_df["Relevance"], 30)

0.76354467406514

In [22]:
ndcg(draft_2020_df_model["Relevance"], draft_2020_df["Relevance"], 14)

0.7702542208425405

# 2021

In [23]:
draft_2021_df = pd.read_csv("data/2021_nba_draft_prospects.csv")
draft_2021_df = draft_2021_df.fillna("")
draft_2021_df[:14]

Unnamed: 0,Name,Pick,Strengths,Weaknesses,Height,Weight,Age
0,Cade Cunningham,1,A prized 6’8 220+ lb lead guard prospect who h...,"While he is a physically gifted prospect, Cunn...",80,220,19.85
1,Jalen Green,2,Green is a high upside prospect with elite ath...,"Green is a good shooter, but is not yet a snip...",77,180,19.48
2,Evan Mobley,3,Mobley has a chance to be a generational bigma...,The biggest hurdle for Evan Mobley is himself....,84,215,20.13
3,Scottie Barnes,4,Versatility and power are the two greatest str...,The biggest thing holding Barnes back from a h...,80,225,20.01
4,Jalen Suggs,5,"Lead guard with great leadership, toughness an...",The most glaring weakness for the star freshma...,76,205,20.17
5,Josh Giddey,6,Natural talented point guard with great size …...,"Average athlete… Has a really thin frame, and ...",80,205,18.81
6,Jonathan Kuminga,7,"Listed at 6’7 and 220 pounds, Kuminga is a phy...",Lacks consistency shooting … Kuminga is an inc...,79,220,18.82
7,Franz Wagner,8,Wagner has ideal size and versatility for a mo...,Wagner may struggle to create his own shot in ...,82,220,19.93
8,Davion Mitchell,9,"Mitchell is an incredibly efficient scorer, wi...",Needs to improve as a pick and roll facilitato...,73,200,22.91
9,Ziaire Williams,10,High level athlete who uses his size and verti...,Williams struggled to stay on the court for th...,81,190,19.89


In [24]:
draft_2021_df["Relevance"] = draft_2021_df["Pick"].apply(relevance_score)
draft_2021_df[:14]

Unnamed: 0,Name,Pick,Strengths,Weaknesses,Height,Weight,Age,Relevance
0,Cade Cunningham,1,A prized 6’8 220+ lb lead guard prospect who h...,"While he is a physically gifted prospect, Cunn...",80,220,19.85,10.0
1,Jalen Green,2,Green is a high upside prospect with elite ath...,"Green is a good shooter, but is not yet a snip...",77,180,19.48,9.833333
2,Evan Mobley,3,Mobley has a chance to be a generational bigma...,The biggest hurdle for Evan Mobley is himself....,84,215,20.13,9.666667
3,Scottie Barnes,4,Versatility and power are the two greatest str...,The biggest thing holding Barnes back from a h...,80,225,20.01,9.5
4,Jalen Suggs,5,"Lead guard with great leadership, toughness an...",The most glaring weakness for the star freshma...,76,205,20.17,9.333333
5,Josh Giddey,6,Natural talented point guard with great size …...,"Average athlete… Has a really thin frame, and ...",80,205,18.81,9.166667
6,Jonathan Kuminga,7,"Listed at 6’7 and 220 pounds, Kuminga is a phy...",Lacks consistency shooting … Kuminga is an inc...,79,220,18.82,9.0
7,Franz Wagner,8,Wagner has ideal size and versatility for a mo...,Wagner may struggle to create his own shot in ...,82,220,19.93,8.833333
8,Davion Mitchell,9,"Mitchell is an incredibly efficient scorer, wi...",Needs to improve as a pick and roll facilitato...,73,200,22.91,8.666667
9,Ziaire Williams,10,High level athlete who uses his size and verti...,Williams struggled to stay on the court for th...,81,190,19.89,8.5


## First Baseline

In [25]:
draft_2021_df_first_baseline = draft_2021_df.sort_values(["Age", "Height"], ascending = [True, False])
draft_2021_df_first_baseline[:14]

Unnamed: 0,Name,Pick,Strengths,Weaknesses,Height,Weight,Age,Relevance
11,Joshua Primo,12,A lean 6’5 wing who possesses solid physical a...,"Not a particularly explosive athlete, doesn’t ...",77,190,18.61,8.166667
5,Josh Giddey,6,Natural talented point guard with great size …...,"Average athlete… Has a really thin frame, and ...",80,205,18.81,9.166667
6,Jonathan Kuminga,7,"Listed at 6’7 and 220 pounds, Kuminga is a phy...",Lacks consistency shooting … Kuminga is an inc...,79,220,18.82,9.0
27,Jaden Springer,28,"Scrappy, young combo guard with a defensive fo...",Lacks elite explosiveness … Appears to be more...,76,200,18.85,5.5
36,JT Thor,37,"A very rangy 6’10 stretch four, who has genera...",Has one of the most impressive highlight reels...,82,205,18.94,4.0
44,Juhann Begarin,45,Athletic combo guard with good size … Versatil...,"Inconsistent … Kind of raw, his game needs pol...",78,215,18.99,2.666667
15,Alperen Sengun,16,"Physical center, with a lot of determination a...",Average athlete by NBA standards … Small for a...,81,240,19.02,7.5
13,Moses Moody,14,Good size for a wing with a frame that should ...,Doesn’t play with speed/enough athleticism to ...,78,210,19.18,7.833333
22,Usman Garuba,23,"Blue collar forward/center, who always gives 1...",He is more a 5 than a 4 for now on offense bec...,80,230,19.4,6.333333
20,Keon Johnson,21,A freakish athlete with great mental make up a...,A work in progress offensively. Shot only 27% ...,77,185,19.4,6.666667


In [26]:
ndcg(draft_2021_df_first_baseline["Relevance"], draft_2021_df["Relevance"])

0.7996542223484874

In [27]:
ndcg(draft_2021_df_first_baseline["Relevance"], draft_2021_df["Relevance"], 30)

0.738268226677913

In [28]:
ndcg(draft_2021_df_first_baseline["Relevance"], draft_2021_df["Relevance"], 14)

0.7424909106667161

## Second Baseline

In [29]:
# Second baseline: rank by (strength count - weakness count), breaking ties by
# more strengths and then by fewer weaknesses.
# Copy first: a plain assignment would alias draft_2021_df, so the helper
# columns added below would silently appear on the original frame too.
draft_2021_df_second_baseline = draft_2021_df.copy()
draft_2021_df_second_baseline["Number_Strengths"] = draft_2021_df_second_baseline["Strengths"].apply(num_aspects)
draft_2021_df_second_baseline["Number_Weaknesses"] = draft_2021_df_second_baseline["Weaknesses"].apply(num_aspects)
draft_2021_df_second_baseline["Net_Strengths"] = draft_2021_df_second_baseline["Number_Strengths"] - draft_2021_df_second_baseline["Number_Weaknesses"]
draft_2021_df_second_baseline = draft_2021_df_second_baseline.sort_values(["Net_Strengths", "Number_Strengths", "Number_Weaknesses"], ascending = [False, False, True])
draft_2021_df_second_baseline[:14]

Unnamed: 0,Name,Pick,Strengths,Weaknesses,Height,Weight,Age,Relevance,Number_Strengths,Number_Weaknesses,Net_Strengths
0,Cade Cunningham,1,A prized 6’8 220+ lb lead guard prospect who h...,"While he is a physically gifted prospect, Cunn...",80,220,19.85,10.0,22,8,14
22,Usman Garuba,23,"Blue collar forward/center, who always gives 1...",He is more a 5 than a 4 for now on offense bec...,80,230,19.4,6.333333,29,16,13
8,Davion Mitchell,9,"Mitchell is an incredibly efficient scorer, wi...",Needs to improve as a pick and roll facilitato...,73,200,22.91,8.666667,25,12,13
12,Chris Duarte,13,Gifted and natural scorer … Scored over 17 poi...,"At 23 years old, Duarte’s most glaring weaknes...",78,190,24.14,8.0,19,6,13
14,Corey Kispert,15,"Mature, focused four year college player with ...",May take some time to adjust to the quickness ...,79,225,22.42,7.666667,23,11,12
1,Jalen Green,2,Green is a high upside prospect with elite ath...,"Green is a good shooter, but is not yet a snip...",77,180,19.48,9.833333,20,8,12
3,Scottie Barnes,4,Versatility and power are the two greatest str...,The biggest thing holding Barnes back from a h...,80,225,20.01,9.5,18,6,12
7,Franz Wagner,8,Wagner has ideal size and versatility for a mo...,Wagner may struggle to create his own shot in ...,82,220,19.93,8.833333,22,11,11
18,Kai Jones,19,Jones is an explosive athlete who plays with o...,"Jones stands at 6’11 but at 220 lbs, currently...",83,220,20.54,7.0,18,7,11
15,Alperen Sengun,16,"Physical center, with a lot of determination a...",Average athlete by NBA standards … Small for a...,81,240,19.02,7.5,29,19,10


In [30]:
ndcg(draft_2021_df_second_baseline["Relevance"], draft_2021_df["Relevance"])

0.8322794754578953

In [31]:
ndcg(draft_2021_df_second_baseline["Relevance"], draft_2021_df["Relevance"], 30)

0.7572219418526501

In [32]:
ndcg(draft_2021_df_second_baseline["Relevance"], draft_2021_df["Relevance"], 14)

0.8351294763835987

## L2R Model

In [33]:
draft_2021_df_model = l2r_ranker.predict("data/2021_nba_draft_prospects.csv")
draft_2021_df_model[:14]

0it [00:00, ?it/s]

102it [07:50,  4.61s/it]


Unnamed: 0,Name,Pick,Strengths,Weaknesses,Height,Weight,Age,Relevance
20,Keon Johnson,21,A freakish athlete with great mental make up a...,A work in progress offensively. Shot only 27% ...,77,185,19.4,6.666667
47,Sharife Cooper,48,"A highly talented, creative lead guard prospec...","Generously listed at 6’1 165, and without much...",73,180,20.15,2.166667
2,Evan Mobley,3,Mobley has a chance to be a generational bigma...,The biggest hurdle for Evan Mobley is himself....,84,215,20.13,9.666667
26,Cameron Thomas,27,A 6’3 200 lb perimeter scoring machine (4th in...,"A bit one-dimensional as a player, mostly wire...",75,200,19.81,5.666667
42,Greg Brown,43,Brown is a freak athlete with a great deal of ...,Needs to become a better overall offensive pla...,80,205,19.92,3.0
6,Jonathan Kuminga,7,"Listed at 6’7 and 220 pounds, Kuminga is a phy...",Lacks consistency shooting … Kuminga is an inc...,79,220,18.82,9.0
9,Ziaire Williams,10,High level athlete who uses his size and verti...,Williams struggled to stay on the court for th...,81,190,19.89,8.5
15,Alperen Sengun,16,"Physical center, with a lot of determination a...",Average athlete by NBA standards … Small for a...,81,240,19.02,7.5
4,Jalen Suggs,5,"Lead guard with great leadership, toughness an...",The most glaring weakness for the star freshma...,76,205,20.17,9.333333
11,Joshua Primo,12,A lean 6’5 wing who possesses solid physical a...,"Not a particularly explosive athlete, doesn’t ...",77,190,18.61,8.166667


In [34]:
ndcg(draft_2021_df_model["Relevance"], draft_2021_df["Relevance"])

0.7715103330368926

In [35]:
ndcg(draft_2021_df_model["Relevance"], draft_2021_df["Relevance"], 30)

0.7017883743969205

In [36]:
ndcg(draft_2021_df_model["Relevance"], draft_2021_df["Relevance"], 14)

0.7439132287655769

# 2022

In [37]:
draft_2022_df = pd.read_csv("data/2022_nba_draft_prospects.csv")
draft_2022_df = draft_2022_df.fillna("")
draft_2022_df[:14]

Unnamed: 0,Name,Pick,Strengths,Weaknesses,Height,Weight,Age
0,Paolo Banchero,1,"Has good size and length for his position, sta...","For all of his offensive gifts, still has room...",82,250,19.62
1,Chet Holmgren,2,Extremely skilled frontcourt player who can in...,Lack of physical strength (195 lbs) remains hi...,84,195,20.16
2,Jabari Smith,3,Wiry 6’10 combo / stretch four with long arms ...,Rebounding numbers could stand some improvemen...,82,220,19.13
3,Keegan Murray,4,"A 6’8 225 frontcourt player with the frame, le...",Will have some questions about his true positi...,80,225,21.86
4,Jaden Ivey,5,"An aggressive explosive, 6’4 200 lb combo guar...",Ivey’s hard charging energy and play style can...,76,195,20.37
5,Bennedict Mathurin,6,"6’7 wing with tremendous size, maturity, explo...",The biggest hurdle for Mathurin at this point ...,78,205,20.02
6,Shaedon Sharpe,7,An incredibly athletic wing who ranked as the ...,"Focus, intensity and maturity remain mysteries...",77,200,19.08
7,Dyson Daniels,8,6-6 combo guard with great size and length for...,While his skill potential is high. Feel for th...,79,195,19.28
8,Jeremy Sochan,9,"The less heralded freshman Baylor forward, Soc...",Most lottery picks have more scoring productio...,81,230,19.11
9,Johnny Davis,10,A 6’5 190 SG/SF with the athleticism and wiry ...,Was asked to be a go-to scorer last season out...,77,195,20.33


In [38]:
draft_2022_df["Relevance"] = draft_2022_df["Pick"].apply(relevance_score)
draft_2022_df[:14]

Unnamed: 0,Name,Pick,Strengths,Weaknesses,Height,Weight,Age,Relevance
0,Paolo Banchero,1,"Has good size and length for his position, sta...","For all of his offensive gifts, still has room...",82,250,19.62,10.0
1,Chet Holmgren,2,Extremely skilled frontcourt player who can in...,Lack of physical strength (195 lbs) remains hi...,84,195,20.16,9.833333
2,Jabari Smith,3,Wiry 6’10 combo / stretch four with long arms ...,Rebounding numbers could stand some improvemen...,82,220,19.13,9.666667
3,Keegan Murray,4,"A 6’8 225 frontcourt player with the frame, le...",Will have some questions about his true positi...,80,225,21.86,9.5
4,Jaden Ivey,5,"An aggressive explosive, 6’4 200 lb combo guar...",Ivey’s hard charging energy and play style can...,76,195,20.37,9.333333
5,Bennedict Mathurin,6,"6’7 wing with tremendous size, maturity, explo...",The biggest hurdle for Mathurin at this point ...,78,205,20.02,9.166667
6,Shaedon Sharpe,7,An incredibly athletic wing who ranked as the ...,"Focus, intensity and maturity remain mysteries...",77,200,19.08,9.0
7,Dyson Daniels,8,6-6 combo guard with great size and length for...,While his skill potential is high. Feel for th...,79,195,19.28,8.833333
8,Jeremy Sochan,9,"The less heralded freshman Baylor forward, Soc...",Most lottery picks have more scoring productio...,81,230,19.11,8.666667
9,Johnny Davis,10,A 6’5 190 SG/SF with the athleticism and wiry ...,Was asked to be a go-to scorer last season out...,77,195,20.33,8.5


## First Baseline

In [39]:
draft_2022_df_first_baseline = draft_2022_df.sort_values(["Age", "Height"], ascending = [True, False])
draft_2022_df_first_baseline[:14]

Unnamed: 0,Name,Pick,Strengths,Weaknesses,Height,Weight,Age,Relevance
67,Dominick Barlow,-1,A player with a high ceiling and good potentia...,The NBA Draft Combine helped identify areas of...,82,220,18.46,0.0
12,Jalen Duren,13,"A physically gifted, highly touted 6’11 250 bi...",Motivation level is one of the key Duren is a ...,83,250,18.61,8.0
53,Yannick Nzosa,54,"Athletic, left handed center… Great size for h...",Extremely thin frame… Needs to add considerabl...,83,200,18.62,1.166667
15,AJ Griffin,16,"Prototypical size for an NBA wing at 6-6, 220 ...","Despite his great length and athleticism, lack...",78,220,18.84,7.5
41,Trevor Keels,42,"A physical, imposing combo guard who gets to t...",Not really a great fit at either guard positio...,76,225,18.84,3.166667
43,Ryan Rollins,44,"A smooth, talented ball-handler and initiator ...",Rollins put up great numbers in two seasons fo...,75,180,18.99,2.833333
62,Jean Montero,-1,"A natural scorer, the Overtime Elite guard has...",Montero lacks standout length (6-2 with a 6-4 ...,74,170,18.99,0.0
26,Nikola Jovic,27,All around combo forward … Has point forward p...,Needs to add some weight … Despite his fightin...,83,225,19.05,5.666667
6,Shaedon Sharpe,7,An incredibly athletic wing who ranked as the ...,"Focus, intensity and maturity remain mysteries...",77,200,19.08,9.0
10,Ousmane Dieng,11,Versatile point forward … Possesses great size...,"Has put on some weight, but he still has a thi...",81,185,19.1,8.333333


In [40]:
ndcg(draft_2022_df_first_baseline["Relevance"], draft_2022_df["Relevance"])

0.6965407237920551

In [41]:
ndcg(draft_2022_df_first_baseline["Relevance"], draft_2022_df["Relevance"], 30)

0.5105968016806653

In [42]:
ndcg(draft_2022_df_first_baseline["Relevance"], draft_2022_df["Relevance"], 14)

0.4922581047707503

## Second Baseline

In [43]:
# Second baseline: rank by (strength count - weakness count), breaking ties by
# more strengths and then by fewer weaknesses.
# Copy first: a plain assignment would alias draft_2022_df, so the helper
# columns added below would silently appear on the original frame too.
draft_2022_df_second_baseline = draft_2022_df.copy()
draft_2022_df_second_baseline["Number_Strengths"] = draft_2022_df_second_baseline["Strengths"].apply(num_aspects)
draft_2022_df_second_baseline["Number_Weaknesses"] = draft_2022_df_second_baseline["Weaknesses"].apply(num_aspects)
draft_2022_df_second_baseline["Net_Strengths"] = draft_2022_df_second_baseline["Number_Strengths"] - draft_2022_df_second_baseline["Number_Weaknesses"]
draft_2022_df_second_baseline = draft_2022_df_second_baseline.sort_values(["Net_Strengths", "Number_Strengths", "Number_Weaknesses"], ascending = [False, False, True])
draft_2022_df_second_baseline[:14]

Unnamed: 0,Name,Pick,Strengths,Weaknesses,Height,Weight,Age,Relevance,Number_Strengths,Number_Weaknesses,Net_Strengths
25,Wendell Moore,26,A skilled and physical slasher with terrific p...,Really struggled in his first two seasons at D...,77,215,20.78,5.833333,24,7,17
2,Jabari Smith,3,Wiry 6’10 combo / stretch four with long arms ...,Rebounding numbers could stand some improvemen...,82,220,19.13,9.666667,23,10,13
59,Ron Harper Jr.,-1,Bigtime college player who despite not passing...,Clearly slow and unathletic from an NBA perspe...,77,240,22.21,0.0,22,10,12
71,Orlando Robinson,-1,Highly skilled bigman with great length and in...,Average athlete by NBA standards … Feet are a ...,83,245,21.97,0.0,20,9,11
0,Paolo Banchero,1,"Has good size and length for his position, sta...","For all of his offensive gifts, still has room...",82,250,19.62,10.0,18,7,11
33,Jaylin Williams,34,"A relentless, versatile center who is built fo...",Is comfortable shooting on the perimeter and h...,82,235,20.0,4.5,17,6,11
43,Ryan Rollins,44,"A smooth, talented ball-handler and initiator ...",Rollins put up great numbers in two seasons fo...,75,180,18.99,2.833333,17,6,11
49,Matteo Spagnolo,50,Naturally talented combo guard… Good size for ...,Not a real point guard just yet… A little unde...,77,195,19.46,1.833333,27,17,10
38,Khalifa Diop,39,"Athletic, blue collar big… Has prototypical si...",Not an elite athlete… Needs time to gather bef...,83,230,20.45,3.666667,26,16,10
14,Mark Williams,15,"Developing center, rim runner, rim protector w...",Yet to show that his range can expand to an NB...,85,240,20.53,7.666667,17,7,10


In [44]:
ndcg(draft_2022_df_second_baseline["Relevance"], draft_2022_df["Relevance"])

0.6855341484639138

In [45]:
ndcg(draft_2022_df_second_baseline["Relevance"], draft_2022_df["Relevance"], 30)

0.5754285124898122

In [46]:
ndcg(draft_2022_df_second_baseline["Relevance"], draft_2022_df["Relevance"], 14)

0.5249416260241548

## L2R Model

In [47]:
draft_2022_df_model = l2r_ranker.predict("data/2022_nba_draft_prospects.csv")
draft_2022_df_model[:14]

0it [00:00, ?it/s]

102it [08:19,  4.90s/it]


Unnamed: 0,Name,Pick,Strengths,Weaknesses,Height,Weight,Age,Relevance
24,Blake Wesley,25,NBA athlete with a great deal of upside left t...,"Due to being an unheralded recruit, may take s...",76,185,19.28,6.0
60,Justin Lewis,-1,"Great length for his position, has the power a...",Just 6’6.25″ barefoot but his 7’2″ wingspan ma...,79,235,20.21,0.0
5,Bennedict Mathurin,6,"6’7 wing with tremendous size, maturity, explo...",The biggest hurdle for Mathurin at this point ...,78,205,20.02,9.166667
29,Peyton Watson,30,Highly rated recruit with a lot of upside but ...,Lack of readiness makes him a project for the ...,80,205,19.79,5.166667
19,Malaki Branham,20,An athletic three-level scorer who can create ...,"Shoots smoothly as a spot-up shooter, but look...",77,195,19.13,6.833333
36,Jaden Hardy,37,A highly touted recruit who chose to play with...,A disappointing season with the G-League Ignit...,76,200,19.98,4.0
8,Jeremy Sochan,9,"The less heralded freshman Baylor forward, Soc...",Most lottery picks have more scoring productio...,81,230,19.11,8.666667
6,Shaedon Sharpe,7,An incredibly athletic wing who ranked as the ...,"Focus, intensity and maturity remain mysteries...",77,200,19.08,9.0
12,Jalen Duren,13,"A physically gifted, highly touted 6’11 250 bi...",Motivation level is one of the key Duren is a ...,83,250,18.61,8.0
58,John Butler,-1,"Upside, upside upside … Butler’s insane length...","At just 174.4 lbs, Butler is rail thin and wil...",85,175,19.56,0.0


In [48]:
ndcg(draft_2022_df_model["Relevance"], draft_2022_df["Relevance"])

0.7023395837786813

In [49]:
ndcg(draft_2022_df_model["Relevance"], draft_2022_df["Relevance"], 30)

0.5957083791531073

In [50]:
ndcg(draft_2022_df_model["Relevance"], draft_2022_df["Relevance"], 14)

0.5840653361201581