In [125]:
import os 
import pandas as pd
import numpy as np
# Directory scanned by the cells below via os.listdir(); every non-directory
# entry in it is treated as a draft file and several cells rewrite it in place.
folder_path = './data'

### Adds headers to files

In [5]:
# Column names prepended to every raw draft file so pandas can read them by name.
header = ['pick_num', 'team_name', 'player', 'player_team', 'player_pos']
header_prefix = ','.join(header)

for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    if os.path.isdir(file_path):
        continue  # only plain files are draft exports

    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Re-running this cell must not stack a second header row. Later cells
    # append extra columns (e.g. ADP) after these five, so compare by prefix.
    if lines and lines[0].startswith(header_prefix):
        continue

    lines.insert(0, header_prefix + '\n')
    with open(file_path, 'w') as file:
        file.writelines(lines)

### Checks that there are 12 teams in each draft

In [24]:
# Sanity check: report every draft file whose team_name column does not
# contain exactly 12 distinct values.
for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    if os.path.isdir(file_path):
        continue  # sub-directories are not draft files

    df = pd.read_csv(file_path)
    teams = df['team_name'].unique()
    if len(teams) == 12:
        continue
    print(f"File '{filename}': Number of unique teams does not equal 12.")

### Encode team names

In [25]:

# Replace the original team names with generic labels Team1, Team2, ...
# and write each file back in place.
for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    if os.path.isdir(file_path):
        continue  # sub-directories are not draft files

    df = pd.read_csv(file_path)
    teams = df['team_name'].unique()

    # Labels are numbered by the order in which each name first appears.
    team_mapping = {}
    for label_number, team in enumerate(teams, start=1):
        team_mapping[team] = f'Team{label_number}'

    df['team_name'] = df['team_name'].map(team_mapping)
    df.to_csv(file_path, index=False)


### Checks if pick order in each draft is correct

In [46]:
# Verify that each file follows a 12-team, 15-round snake order:
# 1..12 in the first round, 12..1 in the second, and so on.
for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    if os.path.isdir(file_path):
        continue  # sub-directories are not draft files

    df = pd.read_csv(file_path)

    # Build the expected seat sequence round by round (180 picks in total).
    expected_draft_order = []
    for round_index in range(15):
        seats = range(1, 13) if round_index % 2 == 0 else range(12, 0, -1)
        expected_draft_order.extend(seats)

    # Recover the seat number from labels such as 'Team7' by keeping the digits.
    draft_order_in_file = []
    for element in df['team_name']:
        digits = ''.join(ch for ch in element if ch.isdigit())
        draft_order_in_file.append(int(digits))

    if expected_draft_order != draft_order_in_file:
        print(f"File '{filename}': Draft order incorrect")

### Adding ADP values to each draft 

In [70]:
# Load the ADP rankings and keep only the player name and its average draft position.
adp_df = pd.read_csv('FantasyPros_2023_Overall_ADP_Rankings.csv')
adp_df = adp_df.rename(columns={'AVG': 'ADP', 'Player' : 'player'})
adp_df = adp_df[['player','ADP']]
adp_df = adp_df.dropna(how='all')

# High fallback ADP (10 above the largest listed ADP) assigned to every drafted
# player that has no ADP value, whatever the position.
default_adp = adp_df['ADP'].max() + 10

for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    if os.path.isdir(file_path):
        continue  # sub-directories are not draft files

    draft_df = pd.read_csv(file_path)

    # Attach the ADP column on first contact with a file. Without this the
    # lookup below raises KeyError on files that were never enriched; files
    # that already carry an ADP column are left as they are.
    if 'ADP' not in draft_df.columns:
        draft_df = draft_df.merge(
            # one ADP row per player, so the left merge cannot duplicate picks
            adp_df.drop_duplicates(subset='player'),
            on='player',
            how='left',
        )

    # Collect the players without an ADP and give them the fallback value.
    missing_mask = draft_df['ADP'].isnull()
    missing_adp_players = draft_df.loc[missing_mask, 'player'].tolist()
    draft_df.loc[missing_mask, 'ADP'] = default_adp

    if len(missing_adp_players) > 0:
        print(f"In file {filename}, these players have missing ADP values: {missing_adp_players}")

    # Save the dataframe back to the file
    draft_df.to_csv(file_path, index=False)

### Create State Representations (Model Input)

In [120]:
# Filler ADP used by get_remaining_players_repr when a position has fewer than
# three undrafted players: 10 above the largest ADP in adp_df.
# Requires adp_df (loaded in the ADP cell) to exist in the kernel.
default_adp = adp_df['ADP'].max() + 10

#Representation 1: Accounts for players left on the board

#Returns 4 element lists for each position. First element is the number of players at that position
#that haven't been drafted. Next 3 elements are smallest ADP values of undrafted players at that position
def get_remaining_players_repr(df, current_pick_num):
    """Summarize the players still on the board after `current_pick_num`.

    Args:
        df: draft table with 'pick_num', 'player_pos' and 'ADP' columns.
        current_pick_num: rows with a strictly larger pick_num count as undrafted.

    Returns:
        Array with one row per position (QB, RB, WR, TE, DST, K) and four
        columns: the number of undrafted players at that position followed by
        their three smallest ADP values. Missing values are filled with the
        module-level `default_adp`.
    """
    tracked_positions = ('QB', 'RB', 'WR', 'TE', 'DST', 'K')

    undrafted = df.loc[df['pick_num'] > current_pick_num].sort_values('ADP')

    def summarize(pos):
        # [count, adp_1, adp_2, adp_3] for a single position
        at_pos = undrafted.loc[undrafted['player_pos'] == pos]
        best_adps = at_pos['ADP'].nsmallest(3).tolist()
        shortfall = 3 - len(best_adps)
        if shortfall > 0:
            # fewer than three players left: pad with the filler ADP
            best_adps.extend([default_adp] * shortfall)
        return [len(at_pos)] + best_adps

    per_position = {pos: summarize(pos) for pos in tracked_positions}
    # Transpose so that each position becomes a row.
    return pd.DataFrame(per_position).T.values

#Representation 2: Accounts for current roster of the team picking 

#Returns list of team's roster indicating position slots that can be filled 
# 0 = Slot filled 
def get_team_roster_repr(df, team_name, current_pick_num):
    """Describe which roster slots `team_name` still has open before a pick.

    Only picks with pick_num strictly below `current_pick_num` are counted.

    Args:
        df: draft table with 'pick_num', 'team_name' and 'player_pos' columns.
        team_name: value of 'team_name' identifying the team.
        current_pick_num: pick about to be made; earlier picks form the roster.

    Returns:
        1-D array of 8 non-negative integers: open starting slots for
        QB, RB, WR, TE, DST, K, then the open flex slot (0 or 1), then the
        open bench slots. 0 means the slot group is full.
    """
    positions = ['QB', 'RB', 'WR', 'TE', 'DST', 'K']
    max_players = {'QB': 1, 'RB': 2, 'WR': 2, 'TE': 1, 'DST': 1, 'K': 1}
    flex_slots = 1   # flex position, fillable by a surplus RB or WR
    bench_slots = 6
    flex_eligible = ('RB', 'WR')

    prior_picks = df[(df['pick_num'] < current_pick_num) & (df['team_name'] == team_name)]
    position_counts = prior_picks['player_pos'].value_counts()

    roster_repr = []
    surplus = {}
    for pos in positions:
        drafted = int(position_counts.get(pos, 0))
        # Open starting slots never go below zero; players beyond the
        # starting limit are tracked separately as surplus.
        roster_repr.append(max(0, max_players[pos] - drafted))
        surplus[pos] = max(0, drafted - max_players[pos])

    # One surplus RB/WR occupies the flex slot; every other surplus player sits
    # on the bench. Each surplus player is counted exactly once, so the flex
    # player is not also subtracted from the bench.
    flex_used = min(flex_slots, sum(surplus[pos] for pos in flex_eligible))
    bench_used = sum(surplus.values()) - flex_used

    roster_repr.append(flex_slots - flex_used)
    roster_repr.append(max(0, bench_slots - bench_used))
    return np.array(roster_repr)

#Forms a single state representation based on the two representations 
def get_state_representation(df, current_pick_num, team_name, max_players=180):
    """Build the flat model input for one pick.

    Joins the roster vector of `team_name` with the remaining-players table,
    both computed for `current_pick_num`. `max_players` is accepted but not
    used by this function.

    Returns:
        1-D array: roster values first, then the flattened remaining-players values.
    """
    pieces = [
        get_team_roster_repr(df, team_name, current_pick_num),
        get_remaining_players_repr(df, current_pick_num),
    ]
    # axis=None flattens every piece before joining them end to end.
    return np.concatenate(pieces, axis=None)

### Create Input/Output Pairs to Train Model 

In [121]:
inputs = []
outputs = []

# Seat order of a 12-team, 15-round snake draft (180 picks): 1..12, 12..1, ...
expected_draft_order = (list(range(1, 13)) + list(range(12, 0, -1)))*15
expected_draft_order = expected_draft_order[:int(len(expected_draft_order)/2)]

for filename in os.listdir(folder_path):
    if not os.path.isdir(os.path.join(folder_path, filename)):
        file_path = os.path.join(folder_path, filename)
        df = pd.read_csv(file_path)

        for pick_num in range(1, df['pick_num'].max()):
            # The sample predicts the pick that follows `pick_num`, so the state
            # must describe the team making THAT pick and the board it sees.
            next_pick_num = pick_num + 1
            teamID = expected_draft_order[next_pick_num - 1]

            # Roster of the team on the clock: its picks before next_pick_num.
            team_roster_repr = get_team_roster_repr(df, f'Team{teamID}', next_pick_num)
            # Board: every player not taken in picks 1..pick_num.
            remaining_players_repr = get_remaining_players_repr(df, pick_num)
            state_repr = np.concatenate([team_roster_repr, remaining_players_repr], axis=None)

            # Label: position of the player taken at next_pick_num.
            next_pick_pos = df.loc[df['pick_num'] == next_pick_num, 'player_pos'].values[0]

            # store the input-output pair
            inputs.append(state_repr)
            outputs.append(next_pick_pos)

In [128]:
# Inputs and labels are appended in lockstep; verify that, then show the sample count.
assert len(inputs) == len(outputs), "inputs and outputs must have the same length"
len(inputs)

5370

### Split Data

In [126]:
from sklearn.model_selection import train_test_split

# Turn the collected samples into arrays so they can be split and reshaped.
inputs, outputs = np.array(inputs), np.array(outputs)

# Step 1: hold out 30% of all samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    inputs,
    outputs,
    test_size=0.3,
    random_state=42,
)

# Step 2: carve 20% of the remaining training samples out as the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

In [127]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Embedding
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

def _as_single_step_sequences(features):
    """Return `features` shaped (samples, 1, n_features) for the LSTM layer.

    A 2-D array gets a length-1 time axis inserted. An array that is already
    3-D is returned unchanged, so re-running this cell does not fail.
    """
    if features.ndim == 3:
        return features
    return features.reshape((features.shape[0], 1, features.shape[1]))


X_train = _as_single_step_sequences(X_train)
X_val = _as_single_step_sequences(X_val)
X_test = _as_single_step_sequences(X_test)

# Map position labels to integer ids; the encoder is fitted on the training labels only.
encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(y_train)
y_val_encoded = encoder.transform(y_val)
y_test_encoded = encoder.transform(y_test)

# Fix the one-hot width explicitly: without num_classes, a split that happens
# to lack the highest class id would produce a narrower matrix than the
# model's output layer.
num_classes = len(encoder.classes_)
y_train_encoded = to_categorical(y_train_encoded, num_classes=num_classes)
y_val_encoded = to_categorical(y_val_encoded, num_classes=num_classes)
y_test_encoded = to_categorical(y_test_encoded, num_classes=num_classes)

# Single LSTM layer followed by a softmax over the position classes.
model = Sequential()
model.add(LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

history = model.fit(X_train, y_train_encoded, epochs=100, validation_data=(X_val, y_val_encoded))

loss, accuracy = model.evaluate(X_test, y_test_encoded)

print(f'Test loss: {loss}')
print(f'Test accuracy: {accuracy}')


2023-05-27 15:35:00.885172: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-05-27 15:35:00.890094: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-05-27 15:35:00.894808: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Epoch 1/100


2023-05-27 15:35:01.601120: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-05-27 15:35:01.605336: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-05-27 15:35:01.608337: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



2023-05-27 15:35:05.436244: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-05-27 15:35:05.439835: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-05-27 15:35:05.443050: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7