In [69]:
import pandas as pd
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from collections import defaultdict
import math
import csv

In [5]:
def featureExtractor(filename, features):
    salaries = pd.read_csv(filename)
    teams = salaries.team.unique()
    years = salaries.year.unique()
    allFeatures = []
    for year in years:
        for team in teams:
            result = []
            sliced = salaries[((salaries['team'] == team) & (salaries['year'] == year))]
            teamFeatures = defaultdict(lambda: [])
            for index, row in sliced.iterrows():                
                pos = row['Pos.']
                cap = row['Cap %']
                teamFeatures[pos].append(cap)
            for (position, count) in features:
                posList = sorted(teamFeatures[position], reverse=True)
                for count in range(count):
                    if len(posList) == 0:
                        result.append(0)
                    else:
                        result.append(posList[0])
                        posList = posList[1:]
            allFeatures.append((team, year, result))
    return allFeatures

In [6]:
def distance(vec1, vec2):
    return np.linalg.norm(np.array(vec1)-np.array(vec2))

In [79]:
def kNNTrain(teamFeatures, draft_order, k):
    neigh = KNeighborsClassifier(n_neighbors=k, weights='distance')
    features = []
    output = []
    for (team, year, vec) in teamFeatures:
        if((year, team) in draft_order):
            features.append(vec)
            output.append(draft_order[(year, team)])
    features = np.array(features)
    output = np.array(output)
    output.reshape(1,-1)
    neigh.fit(features, output)
    return neigh 

In [80]:
def kNNPredict(neigh, featureVec):
    return neigh.predict_proba(featureVec)

In [28]:
def get_draft_position(filename):
    '''Returns a dict mapping team to position selected in the first round'''
    teams = {'PIT':'pittsburgh-steelers', 'CIN':'cincinnati-bengals', 'BAL':'baltimore-ravens', 'CLE':'cleveland-browns',
         'NWE':'new-england-patriots', 'BUF':'buffalo-bills', 'MIA':'miami-dolphins', 'NYJ':'new-york-jets',
         'TEN':'tennessee-titans', 'HOU':'houston-texans', 'IND':'indianapolis-colts', 'JAX':'jacksonville-jaguars',
         'KAN':'kansas-city-chiefs', 'OAK':'oakland-raiders', 'DEN':'denver-broncos', 'SDG':'san-diego-chargers',
         'GNB':'green-bay-packers', 'MIN':'minnesota-vikings', 'DET':'detroit-lions', 'CHI':'chicago-bears',
         'DAL':'dallas-cowboys', 'NYG':'new-york-giants', 'PHI':'philadelphia-eagles', 'WAS':'washington-redskins',
         'CAR':'carolina-panthers', 'ATL':'atlanta-falcons', 'NOR':'new-orleans-saints', 'TAM':'tampa-bay-buccaneers',
         'SEA':'seattle-seahawks', 'ARI':'arizona-cardinals', 'STL':'st.-louis-rams', 'SFO':'san-francisco-49ers'}
    draft = pd.read_csv(filename)
#     print draft.head(3)
    draft_order = {}
    for _, row in draft.iterrows():
        if row['Tm'] in teams:
            draft_order[(row['Year'], teams[row['Tm']])] = row['Position Standard']
        else:
            print 'not found', row
    return draft_order
    

In [9]:
def predict_draft_position(feature_vector, draft_picks, teamFeatures):
    kTeams = kNN(feature_vector, teamFeatures, 10)
    similar_drafts = defaultdict(int)
    for team_data in kTeams:
        team, year, distance = team_data
        if (year, team) in draft_picks:
            similar_drafts[draft_picks[(year, team)]] += 1
    return similar_drafts
        

In [70]:
features = [('QB', 1), ('RB', 1), ('DB', 4), ('LB', 3), ('C', 1), ('DE', 2), ('DT', 2), ('G', 2), ('TE', 2), ('WR', 3), ('T', 2)]
teamFeatures = featureExtractor('data/salaries.train.csv', features)
# teamFeatures[0][2] selects the feature vector from the head elem
testFeatures = featureExtractor('data/salaries.test.csv', features)
draftPositions = get_draft_position('data/nfldraft.train.csv')
draftTest = get_draft_position('data/nfldraft.test.csv')
maxCorrect = 0
bestK = 0
for k in range(1, 87):
    picks = 0
    correct = 0
    for i in range(32):
        probabilities = kNN(
            testFeatures[i][2], teamFeatures, draftPositions, k)
#         print testFeatures[i][0], testFeatures[i][1]
        maxProb = 0
        result = None
        for idx, probability in enumerate(probabilities[0]):
            if probability > maxProb:
                maxProb = probability
                result = features[idx]
    #         print features[idx], probability

        if not (testFeatures[i][1], testFeatures[i][0]) in draftTest:
            continue

        if result[0] == draftTest[(testFeatures[i][1], testFeatures[i][0])]:
    #         print '\tCorrect pick', result[0]
            correct += 1
#         else:
    #         print '\tIncorrect pick, guessed', result[0], 'was', draftTest[testFeatures[i][1], testFeatures[i][0]]
        picks += 1
    if correct > maxCorrect:
        bestK = k
        maxCorrect = correct
    print k, correct, '/', picks
print bestK, maxCorrect
    



1 6 / 30
2 6 / 30
3 7 / 30
4 4 / 30
5 5 / 30




6 4 / 30
7 3 / 30
8 3 / 30
9 3 / 30




10 3 / 30
11 2 / 30
12 4 / 30
13 4 / 30




14 5 / 30
15 5 / 30
16 5 / 30
17 4 / 30




18 3 / 30
19 2 / 30
20 2 / 30
21 2 / 30
22 3 / 30




23 3 / 30
24 3 / 30
25 3 / 30
26 4 / 30
27 4 / 30




28 4 / 30
29 3 / 30
30 5 / 30
31 5 / 30




32 4 / 30
33 3 / 30
34 3 / 30
35 5 / 30




36 5 / 30
37 6 / 30
38 6 / 30
39 6 / 30




40 6 / 30
41 6 / 30
42 5 / 30
43 5 / 30
44 5 / 30
45 5 / 30






46 5 / 30
47 5 / 30
48 5 / 30
49 5 / 30
50 5 / 30
51 5 / 30
52 5 / 30




53 5 / 30
54 5 / 30
55 5 / 30
56 5 / 30
57 5 / 30
58 5 / 30
59 5 / 30




60 5 / 30
61 5 / 30
62 5 / 30
63 5 / 30
64 5 / 30
65 5 / 30




66 5 / 30
67 5 / 30
68 5 / 30
69 5 / 30
70 5 / 30
71 5 / 30




72 5 / 30
73 5 / 30
74 5 / 30
75 5 / 30
76 5 / 30




77 5 / 30
78 5 / 30
79 5 / 30
80 5 / 30




81 5 / 30
82 5 / 30
83 5 / 30
84 5 / 30
85 5 / 30
86 5 / 30
3 7




In [81]:
features = [('QB', 1), ('RB', 1), ('DB', 4), ('LB', 3), ('C', 1), ('DE', 2), ('DT', 2), ('G', 2), ('TE', 2), ('WR', 3), ('T', 2)]
teamFeatures = featureExtractor('data/salaries.train.csv', features)
devFeatures  = featureExtractor('data/salaries.dev.csv', features)
testFeatures = featureExtractor('data/salaries.test.csv', features)

draftTrain = get_draft_position('data/nfldraft.train.csv')
draftDev = get_draft_position('data/nfldraft.dev.csv')
draftTest = get_draft_position('data/nfldraft.test.csv')
picks = 0
correct = 0
for k in range(1, 30):
    data = []
    data.append(['Team'] + [feature[0] for feature in features])
    for i in range(32):
        neigh = kNNTrain(teamFeatures, draftPositions, k)
        probabilities = kNNPredict(
            neigh, testFeatures[i][2])
        maxProb = 0
        result = None
        probabilityDict = defaultdict(float)
        for idx, probability in enumerate(probabilities[0]):
            if probability > maxProb:
                maxProb = probability
                result = features[idx][0]
            probabilityDict[features[idx][0]] = probability

        if not (testFeatures[i][1], testFeatures[i][0]) in draftTest:
            continue

        data.append([testFeatures[i][0]] + list(probabilities[0]) + [draftTest[(testFeatures[i][1], testFeatures[i][0])]])
        if draftTest[(testFeatures[i][1], testFeatures[i][0])] == result:
            correct += 1
        picks += 1
    print data
    with open("output/need.test.csv", "ab+") as f:
        writer = csv.writer(f)
        writer.writerows(data)
        writer.writerows([[k, correct, picks]])



[['Team', 'QB', 'RB', 'DB', 'LB', 'C', 'DE', 'DT', 'G', 'TE', 'WR', 'T'], ['pittsburgh-steelers', 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 'LB'], ['cincinnati-bengals', 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 'T'], ['baltimore-ravens', 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 'WR'], ['cleveland-browns', 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 'C'], ['new-england-patriots', 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 'DT'], ['miami-dolphins', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 'WR'], ['new-york-jets', 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 'DE'], ['tennessee-titans', 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 'QB'], ['houston-texans', 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 'DB'], ['indianapolis-colts', 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 'WR'], ['jacksonville-jaguars', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 'LB'], ['kansa



[['Team', 'QB', 'RB', 'DB', 'LB', 'C', 'DE', 'DT', 'G', 'TE', 'WR', 'T'], ['pittsburgh-steelers', 0.17284685087163029, 0.0, 0.0, 0.0, 0.0, 0.1718153162454388, 0.0, 0.0, 0.65533783288293102, 0.0, 0.0, 'LB'], ['cincinnati-bengals', 0.0, 0.3285084524509379, 0.0, 0.17286369784654673, 0.0, 0.16873427244407577, 0.0, 0.0, 0.32989357725843954, 0.0, 0.0, 'T'], ['baltimore-ravens', 0.0, 0.16353145705264213, 0.1619094981184864, 0.17596488802970567, 0.17282406904977854, 0.0, 0.15673336642981692, 0.0, 0.0, 0.0, 0.1690367213195704, 'WR'], ['cleveland-browns', 0.0, 0.0, 0.36739260037290195, 0.0, 0.14726949090872701, 0.16376430796680153, 0.1506708381631135, 0.0, 0.0, 0.0, 0.17090276258845608, 'C'], ['new-england-patriots', 0.20766888641407868, 0.16649084940601425, 0.0, 0.31639129080383971, 0.0, 0.0, 0.0, 0.0, 0.3094489733760673, 0.0, 0.0, 'DT'], ['miami-dolphins', 0.0, 0.0, 0.31556841750407594, 0.0, 0.16065026037393088, 0.0, 0.35135800982805798, 0.0, 0.17242331229393529, 0.0, 0.0, 'WR'], ['new-york-je



[['Team', 'QB', 'RB', 'DB', 'LB', 'C', 'DE', 'DT', 'G', 'TE', 'WR', 'T'], ['pittsburgh-steelers', 0.10830318210716536, 0.088111791902525202, 0.19272835921701534, 0.092575207268447313, 0.0, 0.10765683835310279, 0.0, 0.0, 0.41062462115174397, 0.0, 0.0, 'LB'], ['cincinnati-bengals', 0.0, 0.30065646159152087, 0.19122501845674633, 0.10639010581265879, 0.0, 0.10384862364498952, 0.0, 0.094844607117420127, 0.20303518337666429, 0.0, 0.0, 'T'], ['baltimore-ravens', 0.0, 0.19517897754096347, 0.19615000912085373, 0.11015983352760499, 0.20277874883717362, 0.0, 0.098120265624895661, 0.0, 0.091789593077883244, 0.0, 0.10582257227062539, 'WR'], ['cleveland-browns', 0.0, 0.0, 0.32723273436766664, 0.086802869419484688, 0.0952830959317994, 0.10595521292238554, 0.1836700030352591, 0.0, 0.0, 0.0, 0.20105608432340472, 'C'], ['new-england-patriots', 0.13200757786456885, 0.10583219347976747, 0.090692598710610278, 0.20111846640897588, 0.0, 0.089767674850960655, 0.0, 0.0, 0.2869712656831811, 0.0, 0.0936102230019



[['Team', 'QB', 'RB', 'DB', 'LB', 'C', 'DE', 'DT', 'G', 'TE', 'WR', 'T'], ['pittsburgh-steelers', 0.080484222740441647, 0.065479231058282072, 0.14322378982617043, 0.068796165145579014, 0.12903714698723368, 0.14255897723938146, 0.0, 0.0, 0.30515080746921019, 0.0, 0.065269659533701493, 'LB'], ['cincinnati-bengals', 0.0, 0.22142940857651977, 0.14083463404634697, 0.14371541261164522, 0.067076939153933576, 0.076483103650828457, 0.0, 0.069851767527337252, 0.2158907964115665, 0.0, 0.064717938021822213, 'T'], ['baltimore-ravens', 0.0, 0.20836403806504997, 0.21063570285810193, 0.081035738578558628, 0.14916803297400247, 0.0, 0.13903437253778692, 0.0, 0.13391695103130838, 0.0, 0.077845163955191629, 'WR'], ['cleveland-browns', 0.0, 0.0, 0.30741540028776465, 0.064895251216644495, 0.071235207874427289, 0.079213857863180373, 0.263750066590209, 0.0, 0.063177384483048799, 0.0, 0.15031283168472551, 'C'], ['new-england-patriots', 0.098962166104638635, 0.20341988624434898, 0.067989551533690393, 0.15077254



[['Team', 'QB', 'RB', 'DB', 'LB', 'C', 'DE', 'DT', 'G', 'TE', 'WR', 'T'], ['pittsburgh-steelers', 0.064680937642710795, 0.05262221484864095, 0.26170508186428482, 0.10503679100513273, 0.10370036975806879, 0.11456715370124056, 0.0, 0.0, 0.24523365794549917, 0.0, 0.052453793234422237, 'LB'], ['cincinnati-bengals', 0.0, 0.17629371216808304, 0.16330741501764123, 0.1642827841592234, 0.053404119535608634, 0.060892951606657517, 0.0, 0.055613332836233105, 0.22323589039206829, 0.0, 0.10296979428448473, 'T'], ['baltimore-ravens', 0.0, 0.16592709444472753, 0.26979278229400483, 0.06453140749904733, 0.11878738061164097, 0.051230865564318689, 0.11071761556060181, 0.0, 0.10664244553844157, 0.0, 0.11237040848721723, 'WR'], ['cleveland-browns', 0.0, 0.097874585090117511, 0.24757326320595774, 0.052262603289346427, 0.057368410470386655, 0.11169239488496077, 0.21240791643946014, 0.0, 0.050879140155778185, 0.048889070989684293, 0.12105261547430841, 'C'], ['new-england-patriots', 0.079996209523314807, 0.1644



[['Team', 'QB', 'RB', 'DB', 'LB', 'C', 'DE', 'DT', 'G', 'TE', 'WR', 'T'], ['pittsburgh-steelers', 0.054442340934842161, 0.084253963593394834, 0.25887488699111383, 0.12841761969212692, 0.087285235669032027, 0.096431874197487208, 0.0, 0.0, 0.20641467024360505, 0.0, 0.083879408678397965, 'LB'], ['cincinnati-bengals', 0.0, 0.14752242891541925, 0.17762614408549293, 0.13747169453808733, 0.12573269178567123, 0.050955170291487752, 0.0, 0.046537189779409925, 0.22798964402453722, 0.0, 0.086165036579894355, 'T'], ['baltimore-ravens', 0.0, 0.26285125826024752, 0.2661665762644127, 0.053861798054156425, 0.099147099898307292, 0.042760364946413207, 0.092411588116234705, 0.0, 0.089010205854842261, 0.0, 0.093791108605385851, 'WR'], ['cleveland-browns', 0.039440586467729369, 0.082256408831989786, 0.24818150992984139, 0.083971981556027336, 0.08818327138912363, 0.093869264310261061, 0.17851327183362448, 0.0, 0.04276018488170711, 0.041087677736165354, 0.10173584306353059, 'C'], ['new-england-patriots', 0.06



[['Team', 'QB', 'RB', 'DB', 'LB', 'C', 'DE', 'DT', 'G', 'TE', 'WR', 'T'], ['pittsburgh-steelers', 0.047278254621034743, 0.10597538414298459, 0.22480945183465884, 0.21030101550057226, 0.075799341574170023, 0.083742370801962077, 0.0, 0.0, 0.17925249299940627, 0.0, 0.072841688525211251, 'LB'], ['cincinnati-bengals', 0.0, 0.16217870612539112, 0.15337199782665178, 0.11870047928177518, 0.14254936282550698, 0.043997443661453695, 0.067761185813789357, 0.040182720885225107, 0.1968585613782193, 0.0, 0.074399542201987412, 'T'], ['baltimore-ravens', 0.0, 0.22621193043681173, 0.26371021439042464, 0.046353901424222771, 0.085326800463711022, 0.071870813358554136, 0.11394806532245667, 0.03525797678925563, 0.076602907013920987, 0.0, 0.080717390800642455, 'WR'], ['cleveland-browns', 0.034120857942569256, 0.071161701485312756, 0.24821721966092633, 0.07264587853361619, 0.076289151492238041, 0.081208220251099583, 0.18839371395453502, 0.0, 0.10440370798862662, 0.035545790283135086, 0.088013758407941192, 'C'



In [84]:
def get_draft_probability_matrix():
    features = [('QB', 1), ('RB', 1), ('DB', 4), ('LB', 3), ('C', 1), ('DE', 2), ('DT', 2), ('G', 2), ('TE', 2), ('WR', 3), ('T', 2)]
    teamFeatures = featureExtractor('data/salaries.train.csv', features)
    devFeatures  = featureExtractor('data/salaries.dev.csv', features)
    testFeatures = featureExtractor('data/salaries.test.csv', features)

    draftTrain = get_draft_position('data/nfldraft.train.csv')
    draftDev = get_draft_position('data/nfldraft.dev.csv')
    draftTest = get_draft_position('data/nfldraft.test.csv')

    data = {}
    for i in range(32):
        neigh = kNNTrain(teamFeatures, draftPositions, k)
        probabilities = kNNPredict(
            neigh, testFeatures[i][2])
        probabilityDict = {features[idx][0]: probability for idx, probability in enumerate(probabilities[0])}
        data[testFeatures[i][0]] = list(probabilities[0])
    return data

In [85]:
print get_draft_probability_matrix()



NameError: free variable 'probabilityDict' referenced before assignment in enclosing scope