In [1]:
import pandas as pd
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import numpy as np
from collections import defaultdict
import math

In [66]:
df = pd.read_csv('data/salaries.test.csv')

In [69]:
df['Pos.'].unique()

array(['QB', 'LB', 'WR', 'DB', 'DE', 'C', 'TE', 'T', 'DT', 'G', 'FB', 'RB',
       'LS', 'P', 'K'], dtype=object)

In [70]:
salaries.year.unique()

array([2011, 2012, 2013])

In [51]:
def featureExtractor(filename, features):
    salaries = pd.read_csv(filename)
    teams = salaries.team.unique()
    years = salaries.year.unique()
    allFeatures = []
    for year in years:
        for team in teams:
            result = []
            sliced = salaries[((salaries['team'] == team) & (salaries['year'] == year))]
            teamFeatures = defaultdict(lambda: [])
            for index, row in sliced.iterrows():                
                pos = row['Pos.']
                cap = row['Cap %']
                teamFeatures[pos].append(cap)
            for (position, count) in features:
                posList = sorted(teamFeatures[position], reverse=True)
                for count in range(count):
                    if len(posList) == 0:
                        result.append(0)
                    else:
                        result.append(posList[0])
                        posList = posList[1:]
            allFeatures.append((team, year, result))
    return allFeatures

In [52]:
features = [('QB', 1), ('RB', 1), ('DB', 4), ('LB', 3), ('C', 1), ('DE', 2), ('DT', 2), ('G', 2), ('TE', 2), ('WR', 3), ('T', 2)]
teamFeatures = featureExtractor('data/salaries.train.csv', features)
# for (team, year, vec) in teamFeatures:
#     print team, year, vec

In [53]:
def distance(vec1, vec2):
    return np.linalg.norm(np.array(vec1)-np.array(vec2))

In [54]:
def kNN(featureVec, teamFeatures, k):
    kTop = [float('inf')] * k
    kTeams = [('', 0)] * k
    for (team, year, vec) in teamFeatures:
        dist = distance(vec, featureVec)
        if dist < max(kTop):
            minIndex = kTop.index(max(kTop))
            kTop[minIndex] = dist
            kTeams[minIndex] = (team, year, dist)
    return kTeams

In [55]:
print kNN([1.95, 2.39, 10.16, 3.22, 0.75, 0.55, 14.43, 5.68, 3.53, 0.56, 1.9, 0.8, 2.54, 1.77, 4.54, 0.8, 0.49, 0.44, 2.0, 1.13, 0.54, 4.51, 0.37], teamFeatures, 10)

[('pittsburgh-steelers', 2012, 11.365214472239403), ('kansas-city-chiefs', 2013, 10.250731681202078), ('baltimore-ravens', 2011, 10.996344847266295), ('indianapolis-colts', 2012, 8.5193309596470073), ('denver-broncos', 2011, 0.0), ('baltimore-ravens', 2012, 11.894465099364494), ('baltimore-ravens', 2013, 11.906850969084983), ('indianapolis-colts', 2013, 11.079115488160596), ('new-york-jets', 2012, 10.730745547258122), ('san-francisco-49ers', 2012, 11.587186888973527)]


In [56]:
teams = {'PIT':'pittsburgh-steelers', 'CIN':'cincinnati-bengals', 'BAL':'baltimore-ravens', 'CLE':'cleveland-browns',
         'NWE':'new-england-patriots', 'BUF':'buffalo-bills', 'MIA':'miami-dolphins', 'NYJ':'new-york-jets',
         'TEN':'tennessee-titans', 'HOU':'houston-texans', 'IND':'indianapolis-colts', 'JAX':'jacksonville-jaguars',
         'KAN':'kansas-city-chiefs', 'OAK':'oakland-raiders', 'DEN':'denver-broncos', 'SDG':'san-diego-chargers',
         'GNB':'green-bay-packers', 'MIN':'minnesota-vikings', 'DET':'detroit-lions', 'CHI':'chicago-bears',
         'DAL':'dallas-cowboys', 'NYG':'new-york-giants', 'PHI':'philadelphia-eagles', 'WAS':'washington-redskins',
         'CAR':'carolina-panthers', 'ATL':'atlanta-falcons', 'NWO':'new-orleans-saints', 'TAM':'tampa-bay-buccaneers',
         'SEA':'seattle-seahawks', 'ARI':'arizona-cardinals', 'STL':'st.-louis-rams', 'SFO':'san-francisco-49ers'}

In [80]:
def get_draft_position(filename):
    '''Returns a dict mapping team to position selected in the first round'''
    teams = {'PIT':'pittsburgh-steelers', 'CIN':'cincinnati-bengals', 'BAL':'baltimore-ravens', 'CLE':'cleveland-browns',
         'NWE':'new-england-patriots', 'BUF':'buffalo-bills', 'MIA':'miami-dolphins', 'NYJ':'new-york-jets',
         'TEN':'tennessee-titans', 'HOU':'houston-texans', 'IND':'indianapolis-colts', 'JAX':'jacksonville-jaguars',
         'KAN':'kansas-city-chiefs', 'OAK':'oakland-raiders', 'DEN':'denver-broncos', 'SDG':'san-diego-chargers',
         'GNB':'green-bay-packers', 'MIN':'minnesota-vikings', 'DET':'detroit-lions', 'CHI':'chicago-bears',
         'DAL':'dallas-cowboys', 'NYG':'new-york-giants', 'PHI':'philadelphia-eagles', 'WAS':'washington-redskins',
         'CAR':'carolina-panthers', 'ATL':'atlanta-falcons', 'NOR':'new-orleans-saints', 'TAM':'tampa-bay-buccaneers',
         'SEA':'seattle-seahawks', 'ARI':'arizona-cardinals', 'STL':'st.-louis-rams', 'SFO':'san-francisco-49ers'}
    draft = pd.read_csv(filename)
#     print draft.head(3)
    draft_order = {}
    for _, row in draft.iterrows():
        if row['Tm'] in teams:
            draft_order[(row['Year'], teams[row['Tm']])] = row['Position Standard']
        else:
            print 'not found', row
    return draft_order
    

In [85]:
def predict_draft_position(feature_vector, draft_picks, teamFeatures):
    kTeams = kNN(feature_vector, teamFeatures, 10)
    similar_drafts = defaultdict(int)
    for team_data in kTeams:
        team, year, distance = team_data
        if (year, team) in draft_picks:
            similar_drafts[draft_picks[(year, team)]] += 1
    return similar_drafts
        

In [86]:
features = [('QB', 1), ('RB', 1), ('DB', 4), ('LB', 3), ('C', 1), ('DE', 2), ('DT', 2), ('G', 2), ('TE', 2), ('WR', 3), ('T', 2)]
teamFeatures = featureExtractor('data/salaries.train.csv', features)
# teamFeatures[0][2] selects the feature vector from the head elem
for i in range(20):
    print predict_draft_position(teamFeatures[i][2], get_draft_position('data/nfldraft.train.csv'), teamFeatures)

defaultdict(<type 'int'>, {'LB': 1, 'G': 1, 'DE': 2, 'DB': 1, 'T': 3, 'DT': 1})
defaultdict(<type 'int'>, {'G': 1, 'DE': 2, 'QB': 4, 'WR': 1, 'RB': 1, 'TE': 1})
defaultdict(<type 'int'>, {'LB': 1, 'DE': 4, 'DB': 1, 'T': 2, 'G': 1})
defaultdict(<type 'int'>, {'DT': 1, 'DE': 3, 'WR': 1, 'G': 1, 'QB': 4})
defaultdict(<type 'int'>, {'DB': 1, 'DE': 2, 'LB': 1, 'T': 3, 'RB': 1})
defaultdict(<type 'int'>, {'DT': 1, 'DE': 5, 'DB': 1, 'G': 1, 'QB': 1})
defaultdict(<type 'int'>, {'LB': 2, 'G': 1, 'DE': 2, 'QB': 2, 'T': 1, 'DT': 1})
defaultdict(<type 'int'>, {'C': 1, 'G': 1, 'DE': 1, 'DB': 1, 'T': 2, 'DT': 3})
defaultdict(<type 'int'>, {'G': 2, 'DE': 2, 'DB': 1, 'QB': 2, 'WR': 2, 'T': 1})
defaultdict(<type 'int'>, {'G': 1, 'DE': 4, 'QB': 1, 'WR': 2, 'TE': 1, 'T': 1})
defaultdict(<type 'int'>, {'C': 1, 'DE': 1, 'WR': 2, 'RB': 1, 'DT': 1, 'T': 3})
defaultdict(<type 'int'>, {'DE': 2, 'DB': 2, 'QB': 2, 'T': 1, 'DT': 2, 'WR': 1})
defaultdict(<type 'int'>, {'G': 1, 'DE': 3, 'DB': 1, 'WR': 2, 'RB': 1, '

IOError: File data/nfl_data.train.csv does not exist