In [1]:
# -*- coding:utf-8 -*-
import pandas as pd
import math
import csv
import random
import numpy as np
from sklearn import linear_model
from sklearn.model_selection import cross_val_score

# 当每支队伍没有elo等级分时，赋予其基础elo等级分
base_elo = 1600
team_elos = {} 
team_stats = {}
X = []
y = []
 #存放数据的目录

# 根据每支队伍的Miscellaneous, Opponent, Team统计数据csv文件进行初始化
def initialize_data(Mstat, Ostat, Tstat):
    new_Mstat = Mstat.drop(['Rk', 'Arena'], axis=1)
    new_Ostat = Ostat.drop(['Rk', 'G', 'MP'], axis=1)
    new_Tstat = Tstat.drop(['Rk', 'G', 'MP'], axis=1)

    team_stats1 = pd.merge(new_Mstat, new_Ostat, how='left', on='Team')
    team_stats1 = pd.merge(team_stats1, new_Tstat, how='left', on='Team')
    return team_stats1.set_index('Team', inplace=False, drop=True)

def get_elo(team):
    try:
        return team_elos[team]
    except:
        # 当最初没有elo时，给每个队伍最初赋base_elo
        team_elos[team] = base_elo
        return team_elos[team]
    
# 计算每个球队的elo值
def calc_elo(win_team, lose_team):
    winner_rank = get_elo(win_team)
    loser_rank = get_elo(lose_team)

    rank_diff = winner_rank - loser_rank
    exp = (rank_diff  * -1) / 400
    odds = 1 / (1 + math.pow(10, exp))
    # 根据rank级别修改K值
    if winner_rank < 2100:
        k = 32
    elif winner_rank >= 2100 and winner_rank < 2400:
        k = 24
    else:
        k = 16
    new_winner_rank = round(winner_rank + (k * (1 - odds)))
    new_rank_diff = new_winner_rank - winner_rank
    new_loser_rank = loser_rank - new_rank_diff

    return new_winner_rank, new_loser_rank


In [14]:
def  build_dataSet(all_data):
    print("Building data set..")
    X = []
    skip = 0
    for index, row in all_data.iterrows():

        Wteam = row['WTeam']
        Lteam = row['LTeam']

        #获取最初的elo或是每个队伍最初的elo值
        team1_elo = get_elo(Wteam)
        team2_elo = get_elo(Lteam)

        # 给主场比赛的队伍加上100的elo值
        if row['WLoc'] == 'H':
            team1_elo += 100
        else:
            team2_elo += 100

        # 把elo当为评价每个队伍的第一个特征值
        team1_features = [team1_elo]
        team2_features = [team2_elo]

        # 添加我们从basketball reference.com获得的每个队伍的统计信息
        for key, value in team_stats.loc[Wteam].iteritems():
            team1_features.append(value)
        for key, value in team_stats.loc[Lteam].iteritems():
            team2_features.append(value)

        # 将两支队伍的特征值随机的分配在每场比赛数据的左右两侧
        # 并将对应的0/1赋给y值
        if random.random() > 0.5:
            X.append(team1_features + team2_features)
            y.append(0)
        else:
            X.append(team2_features + team1_features)
            y.append(1)

        if skip == 0:
            print('X',X)
            skip = 1

        # 根据这场比赛的数据更新队伍的elo值
        new_winner_rank, new_loser_rank = calc_elo(Wteam, Lteam)
        team_elos[Wteam] = new_winner_rank
        team_elos[Lteam] = new_loser_rank

    return np.nan_to_num(X), y

In [15]:
if __name__ == '__main__':

    Mstat = pd.read_csv('15-16Miscellaneous_Stat.csv')
    Ostat = pd.read_csv('15-16Opponent_Per_Game_Stat.csv')
    Tstat = pd.read_csv('15-16Team_Per_Game_Stat.csv')

    team_stats = initialize_data(Mstat, Ostat, Tstat)

    result_data = pd.read_csv('2015-2016_result.csv')
    X, y = build_dataSet(result_data)

    # 训练网络模型
    print("Fitting on %d game samples.." % len(X))

    model = linear_model.LogisticRegression()
    model.fit(X, y)

    #利用10折交叉验证计算训练正确率
    print("Doing cross-validation..")
    print(cross_val_score(model, X, y, cv = 10, scoring='accuracy', n_jobs=-1).mean())

Building data set..
X [[1700, 27.2, 32.0, 50.0, 33.0, 49.0, -2.73, 0.0, -2.74, 104.6, 107.6, 93.4, 0.255, 0.256, 0.527, 0.483, 12.6, 23.7, 0.205, 0.48700000000000004, 10.5, 75.8, 0.204, 812292.0, 38.0, 85.8, 0.44299999999999995, 7.6, 22.4, 0.341, 30.3, 63.4, 0.479, 17.5, 23.2, 0.754, 10.9, 33.3, 44.2, 20.8, 7.2, 4.2, 11.3, 18.5, 101.1, 36.9, 84.0, 0.439, 7.4, 21.5, 0.34600000000000003, 29.4, 62.5, 0.47100000000000003, 17.2, 21.4, 0.805, 10.4, 34.0, 44.4, 20.5, 5.7, 5.7, 13.4, 19.7, 98.4, 1600, 28.2, 48.0, 34.0, 51.0, 31.0, 3.61, -0.12, 3.49, 105.1, 101.4, 97.1, 0.237, 0.336, 0.552, 0.516, 13.8, 19.1, 0.185, 0.48, 14.4, 74.6, 0.19399999999999998, 690150.0, 37.1, 86.1, 0.43200000000000005, 8.3, 24.5, 0.33799999999999997, 28.9, 61.6, 0.469, 16.7, 22.1, 0.755, 11.5, 35.0, 46.5, 22.0, 8.6, 5.0, 16.1, 18.3, 99.2, 38.6, 84.4, 0.45799999999999996, 9.9, 28.4, 0.35, 28.7, 56.1, 0.512, 15.6, 20.0, 0.7829999999999999, 8.3, 33.8, 42.1, 25.6, 9.1, 5.9, 15.0, 19.1, 102.8]]
Fitting on 1316 game sample

In [17]:
print(X)

[[1700.    27.2   32.  ...   15.    19.1  102.8]
 [1700.    30.3   67.  ...   14.8   18.    98.6]
 [1584.    26.9   21.  ...   13.3   21.7   99.1]
 ...
 [1803.    28.1   57.  ...   15.2   20.7  114.9]
 [1902.    27.4   73.  ...   13.6   20.3  104.3]
 [1982.    27.4   73.  ...   13.6   20.3  104.3]]


In [5]:
def predict_winner(team_1, team_2, model):
    features = []

    # team 1，客场队伍
    features.append(get_elo(team_1))
    for key, value in team_stats.loc[team_1].iteritems():
        features.append(value)

    # team 2，主场队伍
    features.append(get_elo(team_2) + 100)
    for key, value in team_stats.loc[team_2].iteritems():
        features.append(value)

    features = np.nan_to_num(features)
    return model.predict_proba([features])

In [7]:
print('Predicting on new schedule..')
schedule1617 = pd.read_csv('16-17Schedule.csv')
result = []
for index, row in schedule1617.iterrows():
    team1 = row['Vteam']
    team2 = row['Hteam']
    pred = predict_winner(team1, team2, model)
    prob = pred[0][0]
    if prob > 0.5:
        winner = team1
        loser = team2
        result.append([winner, loser, prob])
    else:
        winner = team2
        loser = team1
        result.append([winner, loser, 1 - prob])

with open('16-17Result.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['win', 'lose', 'probability'])
    writer.writerows(result)
    print('done.')

Predicting on new schedule..
done.


In [None]:
result_data = pd.read_csv('2015-2016_result.csv')
    
print(result_data)

In [7]:
print(result_data.iterrows())

<generator object DataFrame.iterrows at 0x0000000009302B48>


In [8]:
print(next(result_data.iterrows()))

(0, WTeam      Atlanta Hawks
LTeam    New York Knicks
WLoc                   V
Name: 0, dtype: object)


In [13]:
team_stats = initialize_data(Mstat, Ostat, Tstat)
team1_features = []

for index, row in result_data.iterrows():
    Wteam = row['WTeam']
    print(Wteam)
for key, value in team_stats.loc[Wteam].iteritems():
    team1_features.append(value)
        

        


Atlanta Hawks
San Antonio Spurs
Memphis Grizzlies
Chicago Bulls
Detroit Pistons
Miami Heat
Atlanta Hawks
Chicago Bulls
Cleveland Cavaliers
Dallas Mavericks
Los Angeles Clippers
Denver Nuggets
Detroit Pistons
Golden State Warriors
Golden State Warriors
Toronto Raptors
Los Angeles Clippers
Sacramento Kings
Memphis Grizzlies
Cleveland Cavaliers
Minnesota Timberwolves
Minnesota Timberwolves
Golden State Warriors
Portland Trail Blazers
New York Knicks
New York Knicks
Oklahoma City Thunder
Boston Celtics
Phoenix Suns
Phoenix Suns
Los Angeles Clippers
Oklahoma City Thunder
Toronto Raptors
Detroit Pistons
Utah Jazz
Utah Jazz
Washington Wizards
Washington Wizards
San Antonio Spurs
Chicago Bulls
Atlanta Hawks
Dallas Mavericks
Miami Heat
Oklahoma City Thunder
Toronto Raptors
Milwaukee Bucks
Golden State Warriors
Houston Rockets
Los Angeles Clippers
Portland Trail Blazers
San Antonio Spurs
Cleveland Cavaliers
Charlotte Hornets
Toronto Raptors
Indiana Pacers
Denver Nuggets
Atlanta Hawks
Orlando Mag

Washington Wizards
Orlando Magic
Charlotte Hornets
San Antonio Spurs
Toronto Raptors
Sacramento Kings
Golden State Warriors
Miami Heat
San Antonio Spurs
Boston Celtics
Charlotte Hornets
Cleveland Cavaliers
Brooklyn Nets
Atlanta Hawks
Memphis Grizzlies
Milwaukee Bucks
Phoenix Suns
Miami Heat
Toronto Raptors
Chicago Bulls
Cleveland Cavaliers
Atlanta Hawks
Minnesota Timberwolves
Utah Jazz
New York Knicks
San Antonio Spurs
Indiana Pacers
Denver Nuggets
Detroit Pistons
Los Angeles Lakers
Phoenix Suns
Miami Heat
Oklahoma City Thunder
Houston Rockets
Chicago Bulls
Charlotte Hornets
Memphis Grizzlies
Los Angeles Clippers
Golden State Warriors
Indiana Pacers
New Orleans Pelicans
Denver Nuggets
Los Angeles Lakers
San Antonio Spurs
Portland Trail Blazers
Toronto Raptors
Atlanta Hawks
Boston Celtics
Charlotte Hornets
Detroit Pistons
Golden State Warriors
Milwaukee Bucks
Oklahoma City Thunder
Houston Rockets
New York Knicks
Cleveland Cavaliers
Denver Nuggets
Cleveland Cavaliers
San Antonio Spurs
To