In [53]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

In [55]:
df=pd.read_csv("./preprocessed.csv")

In [57]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10438 entries, 0 to 10437
Data columns (total 31 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   game_id              10438 non-null  object 
 1   game_date            10438 non-null  object 
 2   team                 10438 non-null  object 
 3   AST                  10438 non-null  int64  
 4   BLK                  10438 non-null  int64  
 5   STL                  10438 non-null  int64  
 6   TOV                  10438 non-null  int64  
 7   TOV_team             10438 non-null  int64  
 8   F_tech               10438 non-null  int64  
 9   F_personal           10438 non-null  int64  
 10  team_score           10438 non-null  int64  
 11  opponent_team_score  10438 non-null  int64  
 12  OT_length_min_tot    10438 non-null  float64
 13  rest_days            9777 non-null   float64
 14  attendance           8976 non-null   float64
 15  tz_dif_H_E           9538 non-null  

In [59]:
df.drop('region',axis=1,inplace=True)

In [61]:
y = df["result"]
# 숫자형 컬럼만 선택 (팀명, 게임 ID 같은 범주형 데이터 제외)
X = df.select_dtypes(include=[np.number]).drop(columns=["win"], errors="ignore")
X = X.fillna(X.mean())

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [63]:
model = LogisticRegression()
model.fit(X_scaled, y)

In [65]:
df=pd.read_csv("./East Regional Games to Predict.csv")

In [67]:
df.drop('game_id',axis=1,inplace=True)
df.drop('description',axis=1,inplace=True)

In [69]:
pred_train=model.predict(df)
pred_train



ValueError: could not convert string to float: 'rhode_island_rams'

In [71]:
feature_weights = pd.DataFrame({"Feature": X.columns, "Weight": model.coef_[0]})
feature_weights = feature_weights.sort_values(by="Weight", ascending=False)

feature_weights

Unnamed: 0,Feature,Weight
16,result,5.886162
7,team_score,1.206709
25,REB,0.575619
20,three_point_per,0.381583
2,STL,0.317406
22,eFG%,0.279563
19,field_goal_per,0.279563
18,win_per,0.256612
24,pos,0.174229
21,free_throw_per,0.148605


In [73]:
# 소프트맥스 계산
# 1. 각 가중치의 지수값 계산
exp_weights = np.exp(feature_weights['Weight'])

# 2. 모든 지수값의 합 계산
sum_exp_weights = exp_weights.sum()

# 3. 각 가중치에 소프트맥스 적용: exp(가중치) / (전체 exp 합)
feature_weights['Softmax_Weight'] = exp_weights / sum_exp_weights

# 결과 출력
feature_weights
   

Unnamed: 0,Feature,Weight,Softmax_Weight
16,result,5.886162,0.925798
7,team_score,1.206709,0.008595
25,REB,0.575619,0.004573
20,three_point_per,0.381583,0.003766
2,STL,0.317406,0.003532
22,eFG%,0.279563,0.003401
19,field_goal_per,0.279563,0.003401
18,win_per,0.256612,0.003324
24,pos,0.174229,0.003061
21,free_throw_per,0.148605,0.002984


In [75]:
feature_weights = pd.concat([feature_weights.iloc[:0], feature_weights.iloc[1:]]).reset_index(drop=True)

In [77]:
feature_weights

Unnamed: 0,Feature,Weight,Softmax_Weight
0,team_score,1.206709,0.008595
1,REB,0.575619,0.004573
2,three_point_per,0.381583,0.003766
3,STL,0.317406,0.003532
4,eFG%,0.279563,0.003401
5,field_goal_per,0.279563,0.003401
6,win_per,0.256612,0.003324
7,pos,0.174229,0.003061
8,free_throw_per,0.148605,0.002984
9,AST,0.133945,0.00294


In [79]:
feature_weights = pd.concat([feature_weights.iloc[:0], feature_weights.iloc[1:]]).reset_index(drop=True)

In [81]:
feature_weights

Unnamed: 0,Feature,Weight,Softmax_Weight
0,REB,0.575619,0.004573
1,three_point_per,0.381583,0.003766
2,STL,0.317406,0.003532
3,eFG%,0.279563,0.003401
4,field_goal_per,0.279563,0.003401
5,win_per,0.256612,0.003324
6,pos,0.174229,0.003061
7,free_throw_per,0.148605,0.002984
8,AST,0.133945,0.00294
9,BLK,0.051412,0.002707


In [87]:
feature_weights = pd.concat([feature_weights.iloc[:15], feature_weights.iloc[24:]]).reset_index(drop=True)
feature_weights

Unnamed: 0,Feature,Weight,Softmax_Weight
0,REB,0.575619,0.004573
1,three_point_per,0.381583,0.003766
2,STL,0.317406,0.003532
3,eFG%,0.279563,0.003401
4,field_goal_per,0.279563,0.003401
5,win_per,0.256612,0.003324
6,pos,0.174229,0.003061
7,free_throw_per,0.148605,0.002984
8,AST,0.133945,0.00294
9,BLK,0.051412,0.002707


In [89]:
feature_weights.drop(feature_weights[feature_weights['Feature'].isin(['team_score', 'opponent_team_score'])].index, inplace=True)
feature_weights.drop(columns=['Softmax_Weight'], inplace=True)

In [91]:
exp_vals = np.exp(feature_weights['Weight'])
sum_exp = exp_vals.sum()
feature_weights['Softmax_Weight'] = exp_vals / sum_exp
feature_weights

Unnamed: 0,Feature,Weight,Softmax_Weight
0,REB,0.575619,0.103416
1,three_point_per,0.381583,0.085176
2,STL,0.317406,0.079881
3,eFG%,0.279563,0.076915
4,field_goal_per,0.279563,0.076915
5,win_per,0.256612,0.07517
6,pos,0.174229,0.069225
7,free_throw_per,0.148605,0.067474
8,AST,0.133945,0.066492
9,BLK,0.051412,0.061225


In [95]:
feature_weights.loc[feature_weights["Weight"] < 0, "Softmax_Weight"] *= -1
feature_weights

Unnamed: 0,Feature,Weight,Softmax_Weight
0,REB,0.575619,0.103416
1,three_point_per,0.381583,0.085176
2,STL,0.317406,0.079881
3,eFG%,0.279563,0.076915
4,field_goal_per,0.279563,0.076915
5,win_per,0.256612,0.07517
6,pos,0.174229,0.069225
7,free_throw_per,0.148605,0.067474
8,AST,0.133945,0.066492
9,BLK,0.051412,0.061225


In [97]:
softmax_weights = dict(zip(feature_weights["Feature"], feature_weights["Softmax_Weight"]))
softmax_weights

{'REB': 0.10341569628455542,
 'three_point_per': 0.08517609732846905,
 'STL': 0.07988147046458037,
 'eFG%': 0.07691500165390491,
 'field_goal_per': 0.07691500165390491,
 'win_per': 0.07516978081300454,
 'pos': 0.06922528874643018,
 'free_throw_per': 0.067473985191411,
 'AST': 0.06649206925616173,
 'BLK': 0.061224621874201614,
 'game_count': 0.06016816763286809,
 'home_away_NS': 0.059666040176256596,
 'TOV_team': 0.05956071883407313,
 'OT_length_min_tot': 0.058716060090178525}

In [99]:
df['elo_rating']=1500
ratings = {}             # 각 팀의 Elo 점수를 저장할 딕셔너리
default_rating = 1500    # 초기 Elo 점수
k_base = 32   

In [101]:
df

Unnamed: 0,team_home,team_away,seed_home,seed_away,home_away_NS,rest_days_Home,rest_days_Away,travel_dist_Home,travel_dist_Away,WINNING %,elo_rating
0,rhode_island_rams,north_carolina_tar_heels,16,17,0,6,1,0,770,,1500
1,nc_state_wolfpack,rhode_island_rams,1,16,0,7,1,1440,0,,1500
2,nc_state_wolfpack,north_carolina_tar_heels,1,17,0,7,1,1440,770,,1500
3,liberty_flames,bucknell_bison,2,9,0,7,10,255,250,,1500
4,drexel_dragons,delaware_blue_hens,3,10,0,11,9,0,680,,1500
5,massachusetts_minutewomen,princeton_tigers,4,11,0,9,7,75,34,,1500
6,buffalo_bulls,stony_brook_seawolves,5,12,0,8,8,60,3400,,1500
7,fairfield_stags,towson_tigers,6,13,0,7,9,20,220,,1500
8,uconn_huskies,campbell_fighting_camels,7,14,0,9,10,15,1966,,1500
9,american_university_eagles,columbia_lions,8,15,0,7,8,350,145,,1500


In [103]:
def calc_adjustment_factor(row, weights_dict):
    """
    row: 한 팀의 경기 데이터 (pandas Series)
    weights_dict: { 'REB': softmax_weight, 'three_point_per': softmax_weight, ... }
    각 지표의 실제 값과 softmax 가중치를 곱한 후 모두 합산하여 조정 인자를 반환
    """
    row = row.fillna(0)  # 결측값은 0으로 처리
    total = 0
    for feat, w in weights_dict.items():
        if feat in row.index:
            total += row[feat] * w
    return total

In [105]:
# (2) Elo 업데이트를 위한 함수 (가중치 적용 버전)
def update_weighted_elo_by_game_id(df, ratings=None, k=32, default_rating=1500, weight_dict=None):
    """
    df: 경기 데이터 (각 game_id별로 두 팀이 존재)
    ratings: {팀명: 현재 Elo 점수} 딕셔너리 (없으면 자동 생성)
    k: 기본 Elo 변동량
    default_rating: 새로운 팀의 초기 Elo 점수
    weight_dict: 각 경기 지표에 적용할 가중치 딕셔너리 (예: {"REB": 0.07, "three_point_per": 0.05, "TOV": -0.03})
    """
    if ratings is None:
        ratings = {}
        
    new_ratings = ratings.copy()
    df = df.copy()  # 원본 DataFrame 보호

    # 경기 전 Elo를 저장하는 컬럼 추가
    df["elo_before"] = df["team"].map(lambda team: new_ratings.get(team, default_rating))
    
    # 각 경기(game_id)별로 처리
    for game_id, game_data in df.groupby("game_id"):
        if len(game_data) != 2:
            continue  # 두 팀이 아닌 경우 건너뛰기

        # 팀 이름과 점수 추출
        team1, team2 = game_data["team"].values
        score1, score2 = game_data["team_score"].values

        # Elo 초기 할당: 해당 팀이 ratings에 없으면 기본값 할당
        if team1 not in new_ratings:
            new_ratings[team1] = default_rating
        if team2 not in new_ratings:
            new_ratings[team2] = default_rating

        rating1 = new_ratings[team1]
        rating2 = new_ratings[team2]

        # 승패 결정: 점수가 높은 팀이 승리
        if score1 > score2:
            result1, result2 = 1, 0
        elif score1 < score2:
            result1, result2 = 0, 1
        else:
            result1, result2 = 0.5, 0.5  # 무승부 처리

        # 예상 승률 계산 (기본 Elo 공식)
        expected1 = 1 / (1 + 10 ** ((rating2 - rating1) / 400))
        expected2 = 1 - expected1

        # 각 팀의 경기 데이터 행(row) 추출
        row1 = game_data.loc[game_data["team"] == team1].iloc[0]
        row2 = game_data.loc[game_data["team"] == team2].iloc[0]

        # 조정 인자 계산: weight_dict에 있는 각 지표의 값을 곱하여 합산
        if weight_dict is not None:
            adj1 = calc_adjustment_factor(row1, weight_dict)
            adj2 = calc_adjustment_factor(row2, weight_dict)
        else:
            adj1 = 0
            adj2 = 0

        # Elo 업데이트:
        # 승리한 팀의 Elo 상승폭에 (1 + 조정 인자)를 곱하여 보정
        if result1 == 1:
            new_rating1 = rating1 + k * (result1 - expected1) * (1 + adj1)
            new_rating2 = rating2 + k * (result2 - expected2)
        elif result2 == 1:
            new_rating1 = rating1 + k * (result1 - expected1)
            new_rating2 = rating2 + k * (result2 - expected2) * (1 + adj2)
        else:
            # 무승부의 경우 기본 Elo 업데이트
            new_rating1 = rating1 + k * (result1 - expected1)
            new_rating2 = rating2 + k * (result2 - expected2)
        
        new_ratings[team1] = new_rating1
        new_ratings[team2] = new_rating2
    
    # 경기 후 Elo를 "elo_after" 컬럼에 추가
    df["elo_after"] = df["team"].map(lambda team: new_ratings.get(team, default_rating))
    return df
    

In [107]:
weight_dict = dict(zip(feature_weights["Feature"], feature_weights["Softmax_Weight"]))

# (5) 가중치 적용하여 Elo 업데이트 실행


In [109]:
df_weighted = update_weighted_elo_by_game_id(df, k=32, default_rating=1500, weight_dict=weight_dict)

KeyError: 'team'

In [111]:
df_weighted

NameError: name 'df_weighted' is not defined

In [193]:
df_weighted = df_weighted.sort_values(by="elo_after", ascending=False)
df_weighted

Unnamed: 0,game_id,game_date,team,AST,BLK,STL,TOV,TOV_team,F_tech,F_personal,...,field_goal_per,three_point_per,free_throw_per,eFG%,FGA,pos,REB,elo_after,elo_rating,elo_before
8549,game_2022_1007,2021-11-27,unc_pembroke_braves,17,1,5,12,2,0,18,...,0.475000,0.333333,0.692308,0.475000,58,65.9712,42,1592.311064,1500,1500
2281,game_2022_1090,2021-11-29,freed_hardeman_lions,11,2,10,13,2,0,11,...,0.531915,0.363636,0.600000,0.531915,58,64.8960,32,1584.225540,1500,1500
8042,game_2022_1567,2021-12-11,texas_am_international_dustdevils,11,1,12,20,1,0,10,...,0.454545,0.352941,0.625000,0.454545,61,72.0384,39,1577.175913,1500,1500
8546,game_2022_840,2021-11-24,coker_cobras,8,4,8,16,0,0,18,...,0.297297,0.227273,0.608696,0.297297,59,66.3552,44,1506.873742,1500,1500
8760,game_2022_1406,2021-12-06,florida_national_conquistadors,6,2,11,28,0,0,19,...,0.228571,0.285714,0.583333,0.228571,49,75.1488,21,1490.344430,1500,1500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2173,game_2022_223,2021-11-12,south_carolina_gamecocks,18,10,4,20,2,0,20,...,0.500000,0.400000,0.727273,0.500000,60,68.0064,49,-295295.107779,1500,1500
126,game_2022_2115,2022-01-02,south_carolina_gamecocks,15,4,6,18,0,0,11,...,0.547619,0.357143,0.791667,0.547619,56,70.6176,45,-295295.107779,1500,1500
2163,game_2022_1589,2021-12-12,south_carolina_gamecocks,7,15,5,16,0,0,16,...,0.428571,0.176471,0.555556,0.428571,66,67.0848,61,-295295.107779,1500,1500
2165,game_2022_1233,2021-12-03,south_carolina_gamecocks,16,16,4,14,1,0,14,...,0.465116,0.307692,0.481481,0.465116,56,62.2848,52,-295295.107779,1500,1500


In [195]:
df_weighted = df_weighted.groupby('team').mean(numeric_only=True).reset_index()

In [197]:
df_weighted

Unnamed: 0,team,AST,BLK,STL,TOV,TOV_team,F_tech,F_personal,team_score,opponent_team_score,...,field_goal_per,three_point_per,free_throw_per,eFG%,FGA,pos,REB,elo_after,elo_rating,elo_before
0,abilene_christian_wildcats,16.100000,1.933333,7.666667,14.333333,0.600000,0.266667,15.966667,73.866667,66.600000,...,0.499220,0.353175,0.763628,0.499220,59.100000,66.897920,34.033333,-6630.624091,1500.0,1500.0
1,academy_of_art_urban_knights,10.500000,5.500000,9.000000,14.500000,1.500000,0.000000,16.500000,50.000000,65.500000,...,0.432927,0.160774,0.472222,0.432927,63.000000,67.315200,35.500000,1436.000722,1500.0,1500.0
2,agnes_scott_college_scotties,5.000000,2.000000,4.500000,38.000000,1.500000,0.000000,11.500000,25.000000,109.500000,...,0.209122,0.174825,0.472222,0.209122,50.500000,78.105600,30.500000,1441.612560,1500.0,1500.0
3,air_force_falcons,12.806452,2.064516,11.161290,13.161290,1.129032,0.096774,17.774194,62.064516,58.161290,...,0.415959,0.285179,0.754856,0.415959,61.193548,65.398916,34.225806,-16378.655565,1500.0,1500.0
4,akron_zips,14.392857,3.535714,7.142857,14.928571,0.500000,0.142857,15.571429,67.428571,66.714286,...,0.483366,0.317646,0.704481,0.483366,58.000000,67.371429,35.714286,866.655529,1500.0,1500.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
559,xavier_la_gold_rush,8.000000,1.500000,8.000000,25.500000,0.500000,0.500000,22.500000,42.500000,69.500000,...,0.276316,0.244444,0.732323,0.276316,50.000000,67.507200,35.000000,1437.468505,1500.0,1500.0
560,xavier_musketeers,12.866667,2.966667,7.500000,16.366667,0.533333,0.066667,16.600000,62.333333,72.833333,...,0.419886,0.261187,0.734109,0.419886,59.233333,68.523520,36.300000,-9701.283172,1500.0,1500.0
561,yale_bulldogs,13.666667,3.703704,7.037037,18.555556,1.296296,0.000000,16.592593,58.518519,56.814815,...,0.424179,0.307496,0.657722,0.424179,56.481481,66.414933,38.851852,-3686.555534,1500.0,1500.0
562,young_harris_college_mountain_lions,11.000000,1.000000,3.000000,18.000000,0.000000,0.000000,17.000000,58.000000,91.000000,...,0.450000,0.357143,0.583333,0.450000,54.000000,70.348800,31.000000,1475.960682,1500.0,1500.0


In [199]:
df_weighted = df_weighted.sort_values(by="elo_after", ascending=False)
df_weighted

Unnamed: 0,team,AST,BLK,STL,TOV,TOV_team,F_tech,F_personal,team_score,opponent_team_score,...,field_goal_per,three_point_per,free_throw_per,eFG%,FGA,pos,REB,elo_after,elo_rating,elo_before
496,unc_pembroke_braves,17.000000,1.000000,5.000000,12.000000,2.000000,0.000000,18.000000,65.000000,45.000000,...,0.475000,0.333333,0.692308,0.475000,58.000000,65.971200,42.000000,1592.311064,1500.0,1500.0
157,freed_hardeman_lions,11.000000,2.000000,10.000000,13.000000,2.000000,0.000000,11.000000,71.000000,62.000000,...,0.531915,0.363636,0.600000,0.531915,58.000000,64.896000,32.000000,1584.225540,1500.0,1500.0
460,texas_am_international_dustdevils,11.000000,1.000000,12.000000,20.000000,1.000000,0.000000,10.000000,68.000000,55.000000,...,0.454545,0.352941,0.625000,0.454545,61.000000,72.038400,39.000000,1577.175913,1500.0,1500.0
96,coker_cobras,8.000000,4.000000,8.000000,16.000000,0.000000,0.000000,18.000000,51.000000,44.000000,...,0.297297,0.227273,0.608696,0.297297,59.000000,66.355200,44.000000,1506.873742,1500.0,1500.0
153,florida_national_conquistadors,6.000000,2.000000,11.000000,28.000000,0.000000,0.000000,19.000000,35.000000,91.000000,...,0.228571,0.285714,0.583333,0.228571,49.000000,75.148800,21.000000,1490.344430,1500.0,1500.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
489,uconn_huskies,18.833333,4.733333,9.300000,14.133333,0.566667,0.033333,13.566667,74.766667,54.166667,...,0.571101,0.337707,0.703496,0.571101,60.066667,66.202880,38.666667,-163921.862081,1500.0,1500.0
453,tennessee_lady_volunteers,14.645161,5.967742,6.290323,17.193548,0.516129,0.064516,15.516129,69.935484,60.193548,...,0.451456,0.298011,0.635182,0.451456,63.580645,70.253419,47.935484,-168166.079357,1500.0,1500.0
203,iowa_state_cyclones,14.281250,3.718750,5.375000,12.781250,0.468750,0.031250,12.750000,76.875000,63.750000,...,0.474416,0.387030,0.832651,0.474416,60.750000,67.394400,39.718750,-198473.349686,1500.0,1500.0
249,louisville_cardinals,15.862069,4.413793,9.586207,14.448276,0.275862,0.413793,15.931034,72.172414,55.034483,...,0.491077,0.366554,0.697514,0.491077,61.965517,66.320772,38.965517,-200767.354868,1500.0,1500.0
