In [1]:
import pandas as pd
import pathlib
from sklearn.neighbors import KNeighborsClassifier

# Load the training data from CSV and show its (rows, columns) shape.
train = pd.read_csv('data.csv')
train.shape

(3444, 17)

In [2]:
# Load the rows to predict on from CSV and display them.
target = pd.read_csv('yasuda-kinen-2019.csv')
target

Unnamed: 0,date,race,weather,horses,condition,fam,gate,number,name,sex,age,weight,odds,popular,weight_diff,training_center
0,2019-06-02,安田記念,晴,16,良,0,1,1,ケイアイノーテック,牡,4,58,55.3,11,0,西
1,2019-06-02,安田記念,晴,16,良,0,1,2,アエロリット,牝,5,56,13.5,3,0,東
2,2019-06-02,安田記念,晴,16,良,0,2,3,サクラアンプルール,牡,8,58,138.7,15,0,西
3,2019-06-02,安田記念,晴,16,良,0,2,4,サングレーザー,牡,5,58,27.1,6,0,西
4,2019-06-02,安田記念,晴,16,良,0,3,5,インディチャンプ,牡,4,58,21.6,5,0,西
5,2019-06-02,安田記念,晴,16,良,0,3,6,グァンチャーレ,牡,7,58,77.7,13,0,西
6,2019-06-02,安田記念,晴,16,良,0,4,7,モズアスコット,牡,5,58,30.5,7,0,西
7,2019-06-02,安田記念,晴,16,良,0,4,8,ステルヴィオ,牡,4,58,20.6,4,0,東
8,2019-06-02,安田記念,晴,16,良,0,5,9,スマートオーディン,牡,6,58,61.6,12,0,西
9,2019-06-02,安田記念,晴,16,良,0,5,10,フィアーノロマーノ,牡,5,58,33.9,9,0,西


In [3]:
def _encode(series, categories):
    """Replace each label in ``series`` with its position in ``categories``.

    Args:
        series: pandas Series of category labels without missing values.
        categories: ordered list of the allowed labels; a label's index in
            this list becomes its integer code.

    Returns:
        A Series with the same index holding the integer codes.

    Raises:
        KeyError: if ``series`` contains a label that is not in ``categories``.
    """
    codes = {label: code for code, label in enumerate(categories)}
    unknown = set(series.unique()) - set(codes)
    if unknown:
        # Fail loudly instead of letting an unmapped label turn into NaN.
        raise KeyError(
            f'unexpected value(s) in column {series.name!r}: '
            f'{sorted(map(str, unknown))}')
    return series.map(codes)


def divide(df):
    """Split a race table into a feature matrix and binary labels.

    The ``date``, ``race`` and ``name`` columns are discarded, and the
    categorical columns ``weather``, ``condition``, ``sex`` and
    ``training_center`` are replaced by integer codes (the position of the
    label in the fixed lists below). ``fam`` is converted with ``int``.
    Rows containing a missing value are dropped. When a ``result`` column is
    present, rows whose result is not made of digits only are dropped as well.

    The input frame is never modified.

    Args:
        df: pandas DataFrame with the columns named above plus any further
            feature columns, optionally including ``result``.

    Returns:
        A tuple ``(X, y)``. ``X`` is a NumPy array of every remaining column
        except ``result``, in the frame's column order. ``y`` is a NumPy
        array holding 1 where ``result <= 3`` and 0 otherwise, or ``None``
        when ``df`` has no ``result`` column.

    Raises:
        KeyError: if a categorical column holds a label outside its list.
    """
    # `drop` without `inplace` returns a new frame, so the caller's data
    # stays untouched without an explicit copy.
    df = df.drop(columns=['date', 'race', 'name'])
    has_result = 'result' in df
    if has_result:
        # Keep only rows whose result is a plain number, then cast it.
        # `assign` builds a new frame, so nothing is written into a slice.
        numeric = df.result.astype(str).str.isdigit()
        df = df[numeric]
        df = df.assign(result=df.result.astype(int))
    # Remove incomplete rows BEFORE encoding: the encoders below cannot
    # handle missing values and would otherwise raise on them.
    df = df.dropna()
    df = df.assign(
        weather=_encode(df.weather, ['晴', '曇', '小雨', '雨']),
        condition=_encode(df.condition, ['良', '稍', '重', '不']),
        fam=df.fam.map(int),
        sex=_encode(df.sex, ['牡', '牝', 'セ']),
        training_center=_encode(df.training_center, ['東', '西', '外', '地', '他']),
    )
    if not has_result:
        return df.values, None
    labels = (df.result <= 3).astype('int64').values
    return df.drop(columns='result').values, labels

In [4]:
# Split the training data into features X and labels y, then show their shapes.
X, y = divide(train)
X.shape, y.shape

((3379, 13), (3379,))

In [5]:
# Run the prediction target through the same split; the label part of the
# return value is ignored.
X_pred, _ = divide(target)
X_pred.shape

(16, 13)

In [6]:
# Fit a nearest-neighbour classifier on the training features and labels.
knc = KNeighborsClassifier(n_neighbors=1)  # k was chosen arbitrarily
knc.fit(X, y)

# Predict a label for each row of the prediction target.
y_pred = knc.predict(X_pred)

# Show the target table with the predictions attached as a `pred` column.
target.assign(pred=y_pred)

Unnamed: 0,date,race,weather,horses,condition,fam,gate,number,name,sex,age,weight,odds,popular,weight_diff,training_center,pred
0,2019-06-02,安田記念,晴,16,良,0,1,1,ケイアイノーテック,牡,4,58,55.3,11,0,西,0
1,2019-06-02,安田記念,晴,16,良,0,1,2,アエロリット,牝,5,56,13.5,3,0,東,1
2,2019-06-02,安田記念,晴,16,良,0,2,3,サクラアンプルール,牡,8,58,138.7,15,0,西,0
3,2019-06-02,安田記念,晴,16,良,0,2,4,サングレーザー,牡,5,58,27.1,6,0,西,1
4,2019-06-02,安田記念,晴,16,良,0,3,5,インディチャンプ,牡,4,58,21.6,5,0,西,0
5,2019-06-02,安田記念,晴,16,良,0,3,6,グァンチャーレ,牡,7,58,77.7,13,0,西,0
6,2019-06-02,安田記念,晴,16,良,0,4,7,モズアスコット,牡,5,58,30.5,7,0,西,0
7,2019-06-02,安田記念,晴,16,良,0,4,8,ステルヴィオ,牡,4,58,20.6,4,0,東,1
8,2019-06-02,安田記念,晴,16,良,0,5,9,スマートオーディン,牡,6,58,61.6,12,0,西,0
9,2019-06-02,安田記念,晴,16,良,0,5,10,フィアーノロマーノ,牡,5,58,33.9,9,0,西,0
