<a href="https://colab.research.google.com/github/kz2681/marine/blob/master/Marine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# パラメータ

## pip


In [0]:
!pip install category_encoders
!pip install adabound

Collecting category_encoders
[?25l  Downloading https://files.pythonhosted.org/packages/44/57/fcef41c248701ee62e8325026b90c432adea35555cbc870aff9cfba23727/category_encoders-2.2.2-py2.py3-none-any.whl (80kB)
[K     |████                            | 10kB 18.8MB/s eta 0:00:01[K     |████████▏                       | 20kB 4.1MB/s eta 0:00:01[K     |████████████▏                   | 30kB 5.3MB/s eta 0:00:01[K     |████████████████▎               | 40kB 5.5MB/s eta 0:00:01[K     |████████████████████▎           | 51kB 4.7MB/s eta 0:00:01[K     |████████████████████████▍       | 61kB 5.1MB/s eta 0:00:01[K     |████████████████████████████▍   | 71kB 5.6MB/s eta 0:00:01[K     |████████████████████████████████| 81kB 4.1MB/s 
Installing collected packages: category-encoders
Successfully installed category-encoders-2.2.2
Collecting adabound
  Downloading https://files.pythonhosted.org/packages/cd/44/0c2c414effb3d9750d780b230dbb67ea48ddc5d9a6d7a9b7e6fcc6bdcff9/adabound-0.0.5-py3-n

## 設定

In [0]:
import argparse
import gc
import adabound
import matplotlib.pyplot as plt
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
import torch
import category_encoders as ce
from sklearn.preprocessing import StandardScaler
import copy
import sys
import pandas as pd
import numpy as np
import datetime
import sqlite3
import glob
import os
import logging
import pickle
import json


def setup_logger(name, logfile):
    """Return the logger called ``name``, writing to ``logfile`` and the console.

    Handlers already attached to that logger are detached and closed first, so
    re-running the notebook cell neither duplicates every log line nor keeps
    earlier log files open.

    Args:
        name: Logger name passed to ``logging.getLogger``.
        logfile: Path of the log file; it receives records at DEBUG and above.

    Returns:
        The configured ``logging.Logger``. Its console handler emits INFO and
        above.
    """
    logger = logging.getLogger(name)

    # Iterate over a copy: removeHandler() mutates logger.handlers, and looping
    # over the live list skips every other handler.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    logger.setLevel(logging.DEBUG)
    detail_format = ('[%(asctime)s][%(levelname)s][%(name)s]'
                     '[%(funcName)s][%(lineno)s] %(message)s')

    fh = logging.FileHandler(logfile)
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(logging.Formatter(detail_format))

    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    ch.setFormatter(logging.Formatter(detail_format))

    logger.addHandler(fh)
    logger.addHandler(ch)

    return logger

  import pandas.util.testing as tm


# クラス


## Common

In [0]:
class Common:
    """Paths and file helpers shared by every stage of the notebook.

    All locations are built by plain string concatenation on ``base_dir``, so
    ``base_dir`` is expected to end with a path separator.
    """

    # Course number -> racecourse name printed in the prediction report.
    _COURSE_NAMES = {
        1: '札幌', 2: '函館', 3: '福島', 4: '新潟', 5: '東京',
        6: '中山', 7: '中京', 8: '京都', 9: '阪神', 10: '小倉',
    }

    def __init__(self, base_dir):
        """Derive the model directory and the SQLite path from ``base_dir``."""
        self.base_dir = base_dir
        self.model_dir = base_dir + 'data/model'
        self.db_name = self.base_dir + 'data/sqlite/keiba'
        self.prediction_dir = None

    def set_model_code(self, model_code=None):
        """Point ``model_dir`` at one model and return its code.

        Without ``model_code`` a new code ('m' + timestamp) is generated and
        its directory is created; otherwise the given code is used as-is.
        ``model_dir`` is extended in place, so call this once per instance.
        """
        if not model_code:
            now = datetime.datetime.now()
            model_code = 'm' + now.strftime('%Y%m%d%H%M%S')
            self.model_dir = self.model_dir + '/' + model_code
            # makedirs also creates a missing 'data/model' parent.
            os.makedirs(self.model_dir)
        else:
            self.model_dir = self.model_dir + '/' + model_code
        return model_code

    def save_encoders(self, StandardScaler, OneHotEncoder):
        """Pickle the two fitted encoders into the model directory."""
        with open(self.model_dir + '/StandardScaler.pickle', 'wb') as f:
            pickle.dump(StandardScaler, f)
        # The '.pickl' spelling is kept so previously saved models still load.
        with open(self.model_dir + '/OneHotEncoder.pickl', 'wb') as f:
            pickle.dump(OneHotEncoder, f)

    def load_encoders(self):
        """Return ``(StandardScaler, OneHotEncoder)`` unpickled from the model directory."""
        # NOTE: pickle.load executes arbitrary code; only load files this
        # notebook wrote itself.
        with open(self.model_dir + '/StandardScaler.pickle', 'rb') as f:
            StandardScaler = pickle.load(f)
        with open(self.model_dir + '/OneHotEncoder.pickl', 'rb') as f:
            OneHotEncoder = pickle.load(f)
        return StandardScaler, OneHotEncoder

    def save_HP(self, HP, prediction=False):
        """Write ``HP`` as hp.json to the model (or prediction) directory."""
        target_dir = self.prediction_dir if prediction else self.model_dir
        with open(target_dir + '/hp.json', 'w') as f:
            json.dump(HP, f)

    def load_HP(self, prediction=False):
        """Read hp.json from the model (or prediction) directory."""
        target_dir = self.prediction_dir if prediction else self.model_dir
        with open(target_dir + '/hp.json') as f:
            return json.load(f)

    def save_learning_file(self, train_df, test_df):
        """Write train.csv and test.csv into the model directory."""
        train_df.to_csv(self.model_dir + '/train.csv')
        test_df.to_csv(self.model_dir + '/test.csv')

    def read_learning_file(self):
        """Return ``(train_df, test_df)`` read from the model directory."""
        train_df = pd.read_csv(self.model_dir + '/train.csv')
        test_df = pd.read_csv(self.model_dir + '/test.csv')
        return train_df, test_df

    def save_learning_id_file(self, train_id_df, test_id_df):
        """Write train_id.csv and test_id.csv into the model directory."""
        train_id_df.to_csv(self.model_dir + '/train_id.csv')
        test_id_df.to_csv(self.model_dir + '/test_id.csv')

    def read_learning_id_file(self):
        """Return ``(train_id_df, test_id_df)`` read from the model directory."""
        train_id_df = pd.read_csv(self.model_dir + '/train_id.csv')
        test_id_df = pd.read_csv(self.model_dir + '/test_id.csv')
        return train_id_df, test_id_df

    def save_group(self, group_train, group_test):
        """Pickle the train/test group objects into the model directory."""
        with open(self.model_dir + '/group_train.pickle', 'wb') as f:
            pickle.dump(group_train, f)
        with open(self.model_dir + '/group_test.pickle', 'wb') as f:
            pickle.dump(group_test, f)

    def read_group(self):
        """Return ``(group_train, group_test)`` unpickled from the model directory."""
        # NOTE: pickle.load executes arbitrary code; only load trusted files.
        with open(self.model_dir + '/group_train.pickle', 'rb') as f:
            group_train = pickle.load(f)
        with open(self.model_dir + '/group_test.pickle', 'rb') as f:
            group_test = pickle.load(f)
        return group_train, group_test

    def start_prediction(self, model_code=None):
        """Select a model, create a fresh prediction directory, return its code.

        Without ``model_code`` the sub-directory of ``model_dir`` whose name
        sorts last is used. Raises ``IndexError`` when there is none.
        """
        if not model_code:
            candidates = [
                entry for entry in os.listdir(self.model_dir)
                if os.path.isdir(os.path.join(self.model_dir, entry))
            ]
            if not candidates:
                raise IndexError(
                    'no model directory found under ' + self.model_dir)
            # Generated model codes embed their creation timestamp, so the
            # greatest name is the most recently created model.
            model_code = max(candidates)
        now = datetime.datetime.now()
        self.model_dir = self.model_dir + '/' + model_code
        prediction_code = 'p' + now.strftime('%Y%m%d%H%M%S')
        self.prediction_dir = self.base_dir + 'data/prediction/' + prediction_code
        os.makedirs(self.prediction_dir)
        return prediction_code

    def model_path(self):
        """Return the path of the model file inside the model directory."""
        return self.model_dir + '/model'

    def learning_years(self):
        """Return the years used for training data."""
        return [
            2014, 2015, 2016, 2017, 2018, 2019
        ]

    def master_years(self):
        """Return the years used for the master tables."""
        return [2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019]

    def read_race_card(self, folder, date):
        """Read ``data/<folder>/race_card_<date>.csv``."""
        df = pd.read_csv(
            self.base_dir + 'data/{0}/race_card_{1}.csv'.format(folder, date))
        return df

    def save_race_card(self, df, folder, date):
        """Write ``df`` to ``data/<folder>/race_card_<date>.csv``."""
        df.to_csv(self.base_dir +
                  'data/{0}/race_card_{1}.csv'.format(folder, date))

    def save_df(self, df, folder, label, year=None):
        """Write ``df`` to ``data/<folder>/<label>[_<year>].csv``."""
        if year:
            df.to_csv(self.base_dir +
                      'data/{0}/{1}_{2}.csv'.format(folder, label, str(year)))
        else:
            df.to_csv(self.base_dir + 'data/{0}/{1}.csv'.format(folder, label))

    def read_df(self, folder, label, year):
        """Read ``data/<folder>/<label>_<year>.csv``."""
        df = pd.read_csv(self.base_dir +
                         'data/{0}/{1}_{2}.csv'.format(folder,
                                                       label, str(year)))
        return df

    @staticmethod
    def _read_csv_files(pattern):
        """Concatenate every CSV matching ``pattern`` (empty frame if none)."""
        # Sorted so the row order does not depend on directory listing order.
        frames = [pd.read_csv(path) for path in sorted(glob.glob(pattern))]
        if not frames:
            return pd.DataFrame()
        # DataFrame.append was removed in pandas 2.0; concat once instead.
        return pd.concat(frames, ignore_index=True)

    def read_master(self, label):
        """Return all ``data/master/<label>*.csv`` files as one DataFrame."""
        return self._read_csv_files(
            self.base_dir + 'data/master/{}*.csv'.format(label))

    def read_learning_data(self, label):
        """Return all ``data/learn/<label>*.csv`` files as one DataFrame."""
        return self._read_csv_files(
            self.base_dir + 'data/learn/{}*.csv'.format(label))

    def save_prediction(self, df, p):
        """Write ``df`` to ``<prediction_dir>/<p>.csv``."""
        df.to_csv(self.prediction_dir + '/{}.csv'.format(p))

    def read_prediction(self, p):
        """Read ``<prediction_dir>/<p>.csv``."""
        return pd.read_csv(self.prediction_dir + '/{}.csv'.format(p))

    def show_result(self, date, prediction_code, result=None):
        """Print and save the per-race prediction report for ``date``.

        Args:
            date: Race date string used in the file names.
            prediction_code: Name of the prediction run directory to read.
            result: Optional DataFrame with ``uma_code``, ``favor`` and
                ``rank`` columns; when given, actual results are appended to
                each line and the output file is prefixed 'R' instead of 'P'.
        """
        # Select the requested run *before* reading its hp.json; reading first
        # used whatever directory an earlier call had left behind (or None).
        self.prediction_dir = self.base_dir + 'data/prediction/' + prediction_code
        HP = self.load_HP(True)
        ver = HP['ver']
        pref = 'P'
        pred = self.read_prediction(date)
        df = self.read_race_card('learn', date)
        df = df.sort_values(['id', 'uma_code'])
        df = df.reset_index()

        df = df[['id', 'uma_code', 'no', 'name', 'entry']]
        pred = pred.reset_index()
        df = pd.merge(df, pred, on='uma_code', how='left')
        if result is not None:
            pref = 'R'
            df = pd.merge(df, result, on='uma_code')

        df = df.sort_values(['id', 'uma_code'],
                            ascending=[True, False])
        vc = df['id'].value_counts(sort=False)
        vc = vc.sort_index(ascending=True)
        text = []
        text.append('競馬予想({0}) Marine ver.{1}'.format(date, ver))
        text.append('')
        for race_id in vc.index:
            d = df[df['id'] == race_id]
            d = d.sort_values(['prediction', 'no'], ascending=[False, True])
            umas = len(d[d['prediction'] > 0])
            if umas == 0:
                # No runner of this race has a positive prediction.
                continue

            # Characters 8-9 of the id are the course, 10-11 the race number.
            race_key = str(race_id)
            text.append('{0} {1}R {2}頭'.format(
                self.course_transdorm(race_key[8:10]),
                race_key[10:12], len(d)))

            for count, (_, row) in enumerate(d.iterrows(), start=1):
                if np.isnan(row['prediction']):
                    x = '  ----'
                else:
                    p = round(row['prediction'], 2)
                    x = round(p / (20/umas), 2)
                    x = '{:.02f}'.format(x).rjust(6)

                st = '{0} {1} {2}'.format(str(row['no']).zfill(2),
                                          row['name'].ljust(9, '　'),
                                          x)
                if result is not None:
                    f = int(row['favor'])
                    r = int(row['rank'])

                    st += '  '
                    if f == 0:
                        st += '------'
                    else:
                        st += '{0}人気'.format(str(f).rjust(2))
                    st += '  '
                    if r == 0:
                        st += '----'
                    else:
                        st += '{0}着'.format(str(r).rjust(2))

                text.append(st)

                # Visual separator after the fifth listed runner.
                if count == 5:
                    text.append('>>>')

            text.append(
                '----------------------------')
            text.append('')

        with open(self.prediction_dir + '/{}{}.txt'.format(pref, date), 'w',
                  newline='\n', encoding='utf-8') as f:
            for t in text:
                f.write(t + '\n')
                print(t)

    def course_transdorm(self, course):
        """Return the racecourse name for a course number (None if unknown)."""
        return self._COURSE_NAMES.get(int(course))


## DB

In [0]:
class DB:
    """Thin wrapper around one SQLite connection that returns rows as dicts."""

    def __init__(self, db_name=None):
        """Remember which database to use; nothing is opened yet.

        Args:
            db_name: Path of the SQLite file. When omitted, the notebook-level
                ``common.db_name`` is used.
        """
        self._conn = None
        self._cur = None
        self.db_name = common.db_name if db_name is None else db_name

    def open(self):
        """Open the connection and create the cursor used by ``exec``."""
        # Release a previous connection instead of silently dropping it.
        self.close()
        self._conn = sqlite3.connect(self.db_name)
        self._conn.row_factory = dict_factory
        self._cur = self._conn.cursor()

    def exec(self, sql, params=()):
        """Run ``sql`` and return all rows as a list of dicts.

        The connection is opened on first use. ``params`` are bound to ``?``
        placeholders in ``sql``; prefer them over string formatting.
        """
        if self._cur is None:
            # This used to call the builtin open() by mistake.
            self.open()
        self._cur.execute(sql, params)
        return self._cur.fetchall()

    def close(self):
        """Close cursor and connection; safe to call repeatedly or unopened."""
        if self._cur is not None:
            self._cur.close()
            self._cur = None
        if self._conn is not None:
            self._conn.close()
            self._conn = None


def dict_factory(cursor, row):
    """sqlite3 row factory: map each column name to its value in ``row``.

    When two result columns share a name, the later one wins.
    """
    return {col[0]: row[idx] for idx, col in enumerate(cursor.description)}

## Race

In [0]:

class Race(object):
    """A race condition object together with the runners taking part."""

    def __init__(self, cond, umas):
        self.cond = cond
        self.umas = umas
        # When the recorded entry count is 0, use the number of runners.
        if cond.entry == 0:
            cond.entry = len(umas)

    def header(self):
        """Return the column names matching one row of ``to_array``.

        Names come from the attributes of the condition, of the first runner
        (excluding its ``*_race`` attributes) and of that runner's three past
        races, the latter prefixed with 'l1', 'l2' and 'l3'.
        """
        first = self.umas[0]
        names = list(self.cond.__dict__)
        names.extend(key for key in first.__dict__
                     if not key.endswith('_race'))
        past_races = (('l1', first.last_race),
                      ('l2', first.last2_race),
                      ('l3', first.last3_race))
        for prefix, past in past_races:
            names.extend(prefix + key for key in past.__dict__)
        return names

    def is_valid(self):
        """Delegate the validity check to the race condition."""
        return self.cond.is_valid()

    def set_entry(self):
        """Overwrite the entry count with the current number of runners."""
        self.cond.entry = len(self.umas)

    def to_array(self):
        """Return one flat row per runner: condition, runner, three past races."""
        rows = []
        for uma in self.umas:
            uma.set_no_rate(self.cond.entry)
            row = self.cond.to_array() + uma.to_array()
            row = row + uma.last_race.to_array()
            row = row + uma.last2_race.to_array()
            row = row + uma.last3_race.to_array()
            rows.append(row)
        return rows


class Cond(object):
    """Conditions of one race, built from an ``n_race`` row.

    The attribute assignment order defines the column order of ``to_array``.
    """

    def __init__(self, record, condition):
        """Fill the fields from ``record``.

        A non-zero ``condition`` overrides the going stored in the record.
        """
        self.year = int(record['Year'])
        self.date = record['Year'] + record['MonthDay']
        self.course = course_value(record['JyoCD'])
        self.race_num = record['RaceNum']
        self.id = '{0}{1}{2}'.format(
            self.date, str(self.course).zfill(2), self.race_num)
        self.track = track(record['TrackCD'])
        self.distance = int(record['Kyori'])
        self.distance_code = distance_value(self.distance)
        self.category = category(record['JyokenCD5'], record['GradeCD'])
        self.category_code = category_code(self.category)

        # Track groups 1 and 2 read the turf going column, others the dirt one.
        if condition != 0:
            going = condition
        elif self.track in (1, 2):
            going = record['SibaBabaCD']
        else:
            going = record['DirtBabaCD']
        self.condition = int(going)

        self.condition_code = condition_code(self.condition)
        self.handicap = handicap(record['JyuryoCD'])
        self.mare_only = mare_only(record['KigoCD'])
        self.entry = int(record['SyussoTosu'])

    def is_valid(self, forcast=False):
        """Return False for track group 0, category 'NU', or (unless
        ``forcast``) fewer than 10 entries; True otherwise."""
        if self.track == 0 or self.category == 'NU':
            return False
        if not forcast and self.entry < 10:
            return False
        return True

    def to_array(self):
        """Return the attribute values in assignment order."""
        return list(self.__dict__.values())


class Uma(object):
    """One runner of a race plus summaries of its previous races.

    The attribute assignment order defines the column order of ``to_array``.
    """

    def __init__(self, record):
        self.rank = record['NyusenJyuni']
        self.uma_code = record['KettoNum']
        self.age = int(record['Barei'])
        self.sex = int(record['SexCD'])
        self.odds = int(record['Odds'])
        self.favor = int(record['Ninki'])
        self.jockey = record['KisyuCode']
        self.weight = int(record['Futan'])
        self.no = int(record['Umaban'])
        self.trainer = record['ChokyosiCode']
        self.father = record['Ketto3InfoHansyokuNum1']
        self.owner = record['BanusiCode']
        self.name = record['Bamei']
        self.long_interval = False
        self.last_race = LastRace()
        self.last2_race = LastRace()
        self.last3_race = LastRace()
        self.win_rate = 0
        self.double_rate = 0
        self.prize_rate = 0
        self.no_rate = 0

    def set_no_rate(self, entry):
        """Store the horse number divided by the number of entries."""
        self.no_rate = self.no / entry

    def set_results(self, records):
        """Summarise past-race rows into rates and the three last races.

        Only rows accepted by ``LastRace.is_valid`` count. The rates are the
        share of those runs finished 1st, in the top 3 and in the top 5. With
        fewer than three valid runs, the oldest available one is copied into
        the missing slots with ``dummy_flg`` set to 1.
        """
        valid_runs = []
        wins = top3 = top5 = 0
        for rec in records:
            past = LastRace()
            past.set(rec)
            if not past.is_valid():
                continue
            valid_runs.append(past)
            if past.place == 1:
                wins += 1
            if 0 < past.place <= 3:
                top3 += 1
            if 0 < past.place <= 5:
                top5 += 1

        starts = len(valid_runs)
        if starts == 0:
            return

        self.win_rate = wins / starts
        self.double_rate = top3 / starts
        self.prize_rate = top5 / starts

        def placeholder(source):
            # Shallow copy marked as a stand-in for a race that does not exist.
            clone = copy.copy(source)
            clone.dummy_flg = 1
            return clone

        self.last_race = valid_runs[0]
        if starts == 1:
            self.last2_race = placeholder(valid_runs[0])
            self.last3_race = placeholder(valid_runs[0])
        elif starts == 2:
            self.last2_race = valid_runs[1]
            self.last3_race = placeholder(valid_runs[1])
        else:
            self.last2_race = valid_runs[1]
            self.last3_race = valid_runs[2]

    def is_valid(self):
        """Return False when the horse number is 0."""
        return self.no != 0

    def to_array(self):
        """Return the attribute values, skipping the ``*_race`` objects."""
        return [value for key, value in self.__dict__.items()
                if not key.endswith('_race')]


class LastRace(object):
    """Figures of one earlier race of a horse.

    The attribute creation order in ``__init__`` defines the column order of
    ``to_array``; ``set`` only overwrites existing attributes.
    """

    def __init__(self):
        self.year = 0
        self.date = None
        # Every remaining field starts at 0, created in column order.
        for field in (
                'course', 'race_num', 'track', 'distance', 'distance_code',
                'category', 'category_code', 'condition', 'condition_code',
                'handicap', 'mare_only', 'entry', 'no', 'race_l3F', 'time',
                'l3F', 'p3', 'p4', 'p4_rate', 'favor', 'odds', 'jockey',
                'place', 'plus_minus', 'weight', 'horse_weight', 'speed',
                'time_difference', 'l3F_differnece', 'race_time', 'no_rate',
                'place_rate', 'dummy_flg'):
            setattr(self, field, 0)

    def set(self, record):
        """Populate the fields from one joined runner/race row."""
        self.year = int(record['Year'])
        self.date = record['Year'] + record['MonthDay']
        self.course = course_value(record['JyoCD'])
        self.race_num = record['RaceNum']
        self.track = track(record['TrackCD'])
        self.jockey = record['KisyuCode']
        self.distance = int(record['Kyori'])
        self.distance_code = distance_value(self.distance)
        self.category = category(record['JyokenCD5'], record['GradeCD'])
        self.category_code = category_code(self.category)
        # Track groups 1 and 2 read the turf going column, others the dirt one.
        if self.track in (1, 2):
            self.condition = int(record['SibaBabaCD'])
        else:
            self.condition = int(record['DirtBabaCD'])
        self.condition_code = condition_code(self.condition)
        self.handicap = handicap(record['JyuryoCD'])
        self.mare_only = mare_only(record['KigoCD'])
        self.entry = int(record['SyussoTosu'])
        has_entry = self.entry > 0

        self.no = int(record['Umaban'])
        self.no_rate = self.no / self.entry if has_entry else 0
        self.race_l3F = int(record['rHaronTimeL3'])
        self.time = to_seconds(record['Time'])

        self.l3F = int(record['uHaronTimeL3'])
        self.p3 = int(record['Jyuni3c'])
        self.p4 = int(record['Jyuni4c'])
        self.favor = int(record['Ninki'])

        self.odds = int(record['Odds'])
        self.place = int(record['NyusenJyuni'])
        # Ratios stay 0 when the entry count is unknown (0).
        self.place_rate = self.place / self.entry if has_entry else 0
        self.p4_rate = self.p4 / self.entry if has_entry else 0
        self.plus_minus = to_kg(record['ZogenFugo'], record['ZogenSa'])
        self.weight = int(record['Futan'])
        try:
            self.horse_weight = int(record['BaTaijyu'])
        except Exception:
            # An unparsable body weight is stored as missing.
            self.horse_weight = np.nan
        self.speed = self.time / self.distance if self.distance > 0 else 0

        self.time_difference = to_seconds(record['TimeDiff'])
        self.l3F_differnece = self.l3F - self.race_l3F
        self.race_time = self.time - self.time_difference

    def to_array(self):
        """Return the attribute values in creation order."""
        return list(self.__dict__.values())

    def is_valid(self):
        """Return whether this past race should be used.

        Rejected: steeplechase races (track 0), runs that were not finished
        (place 0), scratched runners (favor 0), runs beaten by 3 seconds or
        more (``time_difference`` above 30; the limit was raised from 2 to 3)
        and rows without an entry count.
        """
        if self.track == 0 or self.place == 0 or self.favor == 0:
            return False
        if self.time_difference > 30 or self.entry == 0:
            return False
        return True


def condition_code(c):
    """Return ``c`` as-is: the going value is used directly as its code."""
    code = c
    return code


def track(cd):
    """Map a two-digit track code to the track group used by this notebook.

    Returns:
        1 for code 10, 2 for codes 11-22, 3 for codes 23-29 and 0 for
        everything else, including empty or non-numeric codes. Callers read
        the turf going for groups 1 and 2 and discard group 0.
    """
    # Compare numerically: the former string comparison had no lower bound,
    # so '' and '00' sorted below '22' and were returned as group 2.
    try:
        number = int(cd)
    except (TypeError, ValueError):
        return 0

    if number == 10:
        return 1
    if 10 < number <= 22:
        return 2
    if 22 < number <= 29:
        return 3
    return 0



def category(code1, code2):
    """Return the race class label for a condition code and a grade code.

    Args:
        code1: Race condition code (the caller passes ``JyokenCD5``).
        code2: Grade code (the caller passes ``GradeCD``); only consulted
            when ``code1`` is '999'.

    Returns:
        '1C', '2C', '3C', 'NU' or 'ND' for the known condition codes; for
        '999' the label chosen by the grade ('G1', 'G2', 'G3' or 'OP');
        'E' for any other condition code.
    """
    by_condition = {
        '005': '1C',
        '010': '2C',
        '016': '3C',
        '701': 'NU',
        # The original tested ``code2 == '702'``, which can never match a
        # grade code, so condition '702' fell through to 'E'.
        '702': 'NU',
        '703': 'ND',
    }
    by_grade = {
        'A': 'G1',
        'B': 'G2',
        'C': 'G3',
        'D': 'OP',
        'E': 'OP',
        'L': 'OP',
    }

    if code1 == '999':
        # NOTE(review): any other grade code yields None, exactly as before
        # (category_code then maps it to 0). Confirm whether such races
        # should get a label instead.
        return by_grade.get(code2)
    return by_condition.get(code1, 'E')


def category_code(c):
    """Return the number (1-9) of a race class label, or 0 if unknown."""
    # Position in this tuple + 1 is the code of the label.
    labels = ('G1', 'G2', 'G3', 'OP', '1C', '2C', '3C', 'NU', 'ND')
    if c in labels:
        return labels.index(c) + 1
    return 0


def course_value(c):
    """Return the course number (1-10) for codes '01'-'10', else 0."""
    known_codes = ('01', '02', '03', '04', '05',
                   '06', '07', '08', '09', '10')
    if c in known_codes:
        return known_codes.index(c) + 1
    return 0


def handicap(code):
    """Return 1 when the weight code is '1', otherwise 0."""
    return 1 if code == '1' else 0


def mare_only(code):
    """Return 1 when the third character of ``code`` is '2', otherwise 0.

    Codes that are missing or shorter than three characters return 0 instead
    of raising, so one blank field does not abort the whole extraction.
    """
    if not code or len(code) < 3:
        return 0
    return 1 if code[2] == '2' else 0


def to_seconds(time):
    """Convert a time or time-difference string to an integer.

    Strings of up to three characters are returned as their integer value.
    For longer strings the first character is either a sign ('+' or '-')
    applied to the rest, or a digit that counts 600 units on top of the rest.
    NOTE(review): the unit looks like tenths of a second - confirm.
    """
    minutes, sign, digits = 0, 1, time
    if len(time) > 3:
        lead = time[0]
        if lead == '-':
            sign = -1
        elif lead != '+':
            minutes = int(lead)
        digits = time[1:]
    return minutes * 600 + int(digits) * sign


def to_kg(f, v):
    """Return the signed body-weight change.

    Args:
        f: Sign symbol; '-' makes the result negative, anything else positive.
        v: Magnitude as a string.

    Returns:
        ``int(v)`` with the sign from ``f``, or 0 when ``v`` is empty or not
        a number.
    """
    if v == '':
        return 0
    # The sign comes from ``f``. The original overwrote ``f`` and tested ``v``
    # for '-', so a weight loss was never returned as a negative number.
    sign = -1 if f == '-' else 1
    try:
        return int(v) * sign
    except (TypeError, ValueError):
        return 0


def distance_value(distance):
    """Return the distance band (1-8) of a race distance, or 0.

    0 is returned for a distance of exactly 0 and for anything above 3600.
    """
    if distance == 0:
        return 0

    # Inclusive upper bound of bands 1..8, in order.
    upper_bounds = (1300, 1500, 1700, 1900, 2100, 2300, 2500, 3600)
    for band, limit in enumerate(upper_bounds, start=1):
        if distance <= limit:
            return band
    return 0


def uma_ban(u):
    """Return an 18-slot one-hot list for horse number ``u`` (all zeros for 0)."""
    slots = [0 for _ in range(18)]
    if u != 0:
        slots[u - 1] = 1
    return slots


class Return:
    """Payout figures of one race, built from an ``N_HARAI`` row.

    The attribute assignment order defines the column order of ``to_array``
    and ``header``.
    """

    def __init__(self, record):
        self.year = int(record['Year'])
        self.date = record['Year'] + record['MonthDay']
        self.course = course_value(record['JyoCD'])
        self.race_num = record['RaceNum']
        # Same id layout as Cond: date, zero-padded course, race number.
        self.id = '{0}{1}{2}'.format(
            self.date, str(self.course).zfill(2), self.race_num)
        self.umaren1no = record['PayUmarenKumi1']
        self.umaren1pay = record['PayUmarenPay1']
        self.umaren2no = record['PayUmarenKumi2']
        self.umaren2pay = record['PayUmarenPay2']
        self.umaren3no = record['PayUmarenKumi3']
        self.umaren3pay = record['PayUmarenPay3']
        self.sanrenpuku1no = record['PaySanrenpukuKumi1']
        self.sanrenpuku1pay = record['PaySanrenpukuPay1']
        self.sanrenpuku2no = record['PaySanrenpukuKumi2']
        self.sanrenpuku2pay = record['PaySanrenpukuPay2']
        self.sanrenpuku3no = record['PaySanrenpukuKumi3']
        self.sanrenpuku3pay = record['PaySanrenpukuPay3']

    def to_array(self):
        """Return the attribute values in assignment order."""
        return list(self.__dict__.values())

    def header(self):
        """Return the attribute names in assignment order."""
        return list(self.__dict__.keys())
    
        



# 前処理


In [0]:

def test():
    """Smoke test: build and print the feature rows of one hard-coded race."""
    year = '2020'
    date = '0418'
    jo_cd = '03'
    race_num = '10'

    # DB() takes its path from ``common``; create_race_data needs the open
    # connection and a going override (0 = use the going stored in the DB).
    db = DB()
    db.open()
    try:
        r = create_race_data(db, year, date, jo_cd, race_num, 0)
        if r is None:
            # create_race_data returns None for races it rejects.
            print('no race data')
        else:
            data = r.to_array()
            print(data)
    finally:
        db.close()

def get_return(year):
    """Export the payout rows of ``year`` to the 'master' folder as 'return'.

    Nothing is written when the payout table has no rows for that year.
    """
    sql = (' select * from N_HARAI where year = "{}"'.format(year))
    db = DB()
    db.open()
    try:
        records = db.exec(sql)
    finally:
        # Release the cursor even when the query fails.
        db.close()

    if not records:
        # An empty result has no header to build the frame from.
        print('no return data for {}'.format(year))
        return

    payouts = [Return(r) for r in records]
    header = payouts[0].header()
    returns = [p.to_array() for p in payouts]
    df = pd.DataFrame(np.array(returns), columns=header)
    common.save_df(df, 'master', 'return', year)


def prepare():
    """Run ``pr_main`` for every learning year and save each result."""
    years = common.learning_years()
    for year in years:
        processed = pr_main(year)
        common.save_df(processed, 'learn', 'horse', year)


def prepare_this_year():
    """Run ``pr_main`` for the current calendar year and save the result."""
    this_year = datetime.date.today().year
    processed = pr_main(this_year)
    common.save_df(processed, 'learn', 'horse', this_year)


def result(date, prediction_code):
    """Collect the official results of ``date`` and print the report with them.

    Args:
        date: 'YYYYMMDD' string.
        prediction_code: Prediction run whose report is rendered.

    Returns:
        False when no finished race is found for that date, otherwise None.
    """
    y, md = date[:4], date[4:8]
    db = DB()
    db.open()
    try:
        # NOTE(review): "5" and "6" were full-width characters, which cannot
        # match the half-width codes this notebook uses everywhere else
        # (e.g. DataKubun = "7"); assumed to be an input-method typo.
        sql = ('select year, MonthDay, JyoCD, racenum from n_race '
               'where year = "{0}" and monthday = "{1}" and '
               'JyoCD <= "10" and (DataKubun = "5" or DataKubun = "6" '
               'or DataKubun = "7")' .format(y, md))

        races = db.exec(sql)
        if len(races) == 0:
            print('no result')
            return False

        # Collect plain dicts and build the frame once; DataFrame.append was
        # removed in pandas 2.0 and was quadratic anyway.
        rows = []
        for r in races:
            sql = ('select *  from n_uma_race  where year ="{0}" and '
                   'monthday ="{1}" and jyocd="{2}" and racenum="{3}" '
                   .format(r['Year'], r['MonthDay'], r['JyoCD'], r['RaceNum']))
            for u in db.exec(sql):
                rows.append({'uma_code': int(u['KettoNum']),
                             'favor': u['Ninki'],
                             'rank': u['KakuteiJyuni']})
    finally:
        # Also runs on the early return above.
        db.close()

    df = pd.DataFrame(rows, columns=['uma_code', 'favor', 'rank'])
    common.show_result(date, prediction_code, df)


def forcast(date, condition=1):
    """Build the raw and processed race cards for every race on ``date``.

    Args:
        date: 'YYYYMMDD' string.
        condition: Going passed on to ``create_race_data`` for every race.

    Returns:
        True when the race cards were written, False when there is no race
        on that date or none of them could be used.
    """
    y, m, d = date[:4], date[4:6], date[6:8]
    db = DB()
    db.open()
    try:
        sql = ('select year, MonthDay, JyoCD, racenum from n_race '
               'where year = "{0}" and monthday = "{1}" and '
               'JyoCD <= "10"' .format(y, str(m).zfill(2)+str(d).zfill(2)))
        print(sql)
        races = db.exec(sql)
        if len(races) == 0:
            print('no race to predict')
            return False
        data = []
        header = ''
        for r in races:
            x = create_race_data(db, r['Year'], r['MonthDay'],
                                 r['JyoCD'], r['RaceNum'], condition, True)
            if x:
                data.extend(x.to_array())
                if header == '':
                    header = x.header()
    finally:
        # The original wrote ``db.close`` without parentheses, so the cursor
        # was never closed; this also covers the early return.
        db.close()

    if not data:
        # Every race was rejected: there is no header to build a frame from.
        print('no race to predict')
        return False

    df1 = pd.DataFrame(np.array(data), columns=header)

    common.save_race_card(df1, 'raw', date)

    df = pr_main('forcast', date)
    common.save_race_card(df, 'learn', date)

    return True


def create_return_this_year():
    """Export the payout table for the current calendar year."""
    this_year = datetime.date.today().year
    get_return(this_year)


def create_return():
    """Export the payout table for every learning year."""
    years = common.learning_years()
    for year in years:
        get_return(year)


def create_horse_data_this_year():
    """Extract the raw horse data for the current calendar year."""
    this_year = datetime.date.today().year
    ex_main(this_year)


def create_horse_data():
    """Extract the raw horse data for every learning year."""
    years = common.learning_years()
    for year in years:
        ex_main(year)


def ex_main(year):
    """Extract the feature rows of every usable race of ``year``.

    Races are taken from ``n_race`` (DataKubun "7", JyoCD up to "10"), turned
    into rows by ``create_race_data`` and saved to the 'raw' folder as
    'horse'. Nothing is written when no race of that year is usable.
    """
    db = DB()
    db.open()
    try:
        sql = ('select year, MonthDay, JyoCD, racenum from n_race '
               'where year = "{0}" and DataKubun = "7" and '
               'JyoCD <= "10"' .format(year))
        races = db.exec(sql)
        print('{0}/{1}'.format(str(year), str(len(races))))

        data = []
        header = ''
        for r in races:
            # condition=0: use the going recorded for the race itself.
            x = create_race_data(db, r['Year'], r['MonthDay'],
                                 r['JyoCD'], r['RaceNum'], 0)
            if x:
                data.extend(x.to_array())
                if header == '':
                    header = x.header()
    finally:
        # Release the cursor even when one race fails to extract.
        db.close()

    if not data:
        # No usable race (e.g. early in a new year): there is no header to
        # build a frame from, so skip the export instead of crashing.
        print('no horse data for {}'.format(year))
        return

    df1 = pd.DataFrame(np.array(data), columns=header)
    common.save_df(df1, 'raw', 'horse', year)


def create_race_data(db, year, date, jo_cd, race_num,
                     condition, forcast=False):
    """Build the ``Race`` object (condition + runners) for one race.

    Args:
        db: Open ``DB`` used for every query.
        year, date, jo_cd, race_num: Key of the race in ``n_race``.
        condition: Going override handed to ``Cond`` (0 = use the DB value).
        forcast: True when preparing a prediction; then all entries are read
            in table order and at least 10 valid runners are required.

    Returns:
        The ``Race``, or None when the race condition is rejected or, in
        forecast mode, fewer than 10 valid runners remain.

    Raises:
        Any exception from the queries or record parsing, re-raised after its
        message has been printed.
    """
    try:
        race_sql = ('select *  from n_race where year ="{0}" and '
                    'monthday ="{1}" and jyocd="{2}" and '
                    'racenum="{3}"'.format(year, date, jo_cd, race_num))
        race_row = db.exec(race_sql)[0]
        cond = Cond(race_row, condition)
        if not cond.is_valid(forcast):
            print('this race is not suitable for learning')
            return

        # Outside forecast mode, runners with ninki "00" are left out and
        # the rest is ordered by finishing position.
        entry_sql = ('select *  from n_uma_race  where year ="{0}" and '
                     'monthday ="{1}" and jyocd="{2}" and racenum="{3}" ')
        if not forcast:
            entry_sql += ('and ninki <> "00" order by '
                          'NyusenJyuni')
        entry_sql = entry_sql.format(year, date, jo_cd, race_num)

        umas = []
        for entry in db.exec(entry_sql):
            horse_sql = (
                'select * from n_uma where kettonum="{}"'.format(
                    entry['KettoNum']))
            horse_rows = db.exec(horse_sql)
            # The sire id lives in the horse master table, not the entry row.
            entry['Ketto3InfoHansyokuNum1'] = \
                horse_rows[0]['Ketto3InfoHansyokuNum1']
            uma = Uma(entry)

            # Up to seven most recent runs of this horse before this race.
            history_sql = ('select a.*, b.*, a.HaronTimeL3 as uHaronTimeL3, '
                           'b.HaronTimeL3 as rHaronTimeL3  from n_uma_race '
                           'a, n_race b  where a.kettonum="{2}" and '
                           '((a.year < "{0}") or (a.year = "{0}" and '
                           'a.monthday < "{1}" )) and a.year = b.year '
                           'and a.monthday = b.monthday and a.jyocd = b.jyocd '
                           'and a.racenum = b.racenum order by year desc, monthday '
                           'desc limit 7'.format(year, date, entry['KettoNum']))
            uma.set_results(db.exec(history_sql))
            if uma.is_valid():
                umas.append(uma)

        if forcast and len(umas) < 10:
            return
        return Race(cond, umas)

    except Exception as e:
        tb = sys.exc_info()[2]
        print("message:{0}".format(e.with_traceback(tb)))
        raise e


def create_master_this_year():
    """Rebuild the jockey, trainer, owner and blood masters for this year."""
    today = datetime.date.today()
    y = today.year
    common.save_df(jockey(y), 'master', 'jockey', y)
    common.save_df(trainer(y), 'master', 'trainer', y)
    common.save_df(owner(y), 'master', 'owner', y)

    turf = blood(y, '10', '21', '_turf')
    dirt = blood(y, '22', '29', '_dirt')
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    common.save_df(pd.concat([turf, dirt], ignore_index=True),
                   'master', 'blood', y)


def creat_master():
    """Build and save multi-year master tables.

    For every year returned by ``common.master_years()`` the jockey,
    trainer, owner and blood (turf then dirt) tables are generated; the
    per-year tables of each kind are stacked into one frame and saved under
    a "<first year>-<last year>" key.

    Raises:
        IndexError: if ``common.master_years()`` returns an empty sequence.
    """
    years = common.master_years()

    jockey_frames = []
    trainer_frames = []
    owner_frames = []
    blood_frames = []
    for y in years:
        jockey_frames.append(jockey(y))
        trainer_frames.append(trainer(y))
        owner_frames.append(owner(y))
        blood_frames.append(blood(y, '10', '21', '_turf'))
        blood_frames.append(blood(y, '22', '29', '_dirt'))

    def _stack(frames):
        # Concatenate once instead of growing a frame inside the loop
        # (DataFrame.append is quadratic and was removed in pandas 2.0).
        if not frames:
            return pd.DataFrame()
        return pd.concat(frames, ignore_index=True)

    period = str(years[0]) + "-" + str(years[len(years) - 1])
    common.save_df(_stack(jockey_frames), 'master', 'jockey', period)
    common.save_df(_stack(trainer_frames), 'master', 'trainer', period)
    common.save_df(_stack(owner_frames), 'master', 'owner', period)
    common.save_df(_stack(blood_frames), 'master', 'blood', period)


def _rates_by_code(year, code_column, name_column):
    """Query n_uma_race for one year and build a win/place-rate table.

    Three grouped counts are fetched per ``code_column`` value: all runs,
    runs with Nyusenjyuni "01", and runs with Nyusenjyuni "01"-"03". The
    rows are handed to ``export`` together with the year.

    Args:
        year: value compared against the ``year`` column.
        code_column: column used as the grouping key (selected as ``code``).
        name_column: column selected as the display ``name``.

    Returns:
        Whatever ``export(totals, wins, double, year)`` returns.
    """
    db = DB()
    # NOTE(review): `open` is referenced but not called; if it is a plain
    # method this line has no effect - confirm against the DB class.
    db.open
    try:
        sql = ('select {0} as code, count(*) as count, '
               '{1} as name from n_uma_race where year ="{2}" '
               'group by {0}'.format(code_column, name_column, year))
        totals = db.exec(sql)
        sql = ('select {0} as code, count(*) as count '
               'from n_uma_race where year ="{1}" and Nyusenjyuni = "01" '
               'group by {0}'.format(code_column, year))
        wins = db.exec(sql)
        sql = ('select {0} as code, count(*) as count '
               'from n_uma_race where year ="{1}" and (Nyusenjyuni = "01" '
               'or Nyusenjyuni = "02" or Nyusenjyuni = "03") '
               'group by {0}'.format(code_column, year))
        double = db.exec(sql)
    finally:
        # Release the handle even when one of the queries raises.
        db.close()
    return export(totals, wins, double, year)


def jockey(year):
    """Return the per-jockey rate table for ``year`` (keyed by kisyucode)."""
    return _rates_by_code(year, 'kisyucode', 'kisyuryakusyo')


def trainer(year):
    """Return the per-trainer rate table for ``year`` (keyed by chokyosicode)."""
    return _rates_by_code(year, 'chokyosicode', 'chokyosiryakusyo')


def owner(year):
    """Return the per-owner rate table for ``year`` (keyed by banusicode)."""
    return _rates_by_code(year, 'banusicode', 'banusiname')


def blood(year, cd1, cd2, label):
    """Build the sire (blood line) rate table for one year and track range.

    Joins n_uma_race, n_uma and n_race, restricted to races of ``year``
    whose TrackCD lies in [cd1, cd2], and fetches three grouped counts:
    all runs, runs with Nyusenjyuni "01", and runs with Nyusenjyuni
    "01"-"03".

    Args:
        year: value compared against ``a.year``.
        cd1: lower TrackCD bound (inclusive).
        cd2: upper TrackCD bound (inclusive).
        label: suffix forwarded to ``export`` for the code column name.

    Returns:
        Whatever ``export(totals, wins, double, year, label)`` returns.
    """
    # The join and filter are identical for all three queries.
    # NOTE(review): rows are grouped by the name column while `code` is the
    # lookup key used by export(); this assumes a 1:1 name/code mapping.
    source = ('from n_uma_race a, n_uma b, n_race c where a.year ="{}" '
              'and a.kettonum = b.kettonum and c.TrackCD >= "{}" '
              'and c.TrackCD <= "{}" and c.year= a.year and '
              'a.monthday = c.monthday and a.jyocd = c.jyocd and '
              'a.racenum = c.racenum').format(year, cd1, cd2)
    grouping = ' group by b.ketto3infobamei1'

    db = DB()
    # NOTE(review): `open` is referenced but not called; if it is a plain
    # method this line has no effect - confirm against the DB class.
    db.open
    try:
        totals = db.exec(
            'select b.ketto3infohansyokunum1 as code, '
            'b.ketto3infobamei1 as name, count(*) as count '
            + source + grouping)
        wins = db.exec(
            'select b.ketto3infohansyokunum1 as code, count(*) as count '
            + source + ' and a.Nyusenjyuni = "01"' + grouping)
        double = db.exec(
            'select b.ketto3infohansyokunum1 as code, count(*) as count '
            + source + ' and (a.Nyusenjyuni = "01" or '
            'a.Nyusenjyuni = "02" or a.Nyusenjyuni = "03")' + grouping)
    finally:
        # Release the handle even when one of the queries raises.
        db.close()
    return export(totals, wins, double, year, label)


def export(totals, wins, double, year, label=None):
    """Combine grouped counts into a rate table.

    Args:
        totals: rows with 'code', 'count' and 'name' (one per entity).
        wins: rows with 'code' and 'count' for first-place finishes.
        double: rows with 'code' and 'count' for top-three finishes.
        year: appended to the code column name (``code_<year>``).
        label: optional suffix appended after the year.

    Returns:
        DataFrame with columns name, total, win, double and
        ``code_<year><label>``; ``win`` and ``double`` are the counts
        divided by the total. Because the rows pass through a NumPy array
        of mixed types, all cells are stored as strings. An empty
        ``totals`` yields an empty frame with the same columns.
    """
    def _count_by_code(rows):
        # Later rows overwrite earlier ones on duplicate codes.
        return {row['code']: row['count'] for row in rows}

    total_by_code = _count_by_code(totals)
    win_by_code = _count_by_code(wins)
    place_by_code = _count_by_code(double)

    rows = []
    for total in totals:
        code = total['code']
        runs = total_by_code.get(code, 1)
        rows.append([total['name'],
                     runs,
                     win_by_code.get(code, 0) / runs,
                     place_by_code.get(code, 0) / runs,
                     code])

    columns = ['name', 'total', 'win', 'double',
               'code_' + str(year) + (label if label else '')]
    # reshape keeps the array two-dimensional when there are no rows, so an
    # empty result no longer fails the column-count check.
    return pd.DataFrame(np.array(rows).reshape(-1, len(columns)),
                        columns=columns)


def transfom(list):
    """Return a dict mapping each row's 'code' to its 'count'.

    When a code occurs more than once the last row wins.
    """
    return {row['code']: row['count'] for row in list}


def pr_main(year='forcast', date=None):
    """Attach jockey/trainer/owner/blood rate features to horse rows.

    Args:
        year: ``'forcast'`` to read the race card for ``date``; any other
            value is passed to ``common.read_df('raw', 'horse', year)``.
        date: race-card date, only used when ``year == 'forcast'``.

    Returns:
        The input frame with the ``*_win`` / ``*_double`` columns produced
        by ``add_rate_data`` appended column-wise. The helper ``*test``
        columns are dropped before returning.
    """
    j_df = common.read_master('jockey')
    t_df = common.read_master('trainer')
    o_df = common.read_master('owner')
    b_df = common.read_master('blood')
    if year == 'forcast':
        df = common.read_race_card('raw', date)
    else:
        df = common.read_df('raw', 'horse', year)

    # Collect one dict per row and build the frame once; appending to a
    # DataFrame per row is quadratic and DataFrame.append no longer exists
    # in pandas 2.0.
    records = []
    for _, item in df.iterrows():
        # Track value 3 selects the dirt blood table, anything else turf.
        suffix = '_dirt' if item['track'] == 3 else '_turf'
        parts = (
            add_rate_data('jockey', j_df, item['year'], item['jockey']),
            add_rate_data('trainer', t_df, item['year'], item['trainer']),
            add_rate_data('owner', o_df, item['year'], item['owner']),
            add_rate_data('blood', b_df, item['year'], item['father'],
                          suffix),
            add_rate_data('l1jockey', j_df, item['l1year'],
                          item['l1jockey']),
            add_rate_data('l2jockey', j_df, item['l2year'],
                          item['l2jockey']),
            add_rate_data('l3jockey', j_df, item['l3year'],
                          item['l3jockey']),
        )
        merged = {}
        for part in parts:
            merged.update(part)
        records.append(merged)
    d = pd.DataFrame(records)

    # Column-wise join relies on both frames sharing the same row index.
    df = pd.concat([df, d], axis=1)

    # Sanity check: the echoed code must match the source column.
    for _, item in df.iterrows():
        if item['jockey'] != item['jockeytest']:
            print('err')

    return df.drop(['jockeytest', 'ownertest', 'trainertest', 'bloodtest',
                    'l1jockeytest', 'l2jockeytest', 'l3jockeytest'], axis=1)


def add_rate_data(label, df, year, code, suffix='', min_year=2012,
                  lookback=3):
    """Look up the win / top-three rate of ``code`` in a master table.

    The table is searched in column ``code_<year><suffix>``; when no row
    matches, the previous years are tried, ``lookback`` years in total.
    Years whose column is absent from ``df`` are skipped instead of raising.

    Args:
        label: prefix for the returned keys.
        df: master table with ``win``, ``double`` and ``code_*`` columns.
        year: year of the race the rate is needed for.
        code: entity code; compared as its string form.
        suffix: appended to the column name (e.g. a track label).
        min_year: years before this return zeros without any lookup.
        lookback: number of consecutive years to try, newest first.

    Returns:
        dict with ``<label>_win``, ``<label>_double`` and ``<label>test``
        (the code echoed back). Rates are 0 when nothing was found.
    """
    def _result(win, double):
        return {
            label + '_win': win,
            label + '_double': double,
            label + 'test': code
        }

    if year < min_year:
        return _result(0, 0)

    # Compare against the string form, as the original string-built query
    # did, but without embedding the value in an expression.
    target = '{}'.format(code)
    for offset in range(lookback):
        column = 'code_{0}{1}'.format(str(year - offset), suffix)
        if column not in df.columns:
            continue
        matched = df[df[column] == target]
        if len(matched.index) > 0:
            return _result(matched['win'].values[0],
                           matched['double'].values[0])
    return _result(0, 0)




# ディープラーニング


## 学習モデル

In [0]:

class Marine(nn.Module):
    """Two-hidden-layer perceptron producing one score per input row.

    Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout(0.4); the
    head is a Linear layer with a single output.

    Args:
        D: number of input features.
        H: width of both hidden layers.
    """

    def __init__(self, D, H):
        super().__init__()
        # Attribute names and creation order define the state_dict keys,
        # so they must stay as they are for saved models to load.
        self.l1 = nn.Linear(D, H)
        self.b1 = nn.BatchNorm1d(H)
        self.r1 = nn.ReLU()
        self.d1 = nn.Dropout(0.4)
        self.l2 = nn.Linear(H, H)
        self.b2 = nn.BatchNorm1d(H)
        self.r2 = nn.ReLU()
        self.d2 = nn.Dropout(0.4)
        self.l3 = nn.Linear(H, 1)

    def forward(self, x):
        """Return the raw score for every row of ``x``."""
        hidden = self.d1(self.r1(self.b1(self.l1(x))))
        hidden = self.d2(self.r2(self.b2(self.l2(hidden))))
        return self.l3(hidden)



## 学習パラメータ

In [0]:

# Columns that clean() one-hot encodes. The un-prefixed names are selected
# alongside three parallel groups prefixed l1 / l2 / l3.
# The order of this list fixes the order in which the encoder receives the
# columns, so it must stay in sync with any encoder that has been saved.
category_columns = ['course',
                    'track',
                    'distance_code',
                    'category_code',
                    'condition_code',
                    'handicap',
                    # 'mare_only',
                    'age',
                    'sex',
                    'l1course',
                    'l1track',
                    'l1distance_code',
                    'l1category_code',
                    'l1condition_code',
                    'l1handicap',
                    # 'l1mare_only',
                    'l2course',
                    'l2track',
                    'l2distance_code',
                    'l2category_code',
                    'l2condition_code',
                    'l2handicap',
                    'l2dummy_flg',
                    # 'l2mare_only',
                    'l3course',
                    'l3track',
                    'l3distance_code',
                    'l3category_code',
                    'l3condition_code',
                    'l3handicap',
                    'l3dummy_flg'#,
                    # Disabled candidates kept for reference:
                    #'no',
                    #'l1no',
                    #'l2no',
                    #'l3no'
                    # 'l3mare_only'

                    ]

# Numeric columns that clean() standardises with StandardScaler.
# Names are column keys of the input frame and must match it exactly
# (including the existing 'differnece' spelling); the order fixes the
# column order seen by the scaler.
number_columns = ['weight',
                  'win_rate',
                  'double_rate',
                  'prize_rate',
                  'no_rate',
                  # l1-prefixed group
                  'l1p4_rate',
                  'l1odds',
                  'l1plus_minus',
                  'l1weight',
                  'l1horse_weight',
                  'l1speed',
                  'l1time_difference',
                  'l1l3F_differnece',
                  'l1no_rate',
                  'l1place_rate',
                  # l2-prefixed group
                  'l2p4_rate',
                  'l2odds',
                  'l2plus_minus',
                  'l2weight',
                  'l2horse_weight',
                  'l2speed',
                  'l2time_difference',
                  'l2l3F_differnece',
                  'l2no_rate',
                  'l2place_rate',
                  # l3-prefixed group
                  'l3p4_rate',
                  'l3odds',
                  'l3plus_minus',
                  'l3weight',
                  'l3horse_weight',
                  'l3speed',
                  'l3time_difference',
                  'l3l3F_differnece',
                  'l3no_rate',
                  'l3place_rate',
                  # Rate features added by pr_main() / add_rate_data()
                  'blood_double',
                  'blood_win',
                  'jockey_double',
                  'jockey_win',
                  'l1jockey_double',
                  'l1jockey_win',
                  'l2jockey_double',
                  'l2jockey_win',
                  'l3jockey_double',
                  'l3jockey_win',
                  'owner_double',
                  'owner_win',
                  'trainer_double',
                  'trainer_win'
                  ]


## 学習・推論

In [0]:
def construct(test_rate=0.25):
    """Prepare and persist the train/test split for a new model.

    Reads the learning data, cleans it, orders rows by race id and horse
    number (both descending) and assigns whole races to either split:
    races alternate test/train until the test split holds more than
    ``test_rate`` of all rows, after which every remaining race goes to
    train. Feature frames, id frames and per-race group sizes are saved
    through ``common``.

    Args:
        test_rate: target fraction of rows for the test split.

    Returns:
        The model code returned by ``common.set_model_code()``.
    """
    model_code = common.set_model_code()

    df = common.read_learning_data('horse')
    df = clean(df)
    logger.info('race date generated')

    df = df.drop('uma_code', axis=1)
    df = df.sort_values(['id', 'no'], ascending=[False, False])

    # Rows per race, in the same (descending id) order as the sorted frame.
    # Series.iteritems was removed in pandas 2.0.
    race_sizes = df['id'].value_counts(sort=False).sort_index(
        ascending=False)
    group = race_sizes.tolist()

    test_size = len(df) * test_rate

    # Slices are collected and concatenated once; growing a DataFrame with
    # append inside the loop is quadratic and no longer available.
    test_parts = []
    train_parts = []
    group_test = []
    group_train = []
    test_rows = 0
    split_index = 0
    for_test = True

    logger.info('splitting data with train and test')

    for g in group:
        race_rows = df.iloc[split_index:split_index + g]
        if for_test:
            for_test = False
            group_test.append(g)
            test_parts.append(race_rows)
            test_rows += len(race_rows)
        else:
            # Keep alternating only while the test split is still short.
            if test_rows <= test_size:
                for_test = True
            group_train.append(g)
            train_parts.append(race_rows)
        split_index += g

    logger.info('data splitted train:{} test:{}'.format(len(group_train),
                                                        len(group_test)))

    train_df = pd.concat(train_parts) if train_parts else pd.DataFrame()
    test_df = pd.concat(test_parts) if test_parts else pd.DataFrame()

    # reset_index() keeps the old index as an 'index' column; learn()
    # drops that column explicitly.
    train_df = train_df.reset_index()
    test_df = test_df.reset_index()

    train_id_df = train_df[['id', 'no']]
    test_id_df = test_df[['id', 'no']]

    train_df = train_df.drop(['id', 'no'], axis=1)
    test_df = test_df.drop(['id', 'no'], axis=1)

    common.save_learning_file(train_df, test_df)
    common.save_learning_id_file(train_id_df, test_id_df)
    common.save_group(group_train, group_test)
    return model_code



def _run_races(model, xs, ys, ids, groups, return_df, opt=None):
    """Run one pass over consecutive per-race batches and average metrics.

    Args:
        model: network to evaluate (and train when ``opt`` is given).
        xs: feature tensor, rows ordered race by race.
        ys: target tensor aligned with ``xs``.
        ids: id array aligned with ``xs``, forwarded to ``evaluation``.
        groups: number of rows of each race, in row order.
        return_df: payout table forwarded to ``evaluation``.
        opt: optimiser; when given every race is one optimisation step.

    Returns:
        dict with the mean 'loss' and 'point', the 'hit' rate and the
        payout ratios 'u_return' / 's_return'.
    """
    sums = dict.fromkeys(('loss', 'point', 'hit', 'umaren_get',
                          'umaren_bet', 'sanrenpuku_get',
                          'sanrenpuku_bet'), 0)
    cur_batch = 0
    for batch_size in groups:
        batch_x = xs[cur_batch: cur_batch + batch_size]
        batch_y = ys[cur_batch: cur_batch + batch_size]
        if opt is not None:
            opt.zero_grad()

        batch_pred = model(batch_x)
        batch_loss = place_loss(batch_y, batch_pred)
        if opt is not None:
            # Only one backward pass per graph, so retain_graph is not
            # needed.
            batch_loss.backward()

        batch_point, batch_hit, ub, ug, sb, sg = evaluation(
            batch_y, batch_pred,
            ids[cur_batch: cur_batch + batch_size],
            return_df)

        sums['loss'] += batch_loss.item()
        sums['point'] += batch_point
        sums['hit'] += batch_hit
        sums['umaren_get'] += ug
        sums['umaren_bet'] += ub
        sums['sanrenpuku_get'] += sg
        sums['sanrenpuku_bet'] += sb

        cur_batch += batch_size
        if opt is not None:
            opt.step()

    num_iteration = len(groups)
    return {
        'loss': sums['loss'] / num_iteration,
        'point': sums['point'] / num_iteration,
        # evaluation() reports +1 / -1 per race; map the mean to [0, 1].
        'hit': ((sums['hit'] / num_iteration) + 1) / 2,
        'u_return': sums['umaren_get'] / sums['umaren_bet'],
        's_return': sums['sanrenpuku_get'] / sums['sanrenpuku_bet'],
    }


def learn(model_code=None):
    """Train the Marine model and save the best-scoring weights.

    Hyper-parameters are read from the module-level ``HP`` dict ('epochs',
    'node', 'opt', 'ver'); result metrics are written back into it and
    saved with ``common.save_HP``. Each race is one mini-batch. After every
    epoch the test split is scored; from the third epoch on, the weights
    are saved whenever the test hit rate improves.

    Args:
        model_code: existing model code to reuse; when falsy a new split is
            built with ``construct()``.

    Any exception is logged with its traceback and swallowed.
    """
    try:
        if not model_code:
            model_code = construct()
        else:
            common.set_model_code(model_code)

        logger.info('---start learning---')
        group_train, group_test = common.read_group()
        train_df, test_df = common.read_learning_file()
        train_id_df, test_id_df = common.read_learning_id_file()
        return_df = common.read_master('return')
        logger.info('leaning file read')

        non_features = ['answer', 'index', 'Unnamed: 0']
        x_train, y_train = (train_df.drop(non_features, axis=1),
                            train_df['answer'])
        x_test, y_test = (test_df.drop(non_features, axis=1),
                          test_df['answer'])
        z_train = train_id_df.values
        z_test = test_id_df.values

        # Horse counts are row counts, race counts are group counts
        # (these four labels were previously assigned crosswise).
        HP['num_train_horse'] = len(x_train)
        HP['num_train_race'] = len(group_train)
        HP['num_test_horse'] = len(x_test)
        HP['num_test_race'] = len(group_test)

        D = len(x_train.columns)
        HP['input'] = D
        epochs = HP['epochs']
        node = HP['node']

        x_train = try_gpu(torch.tensor(x_train.values.astype(np.float32)))
        x_test = try_gpu(torch.tensor(x_test.values.astype(np.float32)))

        logger.info('Marine ver.{}'.format(HP['ver']))
        logger.info('Hyper parameter: {}'.format(str(HP)))

        logger.info('input :{0} node:{1}'.format(D, node))

        yt = np.array(y_train)
        yv = np.array(y_test)
        y_train = try_gpu(torch.tensor(yt[:, np.newaxis]))
        y_test = try_gpu(torch.tensor(yv[:, np.newaxis]))
        model = try_gpu(Marine(D, node))

        if HP['opt'] == 'SGD':
            opt = optim.SGD(model.parameters(), lr=0.0001, weight_decay=0.0001)
        elif HP['opt'] == 'Adabound':
            opt = adabound.AdaBound(model.parameters(),
                                    lr=1e-3, final_lr=0.5)
        else:
            opt = optim.Adam(model.parameters(), weight_decay=0)

        train_loss = []
        test_loss = []
        train_point = []
        test_point = []
        train_hit = []
        test_hit = []
        max_hit = 0
        train_s_return = []
        train_u_return = []
        test_s_return = []
        test_u_return = []
        best_epoch = 0

        del train_df, test_df, train_id_df, test_id_df
        gc.collect()

        for epoch in range(epochs):
            logger.info('epoch :{}'.format(epoch+1))

            # Dropout / BatchNorm must be in training mode for the update
            # pass and in inference mode for scoring, which is also how
            # predict() runs the saved weights.
            model.train()
            train = _run_races(model, x_train, y_train, z_train,
                               group_train, return_df, opt)
            train_loss.append(train['loss'])
            train_point.append(train['point'])
            train_hit.append(train['hit'])
            train_u_return.append(train['u_return'])
            train_s_return.append(train['s_return'])

            logger.info(
                'train loss:{0} point:{1} hit:{2}'.format(
                    train['loss'], train['point'], train['hit']))
            logger.info(
                'train umaren return:{} sanrenpuku return:{}'.format(
                    train['u_return'], train['s_return']))

            model.eval()
            with torch.no_grad():
                test = _run_races(model, x_test, y_test, z_test,
                                  group_test, return_df)
            test_loss.append(test['loss'])
            test_point.append(test['point'])
            test_hit.append(test['hit'])
            test_u_return.append(test['u_return'])
            test_s_return.append(test['s_return'])

            logger.info(
                'test loss:{0} point:{1} hit:{2}'.format(
                    test['loss'], test['point'], test['hit']))
            logger.info(
                'test umaren return:{} sanrenpuku return:{}'.format(
                    test['u_return'], test['s_return']))

            hit = test['hit']
            if hit > max_hit:
                # The first two epochs are never recorded as best.
                if epoch > 1:
                    logger.info('max hit ({}) is updated.'.format(hit))
                    max_hit = hit
                    best_epoch = epoch
                    torch.save(model.state_dict(), common.model_path())
                    logger.info('model saved')

        logger.info('epoch {} end'.format(epochs))
        logger.info('---finish learning---')
        logger.info('best hit echoch: {0} hit:{1} point：{2}'.format(
            best_epoch+1, max_hit, test_point[best_epoch]))
        logger.info('u_return: {} s_return:{}'.format(test_u_return[best_epoch],
                                                     test_s_return[best_epoch]))

        plt.xlabel('epoch')
        plt.plot(train_loss, label='train_loss')
        plt.plot(test_loss, label='test_loss')
        plt.plot(train_point, label='train_point')
        plt.plot(test_point, label='test_point')
        plt.plot(train_hit, label='train_hit')
        plt.plot(test_hit, label='test_hit')
        plt.legend()
        # plt.show()
        HP['best_epoch'] = best_epoch+1
        HP['u_return'] = test_u_return[best_epoch]
        HP['s_return'] = test_s_return[best_epoch]
        HP['hit'] = max_hit
        HP['point'] = test_point[best_epoch]

        common.save_HP(HP)
    except Exception:
        # logger.exception records the traceback; passing the exception as
        # a bare format argument made the logging call itself fail.
        logger.exception('error occured')


def predict(p, model_code=None):
    """Score every horse on the race card of one event date.

    The card is cleaned with the saved encoders, ordered race by race, and
    each race is passed through the saved model as one batch. Scores are
    soft-maxed within the race and scaled to percentages, then saved and
    displayed through ``common``.

    Args:
        p: event date key passed to ``common.read_race_card``.
        model_code: model to use, forwarded to ``common.start_prediction``.

    Returns:
        The prediction code from ``common.start_prediction``, or ``None``
        when an error occurred (the error is logged with its traceback).
    """
    try:
        prediction_code = common.start_prediction(model_code)
        logger.info('---start prediction---')
        logger.info('event date:{}'.format(p))
        df = common.read_race_card('learn', p)
        df = clean(df, True)

        # Rows per race in descending id order, matching the sort below.
        # Series.iteritems was removed in pandas 2.0.
        group = df['id'].value_counts(sort=False).sort_index(
            ascending=False).tolist()
        df = df.sort_values(['id', 'no'], ascending=[False, True])

        df_p_uma_code = df['uma_code']

        df = df.drop(['id', 'uma_code', 'answer', 'no'], axis=1)
        hp = common.load_HP()

        data = torch.tensor(df.values.astype(np.float32))
        model = Marine(len(df.columns), hp['node'])
        # The model and data live on the CPU here; map_location lets
        # weights saved from a GPU run load on a machine without CUDA.
        model.load_state_dict(
            torch.load(common.model_path(), map_location='cpu'))
        model.eval()
        cur_batch = 0
        pred = []

        logger.info('rece :{} horse: {}'.format(len(group), len(df)))

        with torch.no_grad():
            for batch_size in group:
                x = data[cur_batch: cur_batch + batch_size]
                # Normalise the scores within the race.
                y = F.softmax(model(x), dim=0)
                y_num = to_cpu(y).detach().numpy().reshape(-1) * 100
                cur_batch += batch_size
                pred.extend(y_num)

        df = pd.DataFrame(df_p_uma_code)
        df['prediction'] = pred
        logger.info('---finish prediction---')
        common.save_HP(hp, True)
        common.save_prediction(df, p)
        common.show_result(p, prediction_code)
        return prediction_code
    except Exception:
        # logger.exception records the traceback; passing the exception as
        # a bare format argument made the logging call itself fail.
        logger.exception('error occured')


def define_rank(df, switch=1):
    """Convert finishing positions into the training target column.

    Args:
        df: one-dimensional sequence (e.g. a Series) of numeric ranks.
        switch: 1 maps ranks to labels (99 stays 99, 0 or anything above 3
            becomes 0, everything else becomes 1); 2 keeps the rank as is.

    Returns:
        DataFrame with a single float ``answer`` column and a fresh
        0..n-1 index. Any other ``switch`` value yields an empty frame.
    """
    # Float dtype is kept on purpose: the row-by-row construction this
    # replaces also produced floats, and the loss applies softmax to the
    # target, which needs a floating-point tensor.
    ranks = np.asarray(df, dtype=float)

    if switch == 1:
        is_unplaced = (ranks == 0) | (ranks > 3)
        answers = np.where(ranks == 99, 99.0,
                           np.where(is_unplaced, 0.0, 1.0))
    elif switch == 2:
        answers = ranks
    else:
        return pd.DataFrame()

    return pd.DataFrame({'answer': answers})


def clean(df, prediction=False, min_horses=7):
    """Filter, encode and scale horse rows into model-ready features.

    Rows whose l1course / l2course / l3course is 0 are dropped, and for
    training also rows with rank 0. Races left with fewer than
    ``min_horses`` rows are removed. Categorical columns are one-hot
    encoded and numeric columns standardised; the encoders are fitted and
    saved when training and loaded when predicting.

    Args:
        df: raw horse rows containing ``category_columns``,
            ``number_columns``, 'id', 'no', 'uma_code' and 'rank'.
        prediction: True to reuse saved encoders and keep rank-0 rows.
        min_horses: minimum number of rows a race must keep.

    Returns:
        DataFrame with uma_code, id, no, answer, then the scaled numeric
        and the one-hot encoded columns.
    """
    has_history = ((df['l1course'] != 0)
                   & (df['l2course'] != 0)
                   & (df['l3course'] != 0))
    df = df[has_history]
    if not prediction:
        df = df[df['rank'] != 0]

    # Drop races with fewer than `min_horses` runners in one pass
    # (Series.iteritems was removed in pandas 2.0).
    race_sizes = df['id'].value_counts()
    small_races = race_sizes[race_sizes < min_horses].index
    df = df[~df['id'].isin(small_races)]
    df = df.reset_index()

    df_rank = df['rank']
    df_uma = df['uma_code']
    df_group = df[['id', 'no']]
    df_category = df[category_columns].astype('category')
    df_number = df[number_columns]

    if prediction:
        ss, ohe = common.load_encoders()
    else:
        # NOTE(review): confirm 'impute' is still an accepted
        # handle_unknown value for the installed category_encoders.
        ohe = ce.OneHotEncoder(cols=df_category.columns,
                               handle_unknown='impute')
        ohe.fit(df_category)
        ss = StandardScaler()
        ss.fit(df_number)

    df_category = ohe.transform(df_category)
    df_number = pd.DataFrame(ss.transform(df_number),
                             columns=df_number.columns,
                             index=df_number.index)

    if not prediction:
        common.save_encoders(ss, ohe)

    df_rank = define_rank(df_rank, 1)
    df = pd.concat([df_uma, df_group, df_rank, df_number, df_category],
                   axis=1)

    return df


def place_loss(y, p):
    """Cross-entropy between the soft-maxed targets and predictions.

    Both tensors are normalised with softmax along dimension 0; a small
    constant is added to the predicted probabilities before the logarithm.
    Returns a scalar tensor.
    """
    epsilon = 1e-7
    target_dist = F.softmax(y, dim=0)
    predicted_dist = F.softmax(p, dim=0)
    log_predicted = torch.log(predicted_dist + epsilon)
    return -(target_dist * log_predicted).sum()


def evaluation(answer, pred, id, return_df):
    """Score the predicted ranking of one race and simulate the bets.

    Args:
        answer: per-horse targets; values 1, 2 or 3 mark placed horses.
        pred: per-horse scores, higher meaning better.
        id: per-horse rows where element 1 is the race id and element 2
            the horse number.
        return_df: payout table with an 'id' column holding race ids.

    Returns:
        Tuple ``(point, r, ub, ug, sb, sg)``: ``point`` starts at 0.1 and
        gains 0.3 for each placed horse ranked in the top three and 0.2 for
        each ranked fourth or fifth; ``r`` is 1 when at least three horses
        placed and ``point`` reached 0.8, otherwise -1; the last four
        values come from ``baken``.

    Raises:
        Exception: if the rows in ``id`` belong to more than one race.
        IndexError: if ``return_df`` has no row for the race.
    """
    race = id[0][1]
    for i in id:
        if i[1] != race:
            logger.error('id not match: {0} {1}'.format(race, i[1]))
            raise Exception('race id not match')

    race_return = return_df[return_df['id'] == race].head(1).to_dict(
        orient='records')[0]

    def _as_flat_list(values):
        # One transfer to host memory instead of one per element.
        if hasattr(values, 'detach'):
            values = values.detach().cpu()
        return np.asarray(values).reshape(-1).tolist()

    answers = _as_flat_list(answer)
    scores = _as_flat_list(pred)

    index = [i for i, value in enumerate(answers) if value in (1, 2, 3)]

    p_l = [{'i': i, 'val': scores[i], 'no': id[i][2], 'hit': False}
           for i in range(len(scores))]
    p_l.sort(key=lambda x: x['val'], reverse=True)

    # Points awarded per predicted rank (first to fifth).
    weights = (0.3, 0.3, 0.3, 0.2, 0.2)
    rank_of = {entry['i']: rank
               for rank, entry in enumerate(p_l[:len(weights)])}

    point = 0.1
    for i in index:
        rank = rank_of.get(i)
        if rank is not None:
            point += weights[rank]
            p_l[rank]['hit'] = True

    r = 1 if len(index) >= 3 and point >= 0.8 else -1
    ub, ug, sb, sg = baken(p_l, race_return)
    return point, r, ub, ug, sb, sg

def baken(hit_no, ret):
    """Tally stakes and payouts for the umaren and sanrenpuku bets.

    Args:
        hit_no: ranked prediction entries, each with 'no' (horse number)
            and 'hit' (whether that pick was flagged as correct).
        ret: payout record with '<kind><n>no' / '<kind><n>pay' fields for
            kind in umaren, sanrenpuku and n in 1..3; unused slots are NaN.

    Returns:
        ``(umaren_buy, umaren_get, sanrenpuku_buy, sanrenpuku_get)``. A
        stake of 1000 is counted per bet kind when its first slot is
        present; a slot pays out when all of its horse numbers belong to
        entries flagged as hits.
    """
    def split_no(s, num):
        # Zero-padded digit string cut into `num` two-digit horse numbers.
        digits = "{:.0f}".format(s).zfill(num*2)
        return [int(digits[start:start+2]) for start in range(0, num*2, 2)]

    def hit(pre, ans, num):
        matched = 0
        confirmed = 0
        for candidate in pre:
            for number in ans:
                if candidate['no'] == number:
                    matched += 1
                    if candidate['hit']:
                        confirmed += 1
            # Stop scanning once every winning number has been located.
            if matched == num:
                break
        return confirmed == num

    stakes = {'umaren': 0, 'sanrenpuku': 0}
    payouts = {'umaren': 0, 'sanrenpuku': 0}

    for kind, size in (('umaren', 2), ('sanrenpuku', 3)):
        for slot in (1, 2, 3):
            combination = ret.get(kind + str(slot) + 'no')
            if np.isnan(combination):
                continue
            if slot == 1:
                stakes[kind] += 1000
            if hit(hit_no, split_no(combination, size), size):
                payouts[kind] += int(ret[kind + str(slot) + 'pay'])

    return (stakes['umaren'], payouts['umaren'],
            stakes['sanrenpuku'], payouts['sanrenpuku'])

def try_gpu(e):
    """Move ``e`` to the first CUDA device when one is available.

    Without CUDA the object is returned unchanged.
    """
    if not torch.cuda.is_available():
        return e
    return e.to(torch.device('cuda:0'))

def to_cpu(e):
    """Return ``e`` on the CPU; a no-op when CUDA is not available."""
    return e.to('cpu') if torch.cuda.is_available() else e

# 実行

In [0]:
# Driver cell: mounts Google Drive, sets up the shared `common`, `logger`
# and `HP` globals used by the functions above, then runs whichever
# pipeline steps are left uncommented below.
from google.colab import drive
drive.mount('/content/drive')
base_path = '/content/drive/My Drive/AI/marine/'
common = Common(base_path)
now = datetime.datetime.now()
# One log file per run, named after the start timestamp.
logger = setup_logger('Marine', base_path+'data/logs/log_'
            +now.strftime('%Y%m%d%H%M%S')+ '.log')

# Hyper-parameters read by learn(): 'epochs', 'node' (hidden width),
# 'opt' (optimiser name) and 'ver' (logged version).
HP = {
    'ver': 1.0,
    'epochs': 10,
    'node': 1024,
    'opt': 'Adam' # alternatives handled by learn(): 'Adabound' / 'SGD'; anything else falls back to Adam
}

try:

    # Update master tables
    # create_master_this_year()
    # creat_master()

    # Export payout (return) information
    #create_return_this_year()
    #create_return()

    # Export horse data
    #create_horse_data_this_year()
    #create_horse_data()

    # Preprocess horse data
    #prepare_this_year()
    #prepare()

    # Training
    model_code='m20200507213616' # model code
    #model_code = construct()
    print(model_code)
    #learn(model_code)

    # Inference
    event_date= '20200509' # event date
    #forcast(event_date)        
    p = predict(event_date, model_code)

    # Output results
    #event_date= # event date
    #result(event_date, p)

except Exception:
  logger.exception('error ocurred')

#app()

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


[2020-05-08 11:32:19,853][INFO][Marine][predict][288] ---start prediction---
[2020-05-08 11:32:19,855][INFO][Marine][predict][289] event date:20200509


m20200507213616


[2020-05-08 11:32:32,867][INFO][Marine][predict][319] rece :33 horse: 434
[2020-05-08 11:32:33,027][INFO][Marine][predict][335] ---finish prediction---


競馬予想(20200509) Marine ver.1.0

新潟 02R 15頭
07 ショウナンマトイ　   6.52
13 ダコタブラックヒル   6.25
09 ナムラタイガー　　   5.87
14 テーオーバフェット   5.26
11 スズノプレジャー　   5.21
>>>
05 メイショウヨカゼ　   4.88
04 マイネルカガヤキ　   4.83
12 アンデュレイト　　   4.77
08 キーンウィット　　   4.70
01 アイムクリエイター   4.59
15 エクステンデット　   4.36
10 サイモンバトラー　   4.30
02 タイクーンバゴ　　   4.23
06 カネコメノボル　　   4.22
03 サトノパーシヴァル   ----
----------------------------

新潟 03R 15頭
10 フェールアシュバル   6.08
12 シェーネメロディ　   5.99
08 ヤサカパフィー　　   5.91
11 エタンセル　　　　   5.49
01 リバーランド　　　   5.10
>>>
05 ミューティー　　　   4.90
06 オウケンアマゾネス   4.84
15 クサヒバリ　　　　   4.79
13 ユメミルユメコ　　   4.70
04 ネネサマ　　　　　   4.62
07 ルリアンヴェール　   4.61
14 ツウカイエオス　　   4.49
03 アイヅオイワケ　　   4.34
09 ハルノマリーナ　　   4.14
02 ムーンライトアリア   ----
----------------------------

新潟 05R 15頭
15 ヴェーラ　　　　　   5.82
05 ジャスパーエース　   5.62
09 カクシン　　　　　   5.26
08 ショウナンサモン　   5.25
02 グランドストローク   5.25
>>>
03 ロードクラージュ　   5.17
04 タイセイダンク　　   4.81
10 ピュアサンクス　　   4.71
12 ディーププレザント   4.67
11 ピノタージュ　　　   4.67
01 トマティーナ　　　   4.61
13 ゴーンホーム　　　   4.60
14 サンライズエポック   4.57
06 エ