In [12]:
import pandas as pd
from random import randint, choice
from scipy.stats import binom
from numpy.random import choice
from tqdm.notebook import tqdm

In [13]:
dt = pd.read_csv('forecast1.csv', sep=';')
dt

Unnamed: 0,day_temp,day_pressure,day_obl,day_phen,day_dir,day_wind,areal,city,year,month,...,красноперка,налим,густера,амур,ерш,сазан,подуст,толстолобик,вобла,хариус
0,-4,749,dull,snow,Ю,3,Алтайский край,Барнаул,2020,1,...,-1,-1.0,-1,-1,-1,-1.0,-1,-1,-1,-1.0
1,-1,750,dull,-,Ю,5,Алтайский край,Барнаул,2020,1,...,-1,-1.0,-1,-1,-1,-1.0,-1,-1,-1,-1.0
2,-3,749,dull,-,Ю,2,Алтайский край,Барнаул,2020,1,...,-1,-1.0,-1,-1,-1,-1.0,-1,-1,-1,-1.0
3,-6,753,dull,-,Ю,1,Алтайский край,Барнаул,2020,1,...,-1,-1.0,-1,-1,-1,-1.0,-1,-1,-1,-1.0
4,-2,752,suncl,-,Ю,3,Алтайский край,Барнаул,2020,1,...,-1,-1.0,-1,-1,-1,-1.0,-1,-1,-1,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30516,-6,752,dull,-,З,1,Московская область,Щелково,2020,12,...,-1,-1.0,-1,-1,-1,-1.0,-1,-1,-1,-1.0
30517,-6,759,sunc,-,ЮВ,1,Московская область,Щелково,2020,12,...,-1,-1.0,-1,-1,-1,-1.0,-1,-1,-1,-1.0
30518,-5,754,dull,-,ЮВ,3,Московская область,Щелково,2020,12,...,-1,-1.0,-1,-1,-1,-1.0,-1,-1,-1,-1.0
30519,-3,753,dull,-,-,0,Московская область,Щелково,2020,12,...,-1,-1.0,-1,-1,-1,-1.0,-1,-1,-1,-1.0


In [14]:
class DayGenerator:
    def __init__(self, dataframe):
        self.dataframe = dataframe
        self.times = [12, 15, 18, 21, 0, 3, 6, 9]
        self.dirs = {
            'С': {'values': ['СЗ', 'СВ'], 'p': [0.2, 0.3]},
            'СЗ': {'values': ['С', 'З'], 'p': [0.25, 0.25]},
            'З': {'values': ['СЗ', 'ЮЗ'], 'p': [0.2, 0.3]},
            'ЮЗ': {'values': ['З', 'Ю'], 'p': [0.25, 0.25]},
            'Ю': {'values': ['ЮЗ', 'ЮВ'], 'p': [0.3, 0.2]},
            'ЮВ': {'values': ['Ю', 'В'], 'p': [0.2, 0.3]},
            'В': {'values': ['СВ', 'ЮВ'], 'p': [0.3, 0.2]},
            'СВ': {'values': ['С', 'В'], 'p': [0.3, 0.2]},
        }
        
        self.phens = {
            'snow': {'values': ['небольшой снег', 'снег', 'снег с дождём', 'сильный снег', 'мокрый снег'], 'p': [0.35, 0.25, 0.15, 0.1, 0.15]},
            'rain': {'values': ['небольшой дождь', 'дождь', 'сильный дождь'], 'p': [0.45, 0.35, 0.2]},
            'storm': {'values': ['небольшой дождь', 'дождь', 'гроза', 'сильный дождь'], 'p': [0.15, 0.25, 0.3, 0.3]},
        }
        
        self.obl = {
            'dull': {'values': ['пасмурно', 'облачно'], 'p': [0.7, 0.3]},
            'suncl': {'values': ['облачно', 'малооблачно', 'пасмурно'], 'p': [0.6, 0.3, 0.1]},
            'sun': {'values': ['ясно', 'малооблачно'], 'p': [0.7, 0.3]},
            'sunc': {'values': ['малооблачно', 'ясно', 'облачно'], 'p': [0.6, 0.2, 0.2]},
        }
        
        self.fishs = ['щука', 'судак', 'окунь', 'берш', 'речная форель', 'озерная форель', 'елец', 'чехонь', 'сом', 'голавль', 'язь',
         'карп', 'жерех', 'лещ', 'карась', 'линь', 'пескарь', 'ротан', 'плотва', 'красноперка', 'налим', 'густера',
         'амур', 'ерш', 'сазан', 'подуст', 'толстолобик', 'вобла', 'хариус']
        
    def __getitem__(self, idx):
        day_priew = self.dataframe.loc[[idx - 1]]
        day = self.dataframe.loc[[idx]]
        day_next = self.dataframe.loc[[idx + 1]]
        pressure = ','.join(map(str, self.gen_pressure(day_priew['day_pressure'].item(), day['day_pressure'].item(), day_next['day_pressure'].item())))
        temperature = ','.join(map(str, self.gen_temperature(day_priew['day_temp'].item(), day['day_temp'].item(), day_next['day_temp'].item())))
        wind_, gust_ = self.gen_wind(day_priew['day_wind'].item(), day['day_wind'].item(), day_next['day_wind'].item())
        wind = ','.join(map(str, wind_))
        gust = ','.join(map(str, gust_))
        wind_direction = ','.join(self.gen_dir(day['day_dir'].item()))
        phenomen_= self.gen_phenomen(day['day_obl'].item(), day['day_phen'].item())
        humidity = ','.join(map(str,self.gen_hum(phenomen_)))
        phenomen = ','.join(phenomen_)
        uv_index = ','.join(map(str,self.gen_uv(day['month'].item())))
        moon_direction, moon = self.gen_moon(sum(day[fish].item() for fish in self.fishs) / len(self.fishs))
        return {
            'pressure': pressure,
            'temperature': temperature,
            'wind': wind,
            'gust': gust,
            'wind_direction': wind_direction,
            'humidity': humidity,
            'phenomenon': phenomen,
            'uv_index': uv_index,
            'moon_direction': moon_direction,
            'moon': moon,
            'month': day['month'].item(),
            'day': day['day'].item(),
            
        }
        
    def dist_(self, left, right, len_):
        is_revert = False
        if left > right:
            left, right = right, left
            is_revert = True
        result = []
        current = left
        for _ in range(len_ - 1): 
            sub = binom.rvs(right - current, 1 / len_)
            current += sub
            result.append(current)
        result.append(right)
        return result[::-1] if is_revert else result
        
    def gen_pressure(self, pressure_priew, pressure, pressure_next):
        pressure_ = {}
        priew_sub = pressure - pressure_priew
        next_sub = pressure_next - pressure
        current_pressure = pressure_priew
        for time in [15, 18, 21]:
            sub = randint(-2, 0) if priew_sub < 0 else randint(-1, 1)
            current_pressure += sub
        dist = self.dist_(current_pressure, pressure, 5)
        for idx, time in enumerate([0, 3, 6, 9, 12]):
            pressure_.update({time: dist[idx]})
        current_pressure = pressure
        for time in [15, 18, 21]:
            sub = randint(-2, 0) if next_sub < 0 else randint(-1, 1)
            current_pressure += sub
            pressure_.update({time: current_pressure})
        return [pressure_[key] for key in sorted(pressure_)]
    
    def gen_wind(self, wind_priew, wind, wind_next):
        wind_ = {}
        priew_sub = wind - wind_priew
        next_sub = wind_next - wind
        current_wind = randint(0, randint(wind_priew, wind)) if priew_sub > 0 else 0
        dist = self.dist_(current_wind, wind, 5)
        for idx, time in enumerate([0, 3, 6, 9, 12]):
            wind_.update({time: dist[idx]})
        current_wind = wind_[12] + randint(-1, 2)
        current_wind = max(0, current_wind)
        wind_.update({15: current_wind})
        current_wind += randint(-1, 2)
        current_wind = max(0, current_wind)
        wind_.update({18: current_wind})
        current_wind += randint(-3, 0)
        current_wind = max(0, current_wind)
        wind_.update({21: current_wind})
        gust_= {time: wind_[time] + randint(3, 8) for time in self.times}
        return [wind_[key] for key in sorted(wind_)], [gust_[key] for key in sorted(gust_)]
    
    def gen_dir(self,direction):
        dir_ = {}
        if direction == '-':
            direction = choice(list(self.dirs.keys()))
        dist = list(choice([direction] + self.dirs[direction]['values'], 8, p=[0.5] + self.dirs[direction]['p']))
        for idx, time in enumerate(self.times):
            dir_.update({time: dist[idx]})
        return [dir_[key] for key in sorted(dir_)]
    
    def gen_temperature(self, temp_priew, temp, temp_next):
        temp_ = {}
        priew_sub = temp - temp_priew
        next_sub = temp_next - temp
        current_temp = temp_priew
        for time in [15, 18, 21]:
            sub = randint(-3, -1) if priew_sub < 0 else randint(-2, 0)
            current_temp += sub
        dist = self.dist_(current_temp, temp, 5)
        for idx, time in enumerate([0, 3, 6, 9, 12]):
            temp_.update({time: dist[idx]})
        current_temp = temp
        for time in [15, 18, 21]:
            sub = randint(-3, -1) if next_sub < 0 else randint(-2, 0)
            current_temp += sub
            temp_.update({time: current_temp})
        return [temp_[key] for key in sorted(temp_)]
    
    def gen_phenomen(self, obl, phen):
        phens_ = {}
        if obl == '-':
            obl = 'sun'
        dist_olb = list(choice(self.obl[obl]['values'], 8, p=self.obl[obl]['p']))
        if phen == '-':
            for idx, time in enumerate(self.times):
                phens_.update({time: dist_olb[idx]})
            return [phens_[key] for key in sorted(phens_)] 
        dist_phens = list(choice(self.phens[phen]['values'], 8, p=self.phens[phen]['p']))
        for idx, time in enumerate(self.times):
            phens_.update({time: '.'.join((dist_olb[idx], dist_phens[idx]))})
        return [phens_[key] for key in sorted(phens_)]
    
    def gen_hum(self, phens):
        hum_ = []
        current_hum = randint(30, 70)
        for phen in phens:
            if 'дождь' in phen:
                current_hum += randint(0, 20)
                current_hum = min(current_hum, randint(93, 99))
            else:
                current_hum += randint(-10, 5)
                current_hum = max(current_hum, randint(10, 29))
            hum_.append(current_hum)
        return hum_
    
    def gen_uv(self, month):
        uv_ = {}
        if month in [12, 1, 2]:
            uv_ = {0: 0, 3: 0, 6: 0, 9: 0, 12: randint(1, 2), 15: 1, 18: 0, 21:0} 
        elif month in [3, 11, 10, 4]:
            uv_ = {0: 0, 3: 0, 6: 0, 9: 1, 12: randint(2, 3), 15: randint(1, 2), 18: 1, 21: 0}
        elif month in [5, 9]:
            uv_ = {0: 0, 3: 0, 6: randint(0, 1), 9: randint(1, 2), 12: randint(2, 4), 15: randint(2, 3), 18: randint(1, 2), 21: randint(0, 1)}
        else:
            uv_ = {0: 0, 3: 0, 6: randint(1, 2), 9: randint(1, 3), 12: randint(2, 4), 15: randint(1, 3), 18: randint(1, 3), 21: randint(1, 2)}
        return [uv_[key] for key in sorted(uv_)]
    
    def gen_moon(self, forecast):
        if forecast > 0.5:
            return 2* randint(0, 1) - 1, randint(35, 70)
        else:
            if randint(0, 1):
                return 2* randint(0, 1) - 1, randint(0, 35)
            else:
                return 2* randint(0, 1) - 1, randint(70, 99)   

In [24]:
ALONE_KEYS = {'time', 'day', 'month', 'humidity', 'uv_index', 'moon', 'moon_direction'}
DIGIT_KEYS = {'temperature', 'wind', 'gust', 'pressure', 'humidity', 'uv_index'}
CATEGORY_KEYS = {'phenomenon', 'wind_direction'}
MOON_KEYS = {'moon', 'moon_direction'}
SUN_KEYS = {'sun_up', 'sun_down'}
WIND_DIRECTIONS = ['Ю', 'ЮЗ', 'З', 'СЗ', 'С', 'СВ', 'В', 'ЮВ']
PHENOMENONS = ['ясно', 'малооблачно', 'облачно', 'пасмурно', 'небольшой дождь', 'дождь', 'сильный дождь',
               'небольшой снег', 'снег', 'снег с дождём', 'сильный снег', 'гроза', 'мокрый снег']
REGIONS = ['Алтайский край', 'Амурская область', 'Архангельская область', 'Астраханская область',
           'Белгородская область', 'Брянская область', 'Владимирская область', 'Волгоградская область',
           'Вологодская область', 'Воронежская область', 'Еврейская автономная область', 'Забайкальский край',
           'Ивановская область', 'Иркутская область', 'Кабардино-Балкарская республика', 'Калининградская область',
           'Калужская область', 'Камчатский край', 'Карачаево-Черкесская республика', 'Кемеровская область',
           'Кировская область', 'Костромская область', 'Краснодарский край', 'Красноярский край', 'Курганская область',
           'Курская область', 'Ленинградская область', 'Липецкая область', 'Магаданская область', 'Московская область']


num_hours = 8
num_days = 3

def preprocess_(data):
    all_data = {}
    for d in data:
        for key in DIGIT_KEYS:
            temp = list(map(int, d[key].split(',')))
            if key in all_data:
                all_data[key] = all_data[key] + temp
            else:
                all_data[key] = temp
        for key in MOON_KEYS:
            temp = [d[key] for _ in range(num_hours)]
            if key in all_data:
                all_data[key] = all_data[key] + temp
            else:
                all_data[key] = temp
        for key in CATEGORY_KEYS:
            if key in all_data:
                all_data[key] = all_data[key] + d[key].split(',')
            else:
                all_data[key] = d[key].split(',')
        days = [d['day'] for _ in range(num_hours)]
        months = [d['month'] for _ in range(num_hours)]
        if 'day' in all_data:
            all_data['day'] = all_data['day'] + days
        else:
            all_data['day'] = days
        if 'month' in all_data:
            all_data['month'] = all_data['month'] + months
        else:
            all_data['month'] = months
        if 'time' in all_data:
            all_data['time'] = all_data['time'] + list(range(0, num_hours * 3, 3))
        else:
            all_data['time'] = list(range(0, num_hours * 3, 3))
    return all_data

def slice_(data, left_bound, righ_bound):
    slice_data = {
        key: data[key][left_bound: righ_bound] for key in data
    }
    return slice_data

def preprocess_batch_(data):
    vec = {}
    for key in data:
        if key in DIGIT_KEYS and not key in ALONE_KEYS:
            for i in range(len(data[key])):
                key_name = '{}_{}'.format(key, i)
                vec.update({key_name: data[key][i]})
        elif key == 'phenomenon':
            phenomenons_ = [_.split('.') for _ in data[key]]
            for phenomenon in PHENOMENONS:
                for i in range(len(phenomenons_)):
                    key_name = '{}_{}'.format(phenomenon, i)
                    vec.update({key_name: int(phenomenon in phenomenons_[i])})
        elif key == 'wind_direction':
            for wind_direction in WIND_DIRECTIONS:
                for i in range(len(data[key])):
                    key_name = '{}_{}'.format(wind_direction, i)
                    vec.update({key_name: int(wind_direction == data[key][i])})
        elif key == 'month':
            for month in range(1, 31):
                key_name = 'month_{}'.format(month)
                vec.update({key_name: int(month == data[key][-1])})
        elif key == 'time':
            for time in range(0, 24, 3):
                key_name = 'time_{}'.format(time)
                vec.update({key_name: int(time == data[key][-1])})
        elif key == 'moon_direction':
            for moon_direction in [-1, 1]:
                key_name = 'moon_direction_{}'.format(moon_direction)
                vec.update({key_name: int(moon_direction == data[key][-1])})
        elif key in ALONE_KEYS:
            key_name = '{}'.format(key)
            vec.update({key_name: data[key][-1]})
    return vec

def gen_forecast(forecast, time, fish):
    if fish in ['сом', 'налим']:
        if time in [21, 0, 3]:
            return 1 * forecast
        else:
            return forecast * (randint(1, 10) <= 2)
    else:
        if time in [6, 18]:
            return int(forecast) | (randint(1, 10) <= 2)
        elif time in [9, 12, 15]:
            return forecast * (randint(1, 10) <= 8)
        else:
            return forecast * (randint(1, 10) <= 2)
            

In [40]:
gen = DayGenerator(dt)

In [41]:
train_data = []
for idx, row in tqdm(dt.iterrows()):
    if row['щука'] > -1:
        data = dt[idx - 3: idx + 1]
        day = gen[idx]
        day_1 = gen[idx - 1]
        day_2 = gen[idx - 2]
        day_3 = gen[idx - 3]
        all_data = preprocess_([day_3, day_2, day_1, day])
#         print(all_data)
#         break
        len_data = len(all_data['moon'])
        probs = {fish: [] for fish in gen.fishs}
        for fish in gen.fishs:
            for i in range(num_hours * num_days, len_data + 1):
                slice_data = slice_(all_data, i - num_hours * num_days, i)
                vec = preprocess_batch_(slice_data)
                for fish_ in gen.fishs:
                    vec.update({fish_: int(fish_ == fish)})
                time = 0
                for j in range(0, 24, 3):
                    if vec['time_{}'.format(j)] == 1:
                        time = j
                        break
                vec = {key: vec[key] for key in sorted(vec)}
                forecast = gen_forecast(row[fish], time, fish)
                vec.update({'forecast': forecast})
                train_data.append(vec)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [42]:
len(train_data)

26883

In [51]:
train_dt = pd.DataFrame(train_data)

In [52]:
train_dt.describe()

Unnamed: 0,day,gust_0,gust_1,gust_10,gust_11,gust_12,gust_13,gust_14,gust_15,gust_16,...,ясно_22,ясно_23,ясно_3,ясно_4,ясно_5,ясно_6,ясно_7,ясно_8,ясно_9,forecast
count,26883.0,26883.0,26883.0,26883.0,26883.0,26883.0,26883.0,26883.0,26883.0,26883.0,...,26883.0,26883.0,26883.0,26883.0,26883.0,26883.0,26883.0,26883.0,26883.0,26883.0
mean,16.200647,7.854369,7.919094,7.935275,7.969795,8.12082,8.143474,8.149946,7.992449,7.809061,...,0.229773,0.238403,0.20712,0.208198,0.214671,0.216828,0.225458,0.22438,0.218986,0.259234
std,8.85965,2.832165,2.805297,2.780806,2.793779,2.869088,2.856171,2.806686,2.753287,2.757253,...,0.420695,0.426115,0.40525,0.406027,0.410602,0.412092,0.417892,0.417181,0.413567,0.438223
min,1.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,9.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,16.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,24.0,10.0,10.0,9.0,9.0,10.0,10.0,10.0,10.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
max,31.0,20.0,20.0,19.0,19.0,20.0,20.0,20.0,20.0,20.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [53]:
f1 = train_dt[train_dt['forecast'] == 1]
f0 = train_dt[train_dt['forecast'] == 0].sample(len(f1))
train_dt = pd.concat([f0, f1])

In [54]:
train_dt = train_dt.sample(frac=1.0).reset_index(drop=True)

In [55]:
y = train_dt['forecast']
del train_dt['forecast']

In [58]:
y.std()

0.5000179375414864

In [57]:
train_dt.describe()

Unnamed: 0,day,gust_0,gust_1,gust_10,gust_11,gust_12,gust_13,gust_14,gust_15,gust_16,...,ясно_21,ясно_22,ясно_23,ясно_3,ясно_4,ясно_5,ясно_6,ясно_7,ясно_8,ясно_9
count,13938.0,13938.0,13938.0,13938.0,13938.0,13938.0,13938.0,13938.0,13938.0,13938.0,...,13938.0,13938.0,13938.0,13938.0,13938.0,13938.0,13938.0,13938.0,13938.0,13938.0
mean,15.777658,7.849548,7.883197,7.905797,7.915268,7.918353,7.933204,8.019515,7.997632,7.821065,...,0.231597,0.236404,0.246377,0.210719,0.210791,0.218683,0.22184,0.230951,0.233032,0.227005
std,9.043397,2.790315,2.784218,2.783619,2.784671,2.794626,2.801931,2.75068,2.729709,2.757858,...,0.421868,0.424888,0.430916,0.407834,0.407885,0.413368,0.415499,0.421456,0.422778,0.418911
min,1.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,8.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,15.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,24.0,10.0,10.0,9.0,9.0,9.0,9.0,10.0,10.0,10.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,31.0,20.0,20.0,19.0,19.0,20.0,20.0,20.0,20.0,20.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [59]:
X = train_dt.values

In [60]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

In [61]:
X_train.shape

(11150, 673)

In [62]:
X_test.shape

(2788, 673)

In [68]:
from catboost import CatBoostClassifier

model = CatBoostClassifier()

In [69]:
model.fit(X_train, y_train)

Learning rate set to 0.028848
0:	learn: 0.6848565	total: 252ms	remaining: 4m 12s
1:	learn: 0.6783212	total: 289ms	remaining: 2m 24s
2:	learn: 0.6695786	total: 322ms	remaining: 1m 46s
3:	learn: 0.6621234	total: 343ms	remaining: 1m 25s
4:	learn: 0.6542317	total: 362ms	remaining: 1m 12s
5:	learn: 0.6465450	total: 382ms	remaining: 1m 3s
6:	learn: 0.6395694	total: 403ms	remaining: 57.2s
7:	learn: 0.6335518	total: 425ms	remaining: 52.7s
8:	learn: 0.6279984	total: 450ms	remaining: 49.6s
9:	learn: 0.6219578	total: 473ms	remaining: 46.8s
10:	learn: 0.6173320	total: 495ms	remaining: 44.5s
11:	learn: 0.6115984	total: 516ms	remaining: 42.5s
12:	learn: 0.6064085	total: 538ms	remaining: 40.8s
13:	learn: 0.6024711	total: 559ms	remaining: 39.4s
14:	learn: 0.5988293	total: 579ms	remaining: 38s
15:	learn: 0.5943952	total: 601ms	remaining: 37s
16:	learn: 0.5914702	total: 629ms	remaining: 36.4s
17:	learn: 0.5883163	total: 657ms	remaining: 35.8s
18:	learn: 0.5852874	total: 681ms	remaining: 35.2s
19:	learn:

163:	learn: 0.4581349	total: 4.35s	remaining: 22.2s
164:	learn: 0.4575675	total: 4.38s	remaining: 22.1s
165:	learn: 0.4569572	total: 4.39s	remaining: 22.1s
166:	learn: 0.4567141	total: 4.42s	remaining: 22s
167:	learn: 0.4563525	total: 4.44s	remaining: 22s
168:	learn: 0.4559852	total: 4.46s	remaining: 21.9s
169:	learn: 0.4555642	total: 4.48s	remaining: 21.9s
170:	learn: 0.4553100	total: 4.5s	remaining: 21.8s
171:	learn: 0.4548500	total: 4.52s	remaining: 21.8s
172:	learn: 0.4546263	total: 4.54s	remaining: 21.7s
173:	learn: 0.4544472	total: 4.56s	remaining: 21.7s
174:	learn: 0.4542167	total: 4.58s	remaining: 21.6s
175:	learn: 0.4537876	total: 4.6s	remaining: 21.6s
176:	learn: 0.4534756	total: 4.62s	remaining: 21.5s
177:	learn: 0.4532317	total: 4.64s	remaining: 21.4s
178:	learn: 0.4530073	total: 4.66s	remaining: 21.4s
179:	learn: 0.4525685	total: 4.68s	remaining: 21.3s
180:	learn: 0.4522779	total: 4.71s	remaining: 21.3s
181:	learn: 0.4518383	total: 4.73s	remaining: 21.3s
182:	learn: 0.4515

327:	learn: 0.4106670	total: 7.89s	remaining: 16.2s
328:	learn: 0.4103737	total: 7.91s	remaining: 16.1s
329:	learn: 0.4100728	total: 7.94s	remaining: 16.1s
330:	learn: 0.4097777	total: 7.96s	remaining: 16.1s
331:	learn: 0.4094528	total: 7.98s	remaining: 16.1s
332:	learn: 0.4090461	total: 8s	remaining: 16s
333:	learn: 0.4088497	total: 8.02s	remaining: 16s
334:	learn: 0.4086097	total: 8.04s	remaining: 16s
335:	learn: 0.4081147	total: 8.06s	remaining: 15.9s
336:	learn: 0.4077835	total: 8.09s	remaining: 15.9s
337:	learn: 0.4074175	total: 8.11s	remaining: 15.9s
338:	learn: 0.4070711	total: 8.13s	remaining: 15.8s
339:	learn: 0.4067486	total: 8.15s	remaining: 15.8s
340:	learn: 0.4064859	total: 8.17s	remaining: 15.8s
341:	learn: 0.4061497	total: 8.19s	remaining: 15.8s
342:	learn: 0.4057594	total: 8.21s	remaining: 15.7s
343:	learn: 0.4054043	total: 8.23s	remaining: 15.7s
344:	learn: 0.4048553	total: 8.25s	remaining: 15.7s
345:	learn: 0.4044515	total: 8.27s	remaining: 15.6s
346:	learn: 0.4042185

492:	learn: 0.3628573	total: 11.4s	remaining: 11.7s
493:	learn: 0.3626002	total: 11.4s	remaining: 11.7s
494:	learn: 0.3623130	total: 11.4s	remaining: 11.7s
495:	learn: 0.3620971	total: 11.5s	remaining: 11.6s
496:	learn: 0.3618112	total: 11.5s	remaining: 11.6s
497:	learn: 0.3616198	total: 11.5s	remaining: 11.6s
498:	learn: 0.3613891	total: 11.5s	remaining: 11.6s
499:	learn: 0.3611248	total: 11.5s	remaining: 11.5s
500:	learn: 0.3608615	total: 11.6s	remaining: 11.5s
501:	learn: 0.3606510	total: 11.6s	remaining: 11.5s
502:	learn: 0.3604011	total: 11.6s	remaining: 11.5s
503:	learn: 0.3601320	total: 11.6s	remaining: 11.4s
504:	learn: 0.3598113	total: 11.6s	remaining: 11.4s
505:	learn: 0.3595446	total: 11.7s	remaining: 11.4s
506:	learn: 0.3593859	total: 11.7s	remaining: 11.4s
507:	learn: 0.3591496	total: 11.7s	remaining: 11.3s
508:	learn: 0.3589805	total: 11.7s	remaining: 11.3s
509:	learn: 0.3587321	total: 11.8s	remaining: 11.3s
510:	learn: 0.3585256	total: 11.8s	remaining: 11.3s
511:	learn: 

657:	learn: 0.3297895	total: 15.1s	remaining: 7.86s
658:	learn: 0.3295894	total: 15.1s	remaining: 7.84s
659:	learn: 0.3294053	total: 15.2s	remaining: 7.81s
660:	learn: 0.3291846	total: 15.2s	remaining: 7.79s
661:	learn: 0.3289809	total: 15.2s	remaining: 7.77s
662:	learn: 0.3288380	total: 15.2s	remaining: 7.75s
663:	learn: 0.3286920	total: 15.3s	remaining: 7.73s
664:	learn: 0.3285543	total: 15.3s	remaining: 7.7s
665:	learn: 0.3283586	total: 15.3s	remaining: 7.68s
666:	learn: 0.3281524	total: 15.3s	remaining: 7.66s
667:	learn: 0.3279505	total: 15.4s	remaining: 7.64s
668:	learn: 0.3277454	total: 15.4s	remaining: 7.62s
669:	learn: 0.3275767	total: 15.4s	remaining: 7.59s
670:	learn: 0.3273471	total: 15.4s	remaining: 7.57s
671:	learn: 0.3272184	total: 15.5s	remaining: 7.55s
672:	learn: 0.3270531	total: 15.5s	remaining: 7.53s
673:	learn: 0.3269266	total: 15.5s	remaining: 7.51s
674:	learn: 0.3267808	total: 15.5s	remaining: 7.49s
675:	learn: 0.3266014	total: 15.6s	remaining: 7.46s
676:	learn: 0

824:	learn: 0.3033543	total: 19.1s	remaining: 4.04s
825:	learn: 0.3031919	total: 19.1s	remaining: 4.02s
826:	learn: 0.3030760	total: 19.1s	remaining: 4s
827:	learn: 0.3029032	total: 19.1s	remaining: 3.97s
828:	learn: 0.3027459	total: 19.2s	remaining: 3.95s
829:	learn: 0.3026134	total: 19.2s	remaining: 3.93s
830:	learn: 0.3024959	total: 19.2s	remaining: 3.9s
831:	learn: 0.3023432	total: 19.2s	remaining: 3.88s
832:	learn: 0.3022067	total: 19.2s	remaining: 3.86s
833:	learn: 0.3020671	total: 19.3s	remaining: 3.84s
834:	learn: 0.3019326	total: 19.3s	remaining: 3.81s
835:	learn: 0.3017848	total: 19.3s	remaining: 3.79s
836:	learn: 0.3016352	total: 19.3s	remaining: 3.77s
837:	learn: 0.3015370	total: 19.4s	remaining: 3.74s
838:	learn: 0.3013793	total: 19.4s	remaining: 3.72s
839:	learn: 0.3012549	total: 19.4s	remaining: 3.69s
840:	learn: 0.3010872	total: 19.4s	remaining: 3.67s
841:	learn: 0.3009530	total: 19.4s	remaining: 3.65s
842:	learn: 0.3007883	total: 19.5s	remaining: 3.63s
843:	learn: 0.30

990:	learn: 0.2817771	total: 22.8s	remaining: 207ms
991:	learn: 0.2816427	total: 22.9s	remaining: 184ms
992:	learn: 0.2815246	total: 22.9s	remaining: 161ms
993:	learn: 0.2814195	total: 22.9s	remaining: 138ms
994:	learn: 0.2812840	total: 22.9s	remaining: 115ms
995:	learn: 0.2811557	total: 23s	remaining: 92.2ms
996:	learn: 0.2810530	total: 23s	remaining: 69.1ms
997:	learn: 0.2809407	total: 23s	remaining: 46.1ms
998:	learn: 0.2808217	total: 23s	remaining: 23ms
999:	learn: 0.2806940	total: 23s	remaining: 0us


<catboost.core.CatBoostClassifier at 0x16fb27b9888>

In [70]:
preds = model.predict(X_test)

In [71]:
from sklearn.metrics import accuracy_score

print(accuracy_score(preds, y_test))

0.8055954088952654


In [72]:
model.predict_proba(X_test)

array([[0.66979494, 0.33020506],
       [0.10858889, 0.89141111],
       [0.77349438, 0.22650562],
       ...,
       [0.06124426, 0.93875574],
       [0.6049637 , 0.3950363 ],
       [0.67681242, 0.32318758]])

In [73]:
from joblib import dump

In [74]:
dump(model, 'catboost_0.3.model')

['catboost_0.3.model']