# -2) fonction personnelle

In [194]:
def push(message):
    """
    Save current notebook and push whole current dir to origin/master
    
    Param:
        message : str 
            Commit's message
    
    Return:
        None
    """
    assert(len(message.strip())>10)
    import subprocess
    from IPython.display import display, Javascript
    
    cmds = [  "git add .",
              "git commit -m ", 
              "git push"]
    
    # save current notebook
    display(Javascript('IPython.notebook.save_checkpoint();'))
    for cmd in cmds:
        cmd = cmd.split()
        if "commit" in cmd: 
            cmd.append( f"'{message}'") 
        print(" ".join(cmd))
        subprocess.check_output(cmd)
    print("Fini.")

In [195]:
push("jour 4 - machine learning - intro")

<IPython.core.display.Javascript object>

git add .
git commit -m 'jour 4 - machine learning - intro'
git push
Fini.


# -1) imports

In [2]:
import pandas                as     pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble        import RandomForestClassifier

# 0) import de données

Source de données https://archive.ics.uci.edu/ml/datasets/adult

In [79]:
url_to_csv = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
df         = pd.read_csv(url_to_csv , header=None)
df_originale = pd.read_csv(url_to_csv , header=None)

In [80]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


In [81]:
df.shape

(32561, 15)

## 0.1) copier les colonnes de la documentation pour les mettre dans la df

In [82]:
new_colonnes = ["age",
                "workclass",
                "fnlwgt",
                "education",
                "education-num",
                "marital-status",
                "occupation",
                "relationship",
                "race",
                "sex",
                "capital-gain",
                "capital-loss",
                "hours-per-week",
                "native-country",
               "revenu"]

In [83]:
df.columns = new_colonnes

In [84]:
df_originale.columns = new_colonnes

In [85]:
df.head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,revenu
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


## A) Explication du  one-hot encoding
les colonnes qui contiennent des catégories doivent être "unpacked" dans une colonne par catégorie => c'est la multiplication des colonnes => one-hot encoding

In [86]:
data = {"métiers" : ["journaliste", "capitaine", "journaliste"]}# , "personnage" : ["tintin", "hadock"]}
test = pd.DataFrame(data)

In [87]:
test

Unnamed: 0,métiers
0,journaliste
1,capitaine
2,journaliste


In [88]:
pd.get_dummies(test)

Unnamed: 0,métiers_capitaine,métiers_journaliste
0,0,1
1,1,0
2,0,1


In [89]:
# Ici on a une seule colonne "éducation"
# qui contient 16 possibilités d'éducation différentes => 16 chaine de caractère différente
# Et pour chacune de ces possibilités, on va créer une colonne
df.education.value_counts()

 HS-grad         10501
 Some-college     7291
 Bachelors        5355
 Masters          1723
 Assoc-voc        1382
 11th             1175
 Assoc-acdm       1067
 10th              933
 7th-8th           646
 Prof-school       576
 9th               514
 12th              433
 Doctorate         413
 5th-6th           333
 1st-4th           168
 Preschool          51
Name: education, dtype: int64

In [90]:
# La fonction "get_dummies" permet très simplement de faire du "one hot encoding"
# et crée une colonne pour chaque chaîne de caractère contenue dans la dataframe
# => ça permet de remplacer les chaînes de caractères en nombre binaires : 0 / 1
# => ainsi la dataframe devient exploitable par les algorithmes qui ont besoin de ne traiter que des chiffres
# => on remplace des chaînes de caractères par des colonnes booléennes.
pd.get_dummies(df.education).head()

Unnamed: 0,10th,11th,12th,1st-4th,5th-6th,7th-8th,9th,Assoc-acdm,Assoc-voc,Bachelors,Doctorate,HS-grad,Masters,Preschool,Prof-school,Some-college
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0


# 0.2) on va faire du one-hot encoding sur chaque attribut de type "string" de la database : 


### attributs déjà sous forme de nombres (ils n'ont pas besoin d'être modifiés)
age: continuous.  
education-num: continuous.
capital-gain: continuous.
capital-loss: continuous.
hours-per-week: continuous.

### attributs déjà sous forme binaire (ou pas, si c'est un string ? genre "H" / "F" => dans ce cas il faut du one-hot encoding)
sex: Female, Male.  


### attributs qui sont des chaînes de caractères => il faut les encoder, avec une nouvelle colonne par chaîne de caractère distincte 
workclass: Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked.
fnlwgt: continuous.  
education: Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool.  
marital-status: Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse.  
occupation: Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces.  
relationship: Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried.  
race: White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black.  
native-country: United-States, Cambodia, England, Puerto-Rico, Canada, Germany, Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands.  



### 0.2.1) La fonction "get_dummies" crée les colonnes automatiquement pour le one-hot encoding

In [98]:
df_originale.shape

(32561, 15)

In [107]:
df_originale.dtypes

age                int64
workclass         object
fnlwgt             int64
education         object
education-num      int64
marital-status    object
occupation        object
relationship      object
race              object
sex               object
capital-gain       int64
capital-loss       int64
hours-per-week     int64
native-country    object
revenu            object
dtype: object

### 0.2.2) pd.get_dummies : 

In [104]:
df = pd.get_dummies(df) 

#### Plein de colonnes ont été créées, pour chaque colonne qui n'était pas remplie de chiffres ("int" ou "float")

In [105]:
df.shape

(32561, 110)

In [103]:
df.columns

Index(['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss',
       'hours-per-week', 'workclass_ ?', 'workclass_ Federal-gov',
       'workclass_ Local-gov', 'workclass_ Never-worked',
       ...
       'native-country_ Scotland', 'native-country_ South',
       'native-country_ Taiwan', 'native-country_ Thailand',
       'native-country_ Trinadad&Tobago', 'native-country_ United-States',
       'native-country_ Vietnam', 'native-country_ Yugoslavia',
       'revenu_ <=50K', 'revenu_ >50K'],
      dtype='object', length=110)

#### Regardons les colonnes créées pour la colonne de base "native country"

In [102]:
for new_col in list(filter(lambda col: "native" in col, df.columns)):
    print(new_col, end="       ")

native-country_ ?       native-country_ Cambodia       native-country_ Canada       native-country_ China       native-country_ Columbia       native-country_ Cuba       native-country_ Dominican-Republic       native-country_ Ecuador       native-country_ El-Salvador       native-country_ England       native-country_ France       native-country_ Germany       native-country_ Greece       native-country_ Guatemala       native-country_ Haiti       native-country_ Holand-Netherlands       native-country_ Honduras       native-country_ Hong       native-country_ Hungary       native-country_ India       native-country_ Iran       native-country_ Ireland       native-country_ Italy       native-country_ Jamaica       native-country_ Japan       native-country_ Laos       native-country_ Mexico       native-country_ Nicaragua       native-country_ Outlying-US(Guam-USVI-etc)       native-country_ Peru       native-country_ Philippines       native-country_ Poland       native-country_ Port

### Le nombre de "1" dans une colonne créée est égale au nombre de fois que la valeur était présente dans la df originale

In [93]:
df["native-country_ El-Salvador"].sum()

106

In [96]:
df_originale['native-country'].value_counts()

 United-States                 29170
 Mexico                          643
 ?                               583
 Philippines                     198
 Germany                         137
 Canada                          121
 Puerto-Rico                     114
 El-Salvador                     106
 India                           100
 Cuba                             95
 England                          90
 Jamaica                          81
 South                            80
 China                            75
 Italy                            73
 Dominican-Republic               70
 Vietnam                          67
 Guatemala                        64
 Japan                            62
 Poland                           60
 Columbia                         59
 Taiwan                           51
 Haiti                            44
 Iran                             43
 Portugal                         37
 Nicaragua                        34
 Peru                             31
 

# 1) exploration de données

## 1.1) afficher les colonnes

une base de données sur des gens, pour essayer de prévoir leur revenu

In [108]:
for i in df.columns:
    print (i, end="  ")

age  fnlwgt  education-num  capital-gain  capital-loss  hours-per-week  workclass_ ?  workclass_ Federal-gov  workclass_ Local-gov  workclass_ Never-worked  workclass_ Private  workclass_ Self-emp-inc  workclass_ Self-emp-not-inc  workclass_ State-gov  workclass_ Without-pay  education_ 10th  education_ 11th  education_ 12th  education_ 1st-4th  education_ 5th-6th  education_ 7th-8th  education_ 9th  education_ Assoc-acdm  education_ Assoc-voc  education_ Bachelors  education_ Doctorate  education_ HS-grad  education_ Masters  education_ Preschool  education_ Prof-school  education_ Some-college  marital-status_ Divorced  marital-status_ Married-AF-spouse  marital-status_ Married-civ-spouse  marital-status_ Married-spouse-absent  marital-status_ Never-married  marital-status_ Separated  marital-status_ Widowed  occupation_ ?  occupation_ Adm-clerical  occupation_ Armed-Forces  occupation_ Craft-repair  occupation_ Exec-managerial  occupation_ Farming-fishing  occupation_ Handlers-clean

In [112]:
df["revenu_ <=50K"]

0        1
1        1
2        1
3        1
4        1
        ..
32556    1
32557    0
32558    1
32559    1
32560    0
Name: revenu_ <=50K, Length: 32561, dtype: uint8

In [111]:
df_originale[df_originale.columns[-1]].unique()

array([' <=50K', ' >50K'], dtype=object)

#### Comme Y ne contient que deux "modalités" ("<= 50k", ">50k") , on peut déduire l'une de de l'autre => quand l'une est à "1" l'autre est à "0" 

In [113]:
df[df.columns[-2:]]

Unnamed: 0,revenu_ <=50K,revenu_ >50K
0,1,0
1,1,0
2,1,0
3,1,0
4,1,0
...,...,...
32556,1,0
32557,0,1
32558,1,0
32559,1,0


# 2) Création de la base d'apprentissage=>  Je définis les deux variables nécessaires : "X" et "Y" 
=> on va créer un modèle qui prédira les revenus (Y) à partir des informations sur les gens, informations qui sont le niveau d'étude, le pays de naissance, l'éducation, le sexe, etc ... 

In [151]:
df.columns[-1]

'revenu_ >50K'

In [114]:
X = df[df.columns[:-2]] ## je prends toutes les colonnes, sauf les deux dernières (qui contiennent Y, soit la réponse)
Y = df[df.columns[-1]] ## je prends la dernière colonne (car elle est suffisante pour savoir si on est supérieur ou inférieur à 50K de revenu)

### Ici, X et Y ne contiennent que des chiffres => utile pour les algos python

In [116]:
X.dtypes.value_counts()

uint8    102
int64      6
dtype: int64

In [117]:
Y.dtypes

dtype('uint8')

# 3) Divise la base d'apprentissage en "jeu d'apprentissage" et "jeu de test"

=> on va "apprendre" / "entraîner le modèle" sur le jeu d'apprentissage, puis le tester (calculer une performance) sur le jeu de test

In [119]:
x_train, x_test, y_train, y_test = train_test_split(X,Y)

# 4) Modélisation de Y par X => ici modélisation des revenus en fonction des informations sur les gens (âge, nationalité, niveau d'étude, statut marital)

### Equation : Algorithme + data = modèle

In [156]:
algo   = RandomForestClassifier()      # 1) on choisit un algorithme => ici un random forest
modele = algo.fit(x_train, y_train)    # 2) on lui donne le jeu d'apprentissage pour qu'il apprenne

### le modèle se propose de se scorer (fonction score)

In [191]:
modele.score(x_test, y_test)           # 3) on le test sur le jeu de test => il nous renvoie un score

0.8760594521557549

## 4.1) scoring du modèle

#### Le modèle contient une fonction "predict" qui me permet de prédire le revenu pour un individu donné

In [126]:
x_test.iloc[1] # je prends une ligne, avec les informations sur un individu (108 colonnes)

age                                   38
fnlwgt                             51838
education-num                          9
capital-gain                           0
capital-loss                           0
                                   ...  
native-country_ Thailand               0
native-country_ Trinadad&Tobago        0
native-country_ United-States          1
native-country_ Vietnam                0
native-country_ Yugoslavia             0
Name: 6191, Length: 108, dtype: int64

#### Comparons avec la réalité

In [159]:
numero_de_ligne_de_la_personne = 1
une_personne         = pd.DataFrame(x_test.iloc[numero_de_ligne_de_la_personne])
une_personne.columns = ["une_personne"]
mask                 = 0 < une_personne.une_personne
une_personne[mask]

Unnamed: 0,une_personne
age,38
fnlwgt,51838
education-num,9
hours-per-week,40
workclass_ Private,1
education_ HS-grad,1
marital-status_ Married-civ-spouse,1
occupation_ Prof-specialty,1
relationship_ Wife,1
race_ White,1


In [148]:
type(une_personne)

pandas.core.frame.DataFrame

#### Test sur 100 lignes

In [170]:
for numero_de_ligne_de_la_personne in range(100):
    prediction = modele.predict([x_test.iloc[numero_de_ligne_de_la_personne]]) 
    valeur_reelle = y_test.iloc[numero_de_ligne_de_la_personne]
    if prediction==0: 
        en_français = "revenu < 50K"
    if prediction==1:
        en_français = "50K < revenu"

    if prediction == valeur_reelle:
        conclusion = "le modèle à raison"
        
    if prediction != valeur_reelle:
        conclusion = "le modèle s'est trompé"
    print("-"*20)
    print(f" Le modèle a prédit {prediction} à propos du revenu s'il est ou non {df.columns[-1]} (soit {en_français})")
    print(f" {conclusion}, la valeur attendue était {valeur_reelle}")

--------------------
 Le modèle a prédit [0] à propos du revenu s'il est ou non revenu_ >50K (soit revenu < 50K)
 le modèle à raison, la valeur attendue était 0
--------------------
 Le modèle a prédit [0] à propos du revenu s'il est ou non revenu_ >50K (soit revenu < 50K)
 le modèle à raison, la valeur attendue était 0
--------------------
 Le modèle a prédit [0] à propos du revenu s'il est ou non revenu_ >50K (soit revenu < 50K)
 le modèle à raison, la valeur attendue était 0
--------------------
 Le modèle a prédit [0] à propos du revenu s'il est ou non revenu_ >50K (soit revenu < 50K)
 le modèle à raison, la valeur attendue était 0
--------------------
 Le modèle a prédit [0] à propos du revenu s'il est ou non revenu_ >50K (soit revenu < 50K)
 le modèle à raison, la valeur attendue était 0
--------------------
 Le modèle a prédit [1] à propos du revenu s'il est ou non revenu_ >50K (soit 50K < revenu)
 le modèle à raison, la valeur attendue était 1
--------------------
 Le modèle a 

--------------------
 Le modèle a prédit [0] à propos du revenu s'il est ou non revenu_ >50K (soit revenu < 50K)
 le modèle à raison, la valeur attendue était 0
--------------------
 Le modèle a prédit [0] à propos du revenu s'il est ou non revenu_ >50K (soit revenu < 50K)
 le modèle s'est trompé, la valeur attendue était 1
--------------------
 Le modèle a prédit [0] à propos du revenu s'il est ou non revenu_ >50K (soit revenu < 50K)
 le modèle à raison, la valeur attendue était 0
--------------------
 Le modèle a prédit [0] à propos du revenu s'il est ou non revenu_ >50K (soit revenu < 50K)
 le modèle à raison, la valeur attendue était 0
--------------------
 Le modèle a prédit [0] à propos du revenu s'il est ou non revenu_ >50K (soit revenu < 50K)
 le modèle à raison, la valeur attendue était 0
--------------------
 Le modèle a prédit [0] à propos du revenu s'il est ou non revenu_ >50K (soit revenu < 50K)
 le modèle à raison, la valeur attendue était 0
--------------------
 Le modèl

## 5) La killer-feature du random forest : feature_importances

Les random forest vous disent quelles sont les colonnes "importantes" pour la prédiction.

In [187]:
pd.DataFrame(zip(x_train.columns, modele.feature_importances_)).sort_values(1).tail(10)

Unnamed: 0,0,1
42,occupation_ Exec-managerial,0.020014
35,marital-status_ Never-married,0.029179
4,capital-loss,0.029354
53,relationship_ Husband,0.03782
33,marital-status_ Married-civ-spouse,0.058634
2,education-num,0.065458
5,hours-per-week,0.082067
3,capital-gain,0.095484
0,age,0.149922
1,fnlwgt,0.156519


# 6) utilisons un autre algorithme

In [172]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-1.4.1-py3-none-macosx_10_14_x86_64.macosx_10_15_x86_64.macosx_11_0_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 4.1 MB/s eta 0:00:01
Installing collected packages: xgboost
Successfully installed xgboost-1.4.1


In [174]:
!pip install catboost

Collecting catboost
  Downloading catboost-0.25.1-cp38-none-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl (11.1 MB)
[K     |████████████████████████████████| 11.1 MB 2.1 MB/s eta 0:00:01
Collecting plotly
  Downloading plotly-4.14.3-py2.py3-none-any.whl (13.2 MB)
[K     |████████████████████████████████| 13.2 MB 3.9 MB/s eta 0:00:01
Collecting graphviz
  Downloading graphviz-0.16-py2.py3-none-any.whl (19 kB)
Processing /Users/romain/Library/Caches/pip/wheels/c4/a7/48/0a434133f6d56e878ca511c0e6c38326907c0792f67b476e56/retrying-1.3.3-py3-none-any.whl
Installing collected packages: retrying, plotly, graphviz, catboost
Successfully installed catboost-0.25.1 graphviz-0.16 plotly-4.14.3 retrying-1.3.3


In [188]:
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

## 6.1) XGBoost

In [189]:
algo   = XGBClassifier()
modele = algo.fit(x_train, y_train)
modele.score(x_test, y_test)







0.8720058960815624

## 6.2) CatBoost

In [190]:
algo   = CatBoostClassifier()
modele = algo.fit(x_train, y_train)
modele.score(x_test, y_test)

Learning rate set to 0.040318
0:	learn: 0.6495200	total: 77.4ms	remaining: 1m 17s
1:	learn: 0.6136871	total: 105ms	remaining: 52.4s
2:	learn: 0.5815158	total: 116ms	remaining: 38.6s
3:	learn: 0.5548454	total: 131ms	remaining: 32.5s
4:	learn: 0.5295869	total: 140ms	remaining: 27.9s
5:	learn: 0.5085937	total: 153ms	remaining: 25.4s
6:	learn: 0.4885432	total: 168ms	remaining: 23.9s
7:	learn: 0.4697367	total: 183ms	remaining: 22.7s
8:	learn: 0.4533222	total: 198ms	remaining: 21.8s
9:	learn: 0.4390944	total: 208ms	remaining: 20.6s
10:	learn: 0.4273594	total: 223ms	remaining: 20s
11:	learn: 0.4170005	total: 234ms	remaining: 19.3s
12:	learn: 0.4085735	total: 253ms	remaining: 19.2s
13:	learn: 0.3999247	total: 273ms	remaining: 19.3s
14:	learn: 0.3915366	total: 291ms	remaining: 19.1s
15:	learn: 0.3849197	total: 304ms	remaining: 18.7s
16:	learn: 0.3790647	total: 317ms	remaining: 18.4s
17:	learn: 0.3737489	total: 337ms	remaining: 18.4s
18:	learn: 0.3693762	total: 351ms	remaining: 18.1s
19:	learn: 

171:	learn: 0.2837434	total: 2.7s	remaining: 13s
172:	learn: 0.2836787	total: 2.72s	remaining: 13s
173:	learn: 0.2836069	total: 2.73s	remaining: 13s
174:	learn: 0.2834766	total: 2.75s	remaining: 13s
175:	learn: 0.2832453	total: 2.76s	remaining: 12.9s
176:	learn: 0.2831756	total: 2.77s	remaining: 12.9s
177:	learn: 0.2830589	total: 2.8s	remaining: 12.9s
178:	learn: 0.2829571	total: 2.82s	remaining: 12.9s
179:	learn: 0.2827841	total: 2.83s	remaining: 12.9s
180:	learn: 0.2827052	total: 2.85s	remaining: 12.9s
181:	learn: 0.2825748	total: 2.86s	remaining: 12.9s
182:	learn: 0.2824864	total: 2.88s	remaining: 12.9s
183:	learn: 0.2823734	total: 2.9s	remaining: 12.8s
184:	learn: 0.2819238	total: 2.91s	remaining: 12.8s
185:	learn: 0.2818590	total: 2.92s	remaining: 12.8s
186:	learn: 0.2817434	total: 2.94s	remaining: 12.8s
187:	learn: 0.2816316	total: 2.96s	remaining: 12.8s
188:	learn: 0.2814244	total: 2.97s	remaining: 12.7s
189:	learn: 0.2813149	total: 2.99s	remaining: 12.7s
190:	learn: 0.2811715	t

339:	learn: 0.2650342	total: 5.4s	remaining: 10.5s
340:	learn: 0.2649161	total: 5.43s	remaining: 10.5s
341:	learn: 0.2648495	total: 5.45s	remaining: 10.5s
342:	learn: 0.2647621	total: 5.47s	remaining: 10.5s
343:	learn: 0.2647203	total: 5.5s	remaining: 10.5s
344:	learn: 0.2646565	total: 5.52s	remaining: 10.5s
345:	learn: 0.2645326	total: 5.54s	remaining: 10.5s
346:	learn: 0.2644560	total: 5.56s	remaining: 10.5s
347:	learn: 0.2643679	total: 5.58s	remaining: 10.4s
348:	learn: 0.2643322	total: 5.59s	remaining: 10.4s
349:	learn: 0.2640560	total: 5.61s	remaining: 10.4s
350:	learn: 0.2640256	total: 5.62s	remaining: 10.4s
351:	learn: 0.2639618	total: 5.64s	remaining: 10.4s
352:	learn: 0.2638998	total: 5.65s	remaining: 10.4s
353:	learn: 0.2638282	total: 5.67s	remaining: 10.4s
354:	learn: 0.2637563	total: 5.69s	remaining: 10.3s
355:	learn: 0.2636783	total: 5.71s	remaining: 10.3s
356:	learn: 0.2636342	total: 5.72s	remaining: 10.3s
357:	learn: 0.2635652	total: 5.74s	remaining: 10.3s
358:	learn: 0.

508:	learn: 0.2532992	total: 8.3s	remaining: 8.01s
509:	learn: 0.2532454	total: 8.31s	remaining: 7.99s
510:	learn: 0.2531563	total: 8.33s	remaining: 7.97s
511:	learn: 0.2530860	total: 8.35s	remaining: 7.96s
512:	learn: 0.2530380	total: 8.37s	remaining: 7.94s
513:	learn: 0.2529815	total: 8.38s	remaining: 7.92s
514:	learn: 0.2529494	total: 8.4s	remaining: 7.91s
515:	learn: 0.2528815	total: 8.41s	remaining: 7.89s
516:	learn: 0.2528293	total: 8.43s	remaining: 7.88s
517:	learn: 0.2527963	total: 8.44s	remaining: 7.86s
518:	learn: 0.2527305	total: 8.46s	remaining: 7.84s
519:	learn: 0.2526770	total: 8.48s	remaining: 7.83s
520:	learn: 0.2526509	total: 8.49s	remaining: 7.81s
521:	learn: 0.2526162	total: 8.5s	remaining: 7.79s
522:	learn: 0.2525663	total: 8.52s	remaining: 7.77s
523:	learn: 0.2525132	total: 8.54s	remaining: 7.76s
524:	learn: 0.2524395	total: 8.55s	remaining: 7.74s
525:	learn: 0.2523918	total: 8.56s	remaining: 7.72s
526:	learn: 0.2522661	total: 8.57s	remaining: 7.7s
527:	learn: 0.25

673:	learn: 0.2449666	total: 10.8s	remaining: 5.22s
674:	learn: 0.2449052	total: 10.8s	remaining: 5.2s
675:	learn: 0.2448655	total: 10.8s	remaining: 5.19s
676:	learn: 0.2448139	total: 10.8s	remaining: 5.17s
677:	learn: 0.2447824	total: 10.9s	remaining: 5.16s
678:	learn: 0.2447388	total: 10.9s	remaining: 5.14s
679:	learn: 0.2446711	total: 10.9s	remaining: 5.12s
680:	learn: 0.2446514	total: 10.9s	remaining: 5.1s
681:	learn: 0.2446088	total: 10.9s	remaining: 5.08s
682:	learn: 0.2445371	total: 10.9s	remaining: 5.07s
683:	learn: 0.2444852	total: 10.9s	remaining: 5.05s
684:	learn: 0.2444376	total: 10.9s	remaining: 5.03s
685:	learn: 0.2443809	total: 11s	remaining: 5.01s
686:	learn: 0.2443514	total: 11s	remaining: 5s
687:	learn: 0.2443107	total: 11s	remaining: 4.98s
688:	learn: 0.2442605	total: 11s	remaining: 4.96s
689:	learn: 0.2442121	total: 11s	remaining: 4.95s
690:	learn: 0.2441721	total: 11s	remaining: 4.93s
691:	learn: 0.2441388	total: 11s	remaining: 4.92s
692:	learn: 0.2440814	total: 11

840:	learn: 0.2377778	total: 13.9s	remaining: 2.63s
841:	learn: 0.2377313	total: 13.9s	remaining: 2.61s
842:	learn: 0.2377135	total: 13.9s	remaining: 2.6s
843:	learn: 0.2376191	total: 13.9s	remaining: 2.58s
844:	learn: 0.2375758	total: 14s	remaining: 2.56s
845:	learn: 0.2375296	total: 14s	remaining: 2.54s
846:	learn: 0.2374784	total: 14s	remaining: 2.53s
847:	learn: 0.2374356	total: 14s	remaining: 2.51s
848:	learn: 0.2373951	total: 14s	remaining: 2.49s
849:	learn: 0.2373671	total: 14s	remaining: 2.47s
850:	learn: 0.2373257	total: 14s	remaining: 2.46s
851:	learn: 0.2373181	total: 14s	remaining: 2.44s
852:	learn: 0.2373025	total: 14.1s	remaining: 2.42s
853:	learn: 0.2372775	total: 14.1s	remaining: 2.41s
854:	learn: 0.2372494	total: 14.1s	remaining: 2.39s
855:	learn: 0.2372329	total: 14.1s	remaining: 2.38s
856:	learn: 0.2371929	total: 14.1s	remaining: 2.36s
857:	learn: 0.2371657	total: 14.2s	remaining: 2.34s
858:	learn: 0.2371261	total: 14.2s	remaining: 2.33s
859:	learn: 0.2370861	total: 

0.8760594521557549