# Setup

In [None]:
import numpy as np 
import pandas as pd

from catboost import CatBoostRegressor, Pool
from catboost.eval.catboost_evaluation import *

from plotly.offline import iplot, init_notebook_mode
# Enable plotly's notebook mode so the iplot calls in this notebook render inline.
# NOTE(review): connected=False presumably bundles plotly.js into the notebook
# rather than loading it from a CDN — confirm against the plotly docs.
init_notebook_mode(connected=False)

import os

# Print the contents of the input root, then of the two dataset folders
# this notebook reads from.
for input_dir in ("../input",
                  "../input/LANL-Earthquake-Prediction",
                  "../input/lanl-features"):
    print(os.listdir(input_dir))

# Read Data

In [None]:
# Train/test feature tables and the training targets, read in that order.
X, X_test, y = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/lanl-features/train_features.csv',
        '../input/lanl-features/test_features.csv',
        '../input/lanl-features/y.csv',
    )
)
# Sample submission file shipped with the competition data.
submission = pd.read_csv('../input/LANL-Earthquake-Prediction/sample_submission.csv')

## Save Train data

In [None]:
# Place the columns of `y` in front of the feature columns, so the target ends up
# at column 0 of the exported file.
df_train = y.join(other=X)
# Export raw values only: no header row and no index column.
df_train.to_csv(path_or_buf='train.csv', index=False, header=False)
df_train

## Learning Parameters

In [None]:
# Training configuration passed to the CatBoost evaluation runs below.
learn_params = dict(
    iterations=10,
    random_seed=0,
    logging_level='Silent',
    loss_function='MAE',
    # Training is set to run on GPU; a specific device could additionally be
    # selected with devices='1' (left disabled here).
    task_type='GPU',
    boosting_type='Ordered',
    # For feature evaluation learning time is important and we need just the relative quality
    max_ctr_complexity=4,
)

## Features To Evaluate

In [None]:
# Indices of the features to evaluate: 0 through 49.
features_to_evaluate = list(range(50))
features_to_evaluate

## Create Description File for Features

In [None]:
from catboost.utils import create_cd

# Map each feature index to its column name. Column 0 of df_train holds the label,
# so it is left out and the remaining columns are numbered from 0.
feature_names = dict(enumerate(df_train.columns[1:]))
    
create_cd(
    label=0, 
    cat_features=[477, 804, 981],
    feature_names=feature_names,
    output_path='train.cd'
)
!cat 'train.cd'

## Catboost Evaluation

In [None]:
# Fold size is half the number of rows in the training feature table.
fold_size = len(X) // 2
fold_offset = 0  # NOTE(review): not referenced by any of the visible cells
folds_count = 5
random_seed = 1

# Optional arguments left at their defaults:
#   working_dir  - working directory; temp files are created during evaluation,
#                  so ensure you have enough free space. By default a unique
#                  temp dir is created in the system temp directory.
#   group_column - set it if you have a column which should be used to split.
evaluator = CatboostEvaluation(
    'train.csv',
    fold_size,
    fold_count=folds_count,
    column_description='train.cd',
    delimiter=',',
    partition_random_seed=random_seed,
)


In [None]:
%%time
result = evaluator.eval_features(learn_config=learn_params,
                                 eval_metrics=["MAE"],
                                 features_to_eval=features_to_evaluate)

In [None]:
# Pull the MAE-specific results out of the feature evaluation.
MAE_result = result.get_metric_results("MAE")

In [None]:
# Show how the evaluated cases compare with the baseline on MAE.
MAE_result.get_baseline_comparison()

In [None]:
# Plot the learning curves for fold 0.
iplot(MAE_result.create_fold_learning_curves(0))

In [None]:
# Look up which case the MAE results treat as the baseline.
baseline_case = MAE_result.get_baseline_case()

In [None]:
# Display the baseline case (notebook cell output).
baseline_case

In [None]:
# Fetch the MAE results recorded for the baseline case.
baseline_result = MAE_result.get_case_result(baseline_case)

In [None]:
# Plot the learning curves of the baseline case.
iplot(baseline_result.create_learning_curves_plot())

# Choose learning rate

In [None]:
# Work on a copy of the shared parameters so that anything changed for the
# learning-rate search cannot leak back into learn_params.
learning_rate_params = dict(learn_params)

In [None]:
# Baseline for the learning-rate search: the shared parameters with a 0.03 step.
baseline_learning_rate = 0.03
baseline_case = ExecutionCase(params=learning_rate_params,
                              label="Step {}".format(baseline_learning_rate),
                              learning_rate=baseline_learning_rate)

In [None]:
# Alternative learning rates to compare against the baseline case, one case each.
candidate_learning_rates = [0.05, 0.015]
other_learning_rate_cases = [
    ExecutionCase(params=learning_rate_params,
                  label="Step {}".format(candidate),
                  learning_rate=candidate)
    for candidate in candidate_learning_rates
]

In [None]:
# For learning rate estimation we need just 1 fold; fold size and partition seed
# are reused from the feature-evaluation setup.
evaluator = CatboostEvaluation(
    'train.csv',
    fold_size,
    fold_count=1,
    column_description='train.cd',
    delimiter=',',
    partition_random_seed=random_seed,
)

In [None]:
# Show the working directory reported by the evaluator (notebook cell output).
evaluator.get_working_dir()

In [None]:
# Evaluate the alternative learning-rate cases against the baseline case, scored by MAE.
learning_rates_result = evaluator.eval_cases(baseline_case, 
                                             other_learning_rate_cases,
                                             eval_metrics="MAE")

In [None]:
# Pull the MAE-specific results out of the learning-rate comparison.
MAE_learning_rate_search_results = learning_rates_result.get_metric_results("MAE")

In [None]:
# Build the fold-0 learning-curve figure for the learning-rate comparison,
# using the default offset.
# NOTE(review): an explicit offset=200 was dropped here — training is configured
# for only 10 iterations, so an offset of 200 would presumably leave nothing to
# plot. Confirm the offset semantics against the CatBoost evaluation docs.
tmp = MAE_learning_rate_search_results.create_fold_learning_curves(fold=0)

In [None]:
# Render the learning-rate comparison figure built in the previous cell.
iplot(tmp)