In [None]:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import gc
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.metrics import mean_squared_error

from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import HuberRegressor
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
import lightgbm as lgbm 
from lightgbm import LGBMRegressor

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
    #for filename in filenames:
        #print(os.path.join(dirname, filename))
#import warnings
#warnings.filterwarnings("ignore")
        
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from scipy.ndimage import maximum_filter1d
from scipy.ndimage import minimum_filter1d

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
# Module-level scaler instances (one min-max, one standardising).
# They are not referenced by any of the cells visible in this notebook.
MMS = MinMaxScaler()
StSc = StandardScaler()
from sklearn.model_selection import train_test_split, RandomizedSearchCV


In [None]:
# Root folder of the competition dataset; the trailing slash is part of the
# value so that sub-paths can be appended directly.
V_PATH = '../input/predict-volcanic-eruptions-ingv-oe/'
# Per-split sub-folders, both derived from the root above.
TRAIN_PATH = '{}train/'.format(V_PATH)
TEST_PATH = '{}test/'.format(V_PATH)

In [None]:
# Names of the ten raw sensor columns.
SENSOR_COLS = ['sensor_1', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5', 'sensor_6',
       'sensor_7', 'sensor_8', 'sensor_9', 'sensor_10']


# Per-sensor column names for each rolling statistic: '<sensor>_<suffix>'.
# The rolling-mean names use the '_rmean' suffix so they do not collide with
# the rolling-min names and so they agree with ROLL_DESCR below.
SENSOR_RMEANS = [x+'_rmean' for x in SENSOR_COLS]
SENSOR_RSTDS = [x+'_rstd' for x in SENSOR_COLS]
SENSOR_RMINS = [x+'_rmin' for x in SENSOR_COLS]
SENSOR_RMAXES = [x+'_rmax' for x in SENSOR_COLS]
SENSOR_RSKEWS = [x+'_rskew' for x in SENSOR_COLS]
SENSOR_RSUMS = [x+'_rsum' for x in SENSOR_COLS]
SENSOR_RVARS = [x+'_rvar' for x in SENSOR_COLS]
#SENSOR_RMADS = [x+'_rmad' for x in SENSOR_COLS]
#SENSOR_RKURTOSISES = [x+'_rkurtosis' for x in SENSOR_COLS]



#SENSOR_RGRADMEAN = [x+'_grad_rmean' for x in SENSOR_COLS]
#SENSOR_RGRADSTD = [x+'_grad_rstd' for x in SENSOR_COLS]

# All statistic name lists, in the same order as the suffixes in ROLL_DESCR.
SENSOR_RSTATS = [SENSOR_RMEANS, SENSOR_RSTDS, SENSOR_RMINS, SENSOR_RMAXES, SENSOR_RSKEWS, SENSOR_RSUMS, SENSOR_RVARS]

# Suffix of each rolling statistic; position i describes SENSOR_RSTATS[i].
ROLL_DESCR = ['rmean', 'rstd', 'rmin', 'rmax', 'rskew', 'rsum', 'rvar']

In [None]:
# Load train.csv from the dataset root. The path is built from V_PATH rather
# than repeating the root folder, so the location is configured in one place.
train = pd.read_csv(V_PATH + 'train.csv')
print(train.shape)
print(train.columns)

train.head(6)

In [None]:
# First 20 rows of the training table, kept as a small sample and displayed.
train_small = train.head(20)
train_small

In [None]:
# Build the list of test segment ids from the CSV file names under TEST_PATH.
# TEST_PATH is used instead of V_PATH+'/test/' because V_PATH already ends in
# a slash, so the old expression produced a doubled separator.
test_files = []
for _, _, filenames in os.walk(TEST_PATH):
    for filename in filenames:
        stem, extension = os.path.splitext(filename)
        # Only CSV files are segments; splitting on the real extension avoids
        # chopping four characters off a file with some other suffix.
        if extension.lower() == '.csv':
            test_files.append(stem)

# One row per test segment, with a single 'segment_id' column.
test = pd.DataFrame(test_files, columns=["segment_id"])
test.head(6)

In [None]:
# First 20 rows of the test segment table, kept as a small sample and displayed.
test_small = test.head(20)
test_small

In [None]:
# Load sample_submission.csv from the dataset root (built from V_PATH so the
# root folder is not repeated) and preview its first rows.
sample_submission = pd.read_csv(V_PATH + 'sample_submission.csv')
sample_submission.head(5)

In [None]:
# Histogram of the target column with 100 bins, no KDE curve, blue bars and
# black bar edges.
# NOTE(review): distplot is deprecated in newer seaborn releases; consider
# histplot if the environment is upgraded.
sns.distplot(
    train['time_to_eruption'],
    bins=100,
    hist=True,
    kde=False,
    color='blue',
    hist_kws=dict(edgecolor='black'),
)

In [None]:
# Read the two feature tables from the 'testtraindatasets' input folder.
# presumably these hold precomputed per-segment features — confirm against
# the notebook that produced them.
fulltrain = pd.read_csv(
    filepath_or_buffer='../input/testtraindatasets/volcano_train_small_fts_0_4431.csv'
)
fulltest = pd.read_csv(
    filepath_or_buffer='../input/testtraindatasets/volcano_test_small_fts_0_4520.csv'
)

In [None]:
# Keep at most the first 4431 training rows and the first 4520 test rows.
# The limits match the numbers embedded in the feature file names.
train_20 = fulltrain.iloc[:4431]
test_20 = fulltest.iloc[:4520]

In [None]:
# Show the training feature frame as this cell's output.
train_20

In [None]:
# Show the test feature frame as this cell's output.
test_20

In [None]:
# Target vector: the time_to_eruption column.
y = train_20['time_to_eruption']
# Feature matrix: every column except the id, the target and the
# 'Unnamed: 0' column.
X = train_20.drop(columns=['segment_id', 'time_to_eruption', 'Unnamed: 0'])

# Hold out 20% of the rows for validation; the seed is fixed so the split
# is reproducible.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Hyper-parameters for the LightGBM regressor. Most values equal the library
# defaults; the two that do not are annotated.
#
# Do not also pass 'min_data_in_leaf', 'feature_fraction' or
# 'bagging_fraction' here: LightGBM treats them as aliases of
# 'min_child_samples', 'colsample_bytree' and 'subsample', so supplying both
# spellings is redundant and makes it unclear which value is used once one of
# them is tuned.
params = {
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'max_depth': -1,
    'learning_rate': 0.1,
    'n_estimators': 100,
    'subsample_for_bin': 200, # 200000 is default
    #'objective': 'binary'
    'min_split_gain': 0.5,    # 0.0 is default
    'min_child_weight': 1e-3,
    'min_child_samples': 20,
    'subsample': 1,
    'colsample_bytree': 1.0,
    'random_state': 42
    #'device': 'cpu', # you can use GPU to achieve faster learning
}

# Instantiate the regressor directly from the dict so the two cannot drift
# apart; 'silent' is passed separately because it is not a tuning parameter.
model_lgbm_regr = LGBMRegressor(silent=True, **params)

# To view the model parameters:
model_lgbm_regr.get_params().keys()

In [None]:
# Fit on the training split, evaluating MAE on both the training and the
# validation split, with early_stopping_rounds=50 and verbose=200.
model_lgbm_regr.fit(
    X_train,
    y_train,
    eval_set=[(X_train, y_train), (X_valid, y_valid)],
    eval_metric="mae",
    early_stopping_rounds=50,
    verbose=200,
)


In [None]:
# Test feature matrix: everything except the id and the 'Unnamed: 0' column.
# NOTE(review): assumes the remaining columns match the training features in
# name and order — confirm against X_train.columns.
test_features = test_20.drop(['segment_id', 'Unnamed: 0'], axis=1)
predictions = model_lgbm_regr.predict(test_features)

In [None]:
# Assemble the submission table (segment id + predicted time to eruption)
# and write it to submission.csv with a header row and no index column.
submission = pd.DataFrame({
    'segment_id': test_20["segment_id"],
    'time_to_eruption': predictions,
})
submission.to_csv('submission.csv', index=False, header=True)

In [None]:
# Show the submission frame as this cell's output.
submission