# Importing Libraries 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime

# Reading train.csv and setting the train & test folder paths

In [None]:
# Folders holding the per-segment CSV files; the trailing slash lets later
# cells append "<segment_id>.csv" directly.
train_dir = "../input/predict-volcanic-eruptions-ingv-oe/train/"
test_dir = "../input/predict-volcanic-eruptions-ingv-oe/test/"

# Training table; later cells read its segment_id and time_to_eruption columns.
train_csv_path = '../input/predict-volcanic-eruptions-ingv-oe/train.csv'
train = pd.read_csv(train_csv_path)

In [None]:
"""Converting time_to_eruption to hours, minutes & seconds"""

def _as_timedelta(raw_value):
    """Turn one time_to_eruption value into a timedelta of ``raw_value / 100`` seconds."""
    # presumably the raw values are hundredths of a second -- TODO confirm against the data description
    return datetime.timedelta(seconds=raw_value / 100)


train['h:m:s'] = train['time_to_eruption'].map(_as_timedelta)
train.head()

> **Sample train dataset**

In [None]:
# Load one training segment so its columns can be inspected.
sample_path = '../input/predict-volcanic-eruptions-ingv-oe/train/1000015382.csv'
sample = pd.read_csv(sample_path)
sample.head()

In [None]:
# Per-column summary statistics of the sample segment.
sample.describe()

Plotting sample data

In [None]:
# One subplot per column. Missing values are replaced by 0 for plotting only;
# `sample` itself is left unchanged.
sample_filled = sample.fillna(0)
sample_filled.plot(subplots=True, figsize=(20, 15))
plt.tight_layout()
plt.show()

 **Data for Training Set**

In [None]:
def get_csv(index):
    """Build a one-row feature frame for the training segment at position ``index``.

    Reads the segment's CSV from ``train_dir`` (file name taken from
    ``train.segment_id``), reduces every column of that file to its mean, and
    appends the matching ``train.time_to_eruption`` value as the last column.

    Parameters
    ----------
    index : int
        Positional row index into ``train``.

    Returns
    -------
    pandas.DataFrame
        A single row (row label 0) holding one mean per CSV column, followed
        by ``time_to_eruption``.
    """
    segment_id = train.segment_id.iloc[index]
    segment = pd.read_csv(train_dir + str(segment_id) + ".csv")

    # mean() skips NaN, so a column only yields NaN when it has no values at all.
    # Building the row directly avoids broadcasting each mean over every row and
    # then drawing a random (unseeded) row from the identical copies.
    features = segment.mean().to_frame().T

    features['time_to_eruption'] = train.time_to_eruption.iloc[index]
    return features

In [None]:
# Collect one feature row per training segment and concatenate once at the end;
# concatenating inside the loop copies the whole accumulated frame on every pass.
train_rows = [get_csv(index) for index in range(train.shape[0])]

# Reversed so the row order stays what prepending each new row used to produce
# (last segment of `train` first). An empty `train` still yields an empty frame.
data = pd.concat(train_rows[::-1]) if train_rows else pd.DataFrame()
    

In [None]:
# Sanity check: one row is added per row of `train`.
data.shape

In [None]:
# Preview the aggregated training rows.
data.head()

In [None]:
# Fill every remaining NaN with the mean of its own column
# (a column whose mean is itself NaN stays NaN).
data = data.fillna(data.mean())

# Remaining NaN count per column.
data.isnull().sum()

**Data for Test Set**

In [None]:
test = pd.read_csv("../input/predict-volcanic-eruptions-ingv-oe/sample_submission.csv")


def get_csv_test(index):
    """Build a one-row feature frame for the test segment at position ``index``.

    Reads the segment's CSV from ``test_dir`` (file name taken from
    ``test.segment_id``) and reduces every column of that file to its mean.

    Parameters
    ----------
    index : int
        Positional row index into ``test``.

    Returns
    -------
    pandas.DataFrame
        A single row (row label 0) holding one mean per CSV column.
    """
    segment_id = test.segment_id.iloc[index]
    segment = pd.read_csv(test_dir + str(segment_id) + ".csv")

    # mean() skips NaN, so a column only yields NaN when it has no values at all.
    # Building the row directly avoids broadcasting each mean over every row and
    # then drawing a random (unseeded) row from the identical copies.
    return segment.mean().to_frame().T

In [None]:
# Build the feature rows in the same order as `test`. The predictions computed
# from `data_test` are later written back into `test` by position, so row i here
# must belong to test.segment_id.iloc[i]. Prepending each new row (as this cell
# used to) reversed the order and paired every prediction with the wrong segment.
test_rows = [get_csv_test(index) for index in range(test.shape[0])]

# Single concat instead of one per iteration; an empty `test` still yields an empty frame.
data_test = pd.concat(test_rows) if test_rows else pd.DataFrame()

In [None]:
# Sanity check: one row is added per row of `test`.
data_test.shape

In [None]:
# Fill every remaining NaN with the mean of its own column, computed on the
# test rows (a column whose mean is itself NaN stays NaN).
data_test = data_test.fillna(data_test.mean())

# Remaining NaN count per column.
data_test.isnull().sum()

In [None]:
# Split the training frame into target and predictors; the test frame holds predictors only.
y_train = data['time_to_eruption']
x_train = data.drop(columns='time_to_eruption')

# Copy so later transformations do not touch `data_test`.
x_test = data_test.copy()

# Dimensionality Reduction

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

# Reduce the features to 5 components; the projection is fitted on the training
# rows only and then applied to both sets.
# NOTE(review): LDA is a supervised, class-based reduction and treats each
# distinct y value as a class, while time_to_eruption is used as a regression
# target below -- confirm this is intended (an unsupervised reducer may fit better).
lda = LDA(n_components=5)
lda.fit(x_train, y_train)
x_test = lda.transform(x_test)
x_train = lda.transform(x_train)

# **XGBoost**

In [None]:
import xgboost as xgb
from xgboost import XGBRegressor

# Hyper-parameters gathered in one place so they are easy to tune.
xgb_params = {
    'max_depth': 10,
    'n_estimators': 20,
    'learning_rate': 0.3,
}

model = XGBRegressor(**xgb_params)
model.fit(x_train, y_train)

Importance Graph

In [None]:
# Create the axes with the desired size first: setting
# plt.rcParams['figure.figsize'] after the figure already exists does not resize it.
fig, ax = plt.subplots(figsize=(5, 5))
xgb.plot_importance(model, ax=ax)
plt.show()

Predictions

In [None]:
# Predict the target for every row of the reduced test matrix; the output has
# one value per row of x_test, in the same order.
pred = model.predict(x_test)
pred

In [None]:
# `pred` is a plain array, so it is matched to `test` by position: it must be in
# the same row order as test.segment_id.
test['time_to_eruption'] = pred

# Keep only the two columns written to the submission file.
sub = test.loc[:, ['segment_id', 'time_to_eruption']]

In [None]:
# Preview the submission rows before writing them out.
sub.head()

In [None]:
# Write without the row index so the file holds only segment_id and time_to_eruption.
sub.to_csv('submission.csv',index=False)