In [10]:
# Stretch the notebook container to the full browser width.
# `display` and `HTML` are imported from IPython.display, the public location;
# importing `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [11]:
import pandas as pd
import numpy as np
import os

from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import export_graphviz

import gc
import pickle as pickle

from sklearn.model_selection import KFold
from itertools import product
import tensorflow as tf
from sklearn.preprocessing import StandardScaler,MinMaxScaler 
import multiprocessing as mp
import importlib
from sklearn.model_selection import train_test_split

In [12]:
#tail -n +2 train.csv | split -l 150000


from os import listdir
from os.path import isfile, join

# Directory holding the training segment files (see the `split` command noted above).
TRAIN_SPLITS='train'
# listdir() returns entries in arbitrary, filesystem-dependent order. Sort them
# so that the seeded train/validation split is reproducible across runs and machines.
splits = sorted(f for f in listdir(TRAIN_SPLITS) if isfile(join(TRAIN_SPLITS, f)))

In [13]:
# Column names assigned to each headerless training segment file after it is read.
columns = ['acoustic_data','time_to_failure']



In [None]:
# Hold out 10% of the segment files for validation; the fixed random_state seeds the shuffle.
train_data, val_data = train_test_split(splits, test_size=0.1, random_state=42)

In [None]:
# Length of one segment and number of segment files grouped into one step.
TIMESTEPS = 150000
BATCH_SIZE = 16

# Ceiling division: a final, partially filled group still counts as one step.
NUMBER_OF_BATCHES = -(-len(train_data) // BATCH_SIZE)
NUMBER_OF_VALIDATION_STEPS = -(-len(val_data) // BATCH_SIZE)

In [None]:
# Group the file names into one chunk per step. np.array_split accepts counts that
# do not divide evenly, so chunk sizes may differ by one and no file is dropped.
train_data_batch = np.array_split(train_data, NUMBER_OF_BATCHES)
val_data_batch = np.array_split(val_data, NUMBER_OF_VALIDATION_STEPS)

In [None]:
# Default fraction of index draws used by add_noise.
NOISE=0.75

def add_noise(dff, pct=NOISE):
    """Return a copy of ``dff`` with part of ``acoustic_data`` replaced by Gaussian noise.

    The replacement values are drawn from a normal distribution that uses the
    column's own mean and standard deviation. The input frame is never
    modified, so the caller can keep using the clean signal afterwards.

    Parameters
    ----------
    dff : pandas.DataFrame
        Frame containing an ``acoustic_data`` column.
    pct : float
        Number of index draws, expressed as a fraction of ``len(dff)``.
        Draws are made with replacement (the ``np.random.choice`` default), so
        the share of distinct rows that get overwritten can be below ``pct``.

    Returns
    -------
    pandas.DataFrame
        A new frame; columns other than ``acoustic_data`` are unchanged.
    """
    # Work on a copy: writing through .loc on the caller's frame would also
    # change any array the caller obtained earlier from that frame.
    noisy = dff.copy()
    mu = noisy['acoustic_data'].mean()
    sigma = noisy['acoustic_data'].std()

    n_draws = int(len(noisy) * pct)
    if n_draws == 0:
        # Nothing to replace (empty frame or very small pct).
        return noisy

    indices = np.random.choice(noisy.index.values, n_draws)
    noisy.loc[indices, 'acoustic_data'] = np.random.normal(mu, sigma, len(indices))
    return noisy


def get_batch(list_of_files, valid=False):
    """Load segment files and return their standardised signals with targets.

    Each file is read from the ``train`` directory, its ``acoustic_data``
    column is standardised with a per-file ``StandardScaler``, and the last
    ``time_to_failure`` value of the file is used as the target. Unless
    ``valid`` is true, a noise-augmented version of every segment is appended
    right after the clean one, with the same target.

    Parameters
    ----------
    list_of_files : iterable of str
        File names relative to the ``train`` directory.
    valid : bool
        When true, skip the noise augmentation.

    Returns
    -------
    tuple(list, list)
        ``(batch, target)``: a list of 1-D signal arrays and a list of scalar targets.
    """
    batch = []
    target = []

    for file in list_of_files:
        path = f'train/{file}'
        df = pd.read_csv(path, float_precision='round_trip', header=None)
        df.columns = columns
        df[['acoustic_data']] = StandardScaler().fit_transform(df[['acoustic_data']].astype('float'))

        # Take a copy of the clean signal. `.values` can be a view of the frame's
        # data, and add_noise may write into that same frame, which would turn
        # the "clean" sample into a second noisy one.
        batch.append(df['acoustic_data'].values.copy())
        target.append(df['time_to_failure'].values[-1])

        if not valid:
            df_noise = add_noise(df)
            batch.append(df_noise['acoustic_data'].values)
            target.append(df_noise['time_to_failure'].values[-1])
    return (batch, target)

In [None]:
from keras.utils import Sequence

class MY_Generator(Sequence):
    """Keras ``Sequence`` that serves pre-grouped batches of segment files.

    Every element of ``list_of_files`` is one group of file names; a group is
    loaded on demand through ``get_batch`` when its index is requested.
    """

    def __init__(self, list_of_files, steps,name):
        # list_of_files: indexable collection with one group of file names per step
        # steps: value reported by __len__
        # name: 'val' switches augmentation off; any other value leaves it on
        self.list_of_files = list_of_files
        self.steps = steps
        self.name = name

    def __len__(self):
        """Number of batches per epoch, as supplied at construction time."""
        return self.steps

    def __getitem__(self, idx):
        """Load group ``idx`` and return ``(inputs, targets)`` as NumPy arrays.

        Inputs are reshaped to ``(-1, TIMESTEPS, 1)`` and targets to ``(-1, 1)``.
        """
        is_validation = self.name == 'val'
        signals, labels = get_batch(self.list_of_files[idx], is_validation)

        inputs = np.array(signals).reshape(-1, TIMESTEPS, 1)
        targets = np.array(labels).reshape(-1, 1)
        return (inputs, targets)

Using TensorFlow backend.


In [None]:
from keras.models import Sequential
from keras.layers import LSTM,Dense,Dropout,Flatten,GRU,Conv1D,TimeDistributed,MaxPooling1D,CuDNNGRU,CuDNNLSTM
from keras.callbacks import EarlyStopping
from keras.layers.normalization import BatchNormalization
from keras.layers import Bidirectional

from keras import backend as K

# Reset the Keras/TensorFlow global state before (re)building the model.
K.clear_session()

# Length of one input segment; must agree with the value used by the generators.
TIMESTEPS=150000

dropout=0.2

# Hyper-parameters shared by every convolutional down-sampling stage.
kernel_size=5
filters=10
strides=5
pool_size=2
N_CONV_STAGES=3

# Conv1D (stride 5) -> MaxPooling1D (pool 2) -> BatchNormalization, repeated.
# Each stage shortens the sequence by a factor of 10 before it reaches the GRU.
my_model = Sequential()
for stage in range(N_CONV_STAGES):
    # Only the first layer of a Sequential model needs to be told the input shape.
    first_layer_kwargs = {'input_shape': (TIMESTEPS, 1)} if stage == 0 else {}
    my_model.add(
        Conv1D(filters=filters, kernel_size=kernel_size,
               strides=strides, **first_layer_kwargs)
    )
    my_model.add(MaxPooling1D(pool_size=pool_size))
    my_model.add(BatchNormalization())

# Recurrent summary of the down-sampled sequence, then a single regression output.
my_model.add(GRU(units = 8,dropout=dropout,recurrent_dropout=dropout))
my_model.add(Dense(1))

my_model.compile(loss = 'mae',optimizer = 'adam', metrics = ['mean_absolute_error'])
my_model.summary()




callbacks = [
    # Stop once val_loss has not improved for 30 epochs, then roll the model back
    # to its best epoch. Without restore_best_weights the model keeps the weights
    # of the last epoch, i.e. 30 epochs past the best validation loss.
    EarlyStopping(monitor='val_loss', patience=30, verbose=0,
                  restore_best_weights=True),
]

my_training_batch_generator = MY_Generator(train_data_batch, NUMBER_OF_BATCHES, 'train')
my_validation_batch_generator = MY_Generator(val_data_batch, NUMBER_OF_VALIDATION_STEPS, 'val')

# Steps per epoch are taken from the generators' __len__, so they are not passed here.
# NOTE(review): training batches add noise through the global NumPy RNG; confirm
# that the worker processes do not all start from the same RNG state.
history = my_model.fit_generator(generator=my_training_batch_generator,
                                 epochs=500,
                                 validation_data=my_validation_batch_generator,
                                 callbacks=callbacks,
                                 shuffle=True,
                                 use_multiprocessing=True,
                                 workers=8)


import matplotlib.pyplot as plt

# Training and validation loss per epoch, as recorded in the fit history.
# The second curve is val_loss, so it is labelled "validation" rather than "test".
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='validation')
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()

#import math
#print("best rmse val:", math.sqrt(my_model.history.history['val_mean_squared_error'][-1]))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 30000, 10)         60        
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 15000, 10)         0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 15000, 10)         40        
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 3000, 10)          510       
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 1500, 10)          0         
_________________________________________________________________
batch_normalization_2

Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 37/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 80/500


Epoch 81/500
Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 83/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500


<Figure size 640x480 with 1 Axes>

In [None]:
# Directory with the test segments; keep regular files only, in listdir() order.
TEST_SPLITS = 'test'
test_splits = list(
    filter(lambda name: isfile(join(TEST_SPLITS, name)), listdir(TEST_SPLITS))
)

In [None]:
# Echo the collected test file names.
# NOTE(review): this displays the entire list; a slice such as test_splits[:5] would keep the output short.
test_splits

['seg_a35c82.csv',
 'seg_5165a4.csv',
 'seg_934087.csv',
 'seg_185ad6.csv',
 'seg_8d4965.csv',
 'seg_eb2e11.csv',
 'seg_c1fe9a.csv',
 'seg_8ce632.csv',
 'seg_e1a9e0.csv',
 'seg_51f6fd.csv',
 'seg_05f9d6.csv',
 'seg_9aa6e2.csv',
 'seg_6dfab7.csv',
 'seg_00be11.csv',
 'seg_486973.csv',
 'seg_049957.csv',
 'seg_2bf9dd.csv',
 'seg_214492.csv',
 'seg_f6b70c.csv',
 'seg_5407b0.csv',
 'seg_70be4d.csv',
 'seg_376908.csv',
 'seg_86d847.csv',
 'seg_2018c8.csv',
 'seg_fbe3c2.csv',
 'seg_9c2843.csv',
 'seg_dceeca.csv',
 'seg_f9e4f2.csv',
 'seg_1e0523.csv',
 'seg_743775.csv',
 'seg_061a33.csv',
 'seg_d83890.csv',
 'seg_9a43ef.csv',
 'seg_f8dd7e.csv',
 'seg_d89369.csv',
 'seg_04ae38.csv',
 'seg_d5dbc1.csv',
 'seg_e7ced6.csv',
 'seg_753ba6.csv',
 'seg_69a53e.csv',
 'seg_8fd465.csv',
 'seg_9d6ff0.csv',
 'seg_724df9.csv',
 'seg_490092.csv',
 'seg_c4af54.csv',
 'seg_4c8db6.csv',
 'seg_47d374.csv',
 'seg_268956.csv',
 'seg_907c52.csv',
 'seg_943de0.csv',
 'seg_3db0a8.csv',
 'seg_cedf3e.csv',
 'seg_b1b95e

In [None]:
# Split the test file names into one chunk per CPU core, one chunk per worker task.
test_split_chunks = np.array_split(test_splits,mp.cpu_count())

# Reload the build_segment module so that edits to it are picked up without
# restarting the kernel. The reload must happen before the from-import below.
import build_segment
importlib.reload(build_segment)

from build_segment import build_segment_f

# In a notebook kernel __name__ is '__main__', so this guard passes.
if __name__ ==  '__main__':
    pool = mp.Pool(mp.cpu_count())
    # Submit one asynchronous task per chunk; `res` keeps the AsyncResult handles,
    # which are read with .get() in a later cell.
    # NOTE(review): the meaning of the trailing True argument is not visible here —
    # confirm against build_segment_f.
    res = [pool.apply_async(build_segment_f,args=[chunk,TIMESTEPS, True]) \
           for chunk in test_split_chunks]
    # close() stops new submissions; join() blocks until every task has finished.
    pool.close()
    pool.join()

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


  out=out, **kwargs)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  out=out, **kwargs)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  ret = ret.dtype.type(ret / rcount)
  ret = ret.dtype.type(ret / rcount)
  ret = ret.dtype.type(ret / rcount)
  ret = ret.dtype.type(ret / rcount)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  updated_m

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcoun

  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * ne

  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_

  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcoun

  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  out=out, **kwargs)
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type

  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcoun

  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * ne

  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) *

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcoun

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_cou

  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormali

  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type

  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_cou

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  new_unnormalized_variance = np.nanvar(X, axis=0) * new_sample_count
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcoun

In [None]:
# Number of values per time step. The model was built with
# input_shape=(TIMESTEPS, 1), so this must be 1. It is defined here because no
# other cell in this notebook defines it.
NUMBER_OF_FEATURES = 1

ids = []
preds = []
for r in res:
    # r.get() yields the frames produced by one worker task.
    for df in r.get():
        done = len(ids)
        if done % 100 == 0:
            print(done)
        # Segment id is the file name without its extension.
        ids.append(df['seg_id'].unique()[0].split(".")[0])
        test_df = df.drop('seg_id', axis=1)
        # One segment per predict call: shape (1, steps, NUMBER_OF_FEATURES).
        model_input = test_df.values.reshape(1, -1, NUMBER_OF_FEATURES)
        preds.append(my_model.predict(model_input)[0][0])

In [None]:
# Assemble the submission table: one row per test segment id with its prediction.
submission = pd.DataFrame(ids)
submission.columns = ['seg_id']
submission['time_to_failure'] = preds

# Write the table to disk without the index column.
submission.to_csv('submission.csv', index=False)

In [None]:
# Summary statistics of the predicted time_to_failure values.
submission["time_to_failure"].describe()

In [None]:
# Summary statistics of the predictions (same expression as the cell above).
submission["time_to_failure"].describe()

In [None]:
# First rows of the submission table.
submission.head()

In [None]:
# Show only the first few predictions instead of dumping the entire list.
preds[:10]

In [None]:
# Inspect the first element of the first worker task's result.
res[0].get()[0]

In [None]:
# Pick one test file at random and summarise its columns.
sample_file = np.random.choice(test_splits)
path = 'test/%s' % sample_file

# The first row of the file is used as the header.
df = pd.read_csv(path, float_precision='round_trip', header=[0])

df.describe()