In [1]:
import os
import pandas as pd
import numpy as np
from datetime import timedelta
from tqdm import tqdm

from sklearn.preprocessing import LabelEncoder, MinMaxScaler

import gc
import warnings
warnings.filterwarnings("ignore")

import torch 
import torch.nn as nn

from tqdm import tqdm
import random

In [3]:
# Load the training series.
DF_train = pd.read_csv('../Data/training_series_long.csv')

# Load the test file (it has no header row); only its first two columns are
# kept and they are named hostname / series.
DF_test = pd.read_csv('../Data/exemplary_solution.csv', header=None).iloc[:, [0, 1]]
DF_test.columns = ['hostname', 'series']

print(DF_train.shape, DF_test.shape)

In [4]:
# Keep only the training rows whose (hostname, series) pair also appears in
# the test dataset; the shape is printed before and after the filter.
print(DF_train.shape)
DF_train = DF_train.merge(DF_test, on=['hostname', 'series'], how='inner')
print(DF_train.shape)

# Parse the timestamps so they can be compared and joined as datetimes.
DF_train['time_window'] = pd.to_datetime(DF_train['time_window'])

In [5]:
# Build the complete hourly grid for every (hostname, series) pair of the test
# set, from the first observed hour up to 7 days after the last observed hour.

start_date = DF_train.time_window.min()
end_date = DF_train.time_window.max() + timedelta(days = 7)

print("Start date", start_date)
print("End date", end_date)

# A timedelta frequency works on every pandas version; the 'H' string alias
# is deprecated in recent releases.
date = pd.date_range(start = start_date, end = end_date, freq = timedelta(hours = 1))
temp = pd.DataFrame({'date' : date})

# One frame per pair, concatenated once at the end. Growing a DataFrame with
# .append inside the loop is quadratic, and DataFrame.append no longer exists
# in pandas >= 2.0.
frames = [temp.assign(hostname = row.hostname, series = row.series)
          for row in tqdm(DF_test.itertuples(index = False), total = len(DF_test))]

if frames:
    final_DF = pd.concat(frames, ignore_index = True)
else:
    # pd.concat refuses an empty list; keep the expected columns instead.
    final_DF = pd.DataFrame(columns = ['date', 'hostname', 'series'])

# reset_index(inplace=True) returns None, so chaining .head() onto it raised
# AttributeError; ignore_index above already yields a clean 0..n-1 index.
final_DF.head()

In [6]:
# Left-join the observed rows onto the hourly grid: grid rows without a
# matching observation keep missing values in the columns coming from DF_train.
print(final_DF.shape)
final_DF = final_DF.merge(DF_train,
                          how = 'left',
                          left_on = ['hostname', 'series', 'date'],
                          right_on = ['hostname', 'series', 'time_window'])
print(final_DF.shape)

In [7]:
# The grid column 'date' becomes the canonical time_window; the time_window
# column brought in by the left join (missing wherever there was no
# observation) is discarded. The original rename call was missing its closing
# parenthesis, which made the whole cell a SyntaxError.
final_DF = (final_DF
            .drop(columns = ['time_window'])
            .rename(columns = {'date': 'time_window'}))

# Fix the column order used by the rest of the notebook.
final_DF = final_DF[['hostname', 'series', 'time_window', 'Mean', 'SD', 'Open', 'High', 'Low', 'Close', 'Volume']]

# Checkpoint so the expensive steps above do not have to be re-run.
final_DF.to_pickle('../Data/interim_2.pickle')

#  ----------------------------------------------------------------------------------------------------------

# Reading the file

In [8]:
# Reload the checkpoint from disk. Unpickling executes arbitrary code, so
# this should only ever be pointed at a file this notebook wrote itself.
final_DF = pd.read_pickle('../Data/interim_2.pickle')
print(final_DF.shape)

final_DF.head()

(20920000, 10)


Unnamed: 0,hostname,series,time_window,Mean,SD,Open,High,Low,Close,Volume
0,host0001,cpuusagebyproc,2019-12-02 08:00:00,4.67,,4.67,4.67,4.67,4.67,1.0
1,host0001,cpuusagebyproc,2019-12-02 09:00:00,,,,,,,
2,host0001,cpuusagebyproc,2019-12-02 10:00:00,7.9125,1.6132,7.74,9.33,5.71,9.33,4.0
3,host0001,cpuusagebyproc,2019-12-02 11:00:00,,,,,,,
4,host0001,cpuusagebyproc,2019-12-02 12:00:00,,,,,,,


In [9]:
# Rows whose Mean is negative get Mean, Low, Volume and SD reset to 0.
condition = final_DF.Mean < 0

# One .loc assignment on the frame itself. The chained form
# (final_DF.Mean[condition] = 0) writes through an intermediate Series, which
# is not guaranteed to update final_DF and is a silent no-op under pandas
# copy-on-write.
final_DF.loc[condition, ['Mean', 'Low', 'Volume', 'SD']] = 0

# Sanity check: this should display no rows.
final_DF.loc[final_DF.Mean < 0, :]

Unnamed: 0,hostname,series,time_window,Mean,SD,Open,High,Low,Close,Volume


# Feature Engineering

In [11]:
# Calendar features derived from the timestamp of each row.
final_DF['year'] = final_DF.time_window.dt.year
final_DF['month'] = final_DF.time_window.dt.month
final_DF['day'] = final_DF.time_window.dt.day
final_DF['hour'] = final_DF.time_window.dt.hour
# Series.dt.week was removed in pandas 2.0; isocalendar().week is the same
# ISO week number. It comes back as a nullable UInt32 column, so it is cast
# to a plain integer to match the other calendar columns.
final_DF['week'] = final_DF.time_window.dt.isocalendar().week.astype('int64')
final_DF['week_day_no'] = final_DF.time_window.dt.weekday
final_DF.head()

Unnamed: 0,hostname,series,time_window,Mean,SD,Open,High,Low,Close,Volume,year,month,day,hour,week,week_day_no
0,host0001,cpuusagebyproc,2019-12-02 08:00:00,4.67,,4.67,4.67,4.67,4.67,1.0,2019,12,2,8,49,0
1,host0001,cpuusagebyproc,2019-12-02 09:00:00,,,,,,,,2019,12,2,9,49,0
2,host0001,cpuusagebyproc,2019-12-02 10:00:00,7.9125,1.6132,7.74,9.33,5.71,9.33,4.0,2019,12,2,10,49,0
3,host0001,cpuusagebyproc,2019-12-02 11:00:00,,,,,,,,2019,12,2,11,49,0
4,host0001,cpuusagebyproc,2019-12-02 12:00:00,,,,,,,,2019,12,2,12,49,0


# Splitting into train and test (prod) sets

In [12]:
# Rows up to and including the cut-off go to train_DF; rows strictly after it
# go to test_DF.
test_time_period = '2020-02-20 11:00:00'

is_train_row = final_DF['time_window'] <= test_time_period
is_test_row = final_DF['time_window'] > test_time_period

train_DF = final_DF.loc[is_train_row, :]
test_DF = final_DF.loc[is_test_row, :]

print(train_DF.shape, test_DF.shape)

(19240000, 16) (1680000, 16)


# Fill missing values with 0

In [13]:
# Every missing value in the training frame (hours of the grid that had no
# observation) is replaced by 0.
train_DF = train_DF.fillna(0)
train_DF.head()

Unnamed: 0,hostname,series,time_window,Mean,SD,Open,High,Low,Close,Volume,year,month,day,hour,week,week_day_no
0,host0001,cpuusagebyproc,2019-12-02 08:00:00,4.67,0.0,4.67,4.67,4.67,4.67,1.0,2019,12,2,8,49,0
1,host0001,cpuusagebyproc,2019-12-02 09:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2019,12,2,9,49,0
2,host0001,cpuusagebyproc,2019-12-02 10:00:00,7.9125,1.6132,7.74,9.33,5.71,9.33,4.0,2019,12,2,10,49,0
3,host0001,cpuusagebyproc,2019-12-02 11:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2019,12,2,11,49,0
4,host0001,cpuusagebyproc,2019-12-02 12:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2019,12,2,12,49,0


# Label Encoding

In [15]:
# Encode hostname and series as integer ids. The fitted encoders are kept in
# le_hostname / le_series so the ids can be mapped back to labels later.
le_hostname = LabelEncoder()
le_series = LabelEncoder()

train_DF['hostname'] = le_hostname.fit_transform(train_DF['hostname'])
train_DF['series'] = le_series.fit_transform(train_DF['series'])

train_DF.head()

Unnamed: 0,hostname,series,time_window,Mean,SD,Open,High,Low,Close,Volume,year,month,day,hour,week,week_day_no
0,0,13,2019-12-02 08:00:00,4.67,0.0,4.67,4.67,4.67,4.67,1.0,2019,12,2,8,49,0
1,0,13,2019-12-02 09:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2019,12,2,9,49,0
2,0,13,2019-12-02 10:00:00,7.9125,1.6132,7.74,9.33,5.71,9.33,4.0,2019,12,2,10,49,0
3,0,13,2019-12-02 11:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2019,12,2,11,49,0
4,0,13,2019-12-02 12:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2019,12,2,12,49,0


# Scaling the features

In [16]:
# Min-max scale every feature column except the timestamp, the target and the
# two encoded id columns; those three get their own scalers below so each can
# be inverted independently.
indep_cols_2_scale = train_DF.columns.difference(['time_window', 'Mean', 'hostname', 'series'])
print("Columns to scale", indep_cols_2_scale)

indep_scaler = MinMaxScaler()
train_DF[indep_cols_2_scale] = indep_scaler.fit_transform(train_DF[indep_cols_2_scale])

# Target column.
dep_scaler = MinMaxScaler()
train_DF['Mean'] = dep_scaler.fit_transform(train_DF[['Mean']])

# Encoded hostname ids.
hostname_scaler = MinMaxScaler()
train_DF['hostname'] = hostname_scaler.fit_transform(train_DF[['hostname']])

# Encoded series ids.
series_scaler = MinMaxScaler()
train_DF['series'] = series_scaler.fit_transform(train_DF[['series']])

train_DF.head()

Columns to scale Index(['Close', 'High', 'Low', 'Open', 'SD', 'Volume', 'day', 'hour', 'month',
       'week', 'week_day_no', 'year'],
      dtype='object')


Unnamed: 0,hostname,series,time_window,Mean,SD,Open,High,Low,Close,Volume,year,month,day,hour,week,week_day_no
0,0.0,0.5,2019-12-02 08:00:00,2.871618e-09,0.0,9.12909e-10,9.955924e-11,2.155062e-08,1.01001e-09,8.2e-05,0.0,1.0,0.033333,0.347826,0.941176,0.0
1,0.0,0.5,2019-12-02 09:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.033333,0.391304,0.941176,0.0
2,0.0,0.5,2019-12-02 10:00:00,4.865456e-09,8.659102e-10,1.513044e-09,1.989053e-10,2.63499e-08,2.017858e-09,0.000328,0.0,1.0,0.033333,0.434783,0.941176,0.0
3,0.0,0.5,2019-12-02 11:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.033333,0.478261,0.941176,0.0
4,0.0,0.5,2019-12-02 12:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.033333,0.521739,0.941176,0.0


In [21]:
def get_X_Y(dataset, step_size, slide_window = 1, days_to_forecast = 100):
    """Build sliding-window (input, target) samples for every (hostname, series) pair.

    For each pair, ``step_size`` consecutive rows form one input window and the
    ``Mean`` of the ``days_to_forecast`` rows that follow it form the target.
    A new window starts every ``slide_window`` rows. Pairs are visited in order
    of first appearance and rows of a pair are used in the order they appear
    in ``dataset``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain 'hostname', 'series', 'time_window' and 'Mean'. Every
        column other than 'time_window' and 'Mean' is used as an input
        feature (in column order) and must be numeric.
    step_size : int
        Number of rows in each input window.
    slide_window : int, default 1
        Offset, in rows, between the starts of two consecutive windows.
    days_to_forecast : int, default 100
        Number of rows to predict after each input window. Despite the name
        this is a row count, not a number of days.

    Returns
    -------
    X : torch.FloatTensor, shape (n_samples, step_size, n_features)
    Y : torch.FloatTensor, shape (n_samples, days_to_forecast)
    date_period : list of numpy.ndarray
        'time_window' values of the target rows of each sample.
    hostname, series : list
        Pair each sample belongs to.

    When no window fits, X and Y are empty 1-D float tensors and the lists
    are empty.
    """
    cols_2_drop = ['time_window', 'Mean']

    X = []
    label = []
    date_period = []
    hostname = []
    series = []

    # One grouping pass instead of a full-frame boolean mask per pair.
    # sort=False keeps the pairs in order of first appearance.
    grouped = dataset.groupby(['hostname', 'series'], sort = False)

    for (temp_hostname, temp_series), temp in tqdm(grouped, total = grouped.ngroups):

        # Convert each pair to NumPy once; the windows below are cheap slices
        # of these arrays rather than per-window DataFrame .loc/.drop calls.
        features = temp.drop(cols_2_drop, axis = 1).to_numpy(dtype = np.float32)
        target = temp['Mean'].to_numpy(dtype = np.float32)
        dates = temp['time_window'].to_numpy()

        # Last start index that still leaves room for input + target rows.
        last_start = len(temp) - step_size - days_to_forecast

        for in_start in range(0, last_start + 1, slide_window):
            in_end = in_start + step_size
            out_end = in_end + days_to_forecast

            X.append(features[in_start:in_end])
            label.append(target[in_end:out_end])

            # Saving the hostname and series of prediction
            hostname.append(temp_hostname)
            series.append(temp_series)
            date_period.append(dates[in_end:out_end].copy())

    if not X:
        return torch.FloatTensor([]), torch.FloatTensor([]), date_period, hostname, series

    # Stack once in NumPy: building a tensor from a Python list of ndarrays is
    # extremely slow.
    X = torch.from_numpy(np.stack(X))
    Y = torch.from_numpy(np.stack(label))
    return X, Y, date_period, hostname, series

In [22]:
def get_test_X(dataset, step_size, slide_window = 1, days_to_forecast = 100):
    """Build sliding input windows (no targets) for every (hostname, series) pair.

    For each pair, every run of ``step_size`` consecutive rows becomes one
    input window; windows start every ``slide_window`` rows. Pairs are visited
    in order of first appearance and rows of a pair are used in the order
    they appear in ``dataset``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain 'hostname', 'series', 'time_window' and 'Mean'. Every
        column other than 'time_window' and 'Mean' is used as an input
        feature (in column order) and must be numeric.
    step_size : int
        Number of rows in each input window.
    slide_window : int, default 1
        Offset, in rows, between the starts of two consecutive windows.
        (Previously accepted but silently ignored; the default reproduces the
        old behaviour.)
    days_to_forecast : int, default 100
        Unused here; kept so the signature matches get_X_Y.

    Returns
    -------
    X : torch.FloatTensor, shape (n_samples, step_size, n_features)
    date_period : list of numpy.ndarray
        'time_window' values of the rows inside each input window.
    hostname, series : list
        Pair each window belongs to.

    When no window fits, X is an empty 1-D float tensor and the lists are
    empty.
    """
    cols_2_drop = ['time_window', 'Mean']

    X = []
    hostname = []
    series = []
    date_period = []

    # One grouping pass instead of a full-frame boolean mask per pair.
    # sort=False keeps the pairs in order of first appearance.
    grouped = dataset.groupby(['hostname', 'series'], sort = False)

    for (temp_hostname, temp_series), temp in tqdm(grouped, total = grouped.ngroups):

        # Convert each pair to NumPy once; windows are slices of these arrays.
        features = temp.drop(cols_2_drop, axis = 1).to_numpy(dtype = np.float32)
        dates = temp['time_window'].to_numpy()

        for in_start in range(0, len(temp) - step_size + 1, slide_window):
            in_end = in_start + step_size

            X.append(features[in_start:in_end])

            # Saving the hostname and series of prediction
            hostname.append(temp_hostname)
            series.append(temp_series)
            date_period.append(dates[in_start:in_end].copy())

    if not X:
        return torch.FloatTensor([]), date_period, hostname, series

    # Stack once in NumPy: building a tensor from a Python list of ndarrays is
    # extremely slow.
    X = torch.from_numpy(np.stack(X))
    return X, date_period, hostname, series

# Splitting into train_local, valid_local, train_prod and test_prod

In [47]:
#### Local split: training period and validation period

train_local_time_start = pd.to_datetime('2020-01-15 23:00:00')
train_local_time_end = pd.to_datetime('2020-02-7 23:00:00')

# The validation period starts 7 days before the local training period ends.
validation_time_start = train_local_time_end - timedelta(days = 7)
validation_time_end = pd.to_datetime('2020-02-20 23:00:00')

# Both periods are open on the left and closed on the right.
in_train_local = ((train_DF['time_window'] > train_local_time_start) &
                  (train_DF['time_window'] <= train_local_time_end))
in_valid_local = ((train_DF['time_window'] > validation_time_start) &
                  (train_DF['time_window'] <= validation_time_end))

train_local = train_DF[in_train_local]
valid_local = train_DF[in_valid_local]

print(train_local.shape, valid_local.shape)

print("Train local start:" , train_local.time_window.min())
print("Train local end:  ", train_local.time_window.max())
print("Valid local start:" , valid_local.time_window.min())
print("Valid local end:  " , valid_local.time_window.max())
print("")


#### Prod split: full training history and the final input window

# Length of the model's input window, in rows.
steps = 336
test_time_period = '2020-02-20 11:00:00'

prod_threshold = pd.to_datetime(test_time_period)
train_prod_time_end = prod_threshold
# test_prod is cut from train_DF: everything later than `steps` hours before
# the threshold.
test_prod_time_start = prod_threshold - timedelta(hours = steps)

train_prod = train_DF[train_DF['time_window'] <= train_prod_time_end]
test_prod = train_DF[train_DF['time_window'] > test_prod_time_start]

print(train_prod.shape, test_prod.shape)

print("Train prod start:" , train_prod.time_window.min())
print("Train prod end:" , train_prod.time_window.max())
print("Test prod start:" , test_prod.time_window.min())
print("Test prod end:" , test_prod.time_window.max())

(5520000, 16) (4680000, 16)
Train local start: 2020-01-16 00:00:00
Train local end:   2020-02-07 23:00:00
Valid local start: 2020-02-01 00:00:00
Valid local end:   2020-02-20 11:00:00

(19240000, 16) (3360000, 16)
Train prod start: 2019-12-02 08:00:00
Train prod end: 2020-02-20 11:00:00
Test prod start: 2020-02-06 12:00:00
Test prod end: 2020-02-20 11:00:00


In [None]:
# Number of rows (hours) to forecast for every sample.
forecast_days_override = 168

# Offset between consecutive windows for each split.
train_local_slide = 100
valid_local_slide = 100
train_prod_slide = 1
test_prod_slide = 1


def _save_split(prefix, **arrays):
    """Save each keyword argument to ../Data/<prefix>_<name> with np.save, in the order given."""
    for name, values in arrays.items():
        np.save('../Data/{}_{}'.format(prefix, name), values)


print("Running the train local")
train_local_X, train_local_Y, train_local_dates, train_local_hostname, train_local_series = get_X_Y(
    dataset = train_local,
    step_size = steps,
    slide_window = train_local_slide,
    days_to_forecast = forecast_days_override)

gc.collect()
print("Saving the train_local")
_save_split('train_local',
            X = train_local_X,
            Y = train_local_Y,
            dates = train_local_dates,
            hostname = train_local_hostname,
            series = train_local_series)


print("Running the valid local")
valid_local_X, valid_local_Y, valid_local_dates, valid_local_hostname, valid_local_series = get_X_Y(
    dataset = valid_local,
    step_size = steps,
    slide_window = valid_local_slide,
    days_to_forecast = forecast_days_override)

gc.collect()
print("Saving the valid_local")
_save_split('valid_local',
            X = valid_local_X,
            Y = valid_local_Y,
            dates = valid_local_dates,
            hostname = valid_local_hostname,
            series = valid_local_series)


print("Running the test prod")
# The slide settings declared above are now actually passed; the prod calls
# previously hard-coded 1 (the same value) and left the variables unused.
test_prod_X, test_prod_dates, test_prod_hostname, test_prod_series = get_test_X(
    dataset = test_prod,
    step_size = steps,
    slide_window = test_prod_slide,
    days_to_forecast = forecast_days_override)

gc.collect()
print("Saving the test_prod")
_save_split('test_prod',
            X = test_prod_X,
            dates = test_prod_dates,
            hostname = test_prod_hostname,
            series = test_prod_series)


print("Running the train prod")
train_prod_X, train_prod_Y, train_prod_dates, train_prod_hostname, train_prod_series = get_X_Y(
    dataset = train_prod,
    step_size = steps,
    slide_window = train_prod_slide,
    days_to_forecast = forecast_days_override)

gc.collect()
print("Saving the train_prod")
_save_split('train_prod',
            X = train_prod_X,
            Y = train_prod_Y,
            dates = train_prod_dates,
            hostname = train_prod_hostname,
            series = train_prod_series)


print("Train local shape", train_local_X.shape, train_local_Y.shape)
print("Valid local shape", valid_local_X.shape, valid_local_Y.shape)
print("Train prod shape", train_prod_X.shape, train_prod_Y.shape)
print("Test prod shape", test_prod_X.shape)

In [52]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.

if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [53]:
# Defining the model
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Parameters
    ----------
    batch_size : int
        Default batch size used by ``init_hidden``; ``forward`` itself accepts
        any batch size.
    num_features : int
        Number of features per time step.
    hidden_dim : int
        Size of the LSTM hidden state.
    output_size : int
        Number of values predicted per sample.
    num_layers : int
        Number of stacked LSTM layers.
    """

    def __init__(self, batch_size, num_features=1, hidden_dim = 10, output_size = 1, num_layers = 1):
        super(LSTM, self).__init__()

        self.batch_size = batch_size
        self.num_features = num_features
        self.hidden_dim = hidden_dim
        self.output_size = output_size
        self.num_layers = num_layers

        # Last (hidden, cell) state; set by init_hidden() and by forward().
        self.hidden = None

        #LSTM layers (inputs are (batch, seq, feature) because batch_first=True)
        self.lstm = nn.LSTM(input_size = self.num_features,
                            hidden_size = self.hidden_dim,
                            num_layers = self.num_layers,
                            batch_first = True)

        #Output layer
        self.linear = nn.Linear(self.hidden_dim, self.output_size)

    def init_hidden(self, batch_size = None):
        """Reset ``self.hidden`` to zeros and return it.

        The state is allocated on the same device and with the same dtype as
        the model parameters. ``batch_size`` defaults to the value given to
        the constructor. Previously this method returned None, so
        ``model.hidden = model.init_hidden()`` overwrote the state with None,
        and the tensors were always created on the CPU.
        """
        if batch_size is None:
            batch_size = self.batch_size
        reference = next(self.parameters())
        shape = (self.num_layers, batch_size, self.hidden_dim)
        hidden_state = torch.zeros(shape, device = reference.device, dtype = reference.dtype)
        cell_state = torch.zeros(shape, device = reference.device, dtype = reference.dtype)
        self.hidden = (hidden_state, cell_state)
        return self.hidden

    def forward(self, input_seq):
        """Return predictions of shape (batch, output_size) for a (batch, seq, num_features) input.

        The LSTM is called without an explicit initial state, so every call
        starts from the default zero state; ``self.hidden`` is not fed back in.
        """
        lstm_out, hidden = self.lstm(input_seq)
        # Keep the final state for inspection only, detached so the stored
        # tensors do not hold on to the autograd graph of this batch.
        self.hidden = tuple(state.detach() for state in hidden)
        prediction = self.linear(lstm_out[:,-1])
        return prediction
        


In [54]:
# Network dimensions are read off the local training tensors: the feature
# count from the last axis of X and the output width from the second axis of Y.
model = LSTM(
    batch_size = train_local_X.shape[0],
    num_features = train_local_X.shape[2],
    hidden_dim = 25,
    output_size = train_local_Y.shape[1],
    num_layers = 2,
).to(device)

# Mean squared error objective, optimised with Adam.
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(params = model.parameters(), lr = 0.0001)

print(model)

LSTM(
  (lstm): LSTM(14, 25, num_layers=2, batch_first=True)
  (linear): Linear(in_features=25, out_features=168, bias=True)
)


In [None]:
EPOCHS = 200

seed_val = 100
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)
# NOTE(review): the model is constructed in an earlier cell, so these seeds do
# not affect its initial weights. Move the seeding above the model creation
# if reproducible initialisation is wanted.


train_batch_size = 1000
valid_batch_size = 1000

# Only full batches are used (a trailing partial batch is skipped), so the
# step count is the floor division. max(..., 1) avoids dividing by zero when
# a split holds fewer samples than one batch.
tr_steps = max(train_local_X.shape[0] // train_batch_size, 1)
val_steps = max(valid_local_X.shape[0] // valid_batch_size, 1)


for epoch in range(EPOCHS):

    print("Epoch :" , epoch)

    # Running sums of the per-batch losses, kept as plain floats so the
    # autograd graph of every batch is not kept alive for the whole epoch.
    train_loss = 0.0
    valid_loss = 0.0

    # Training
    model.train()
    for start in range(0, len(train_local_X) - train_batch_size + 1, train_batch_size):
        end = start + train_batch_size

        # .to(device) instead of .cuda(), so the loop also runs on CPU-only
        # machines, consistently with the validation loop below.
        train_X = train_local_X[start:end].to(device)
        train_Y = train_local_Y[start:end].to(device)

        optimizer.zero_grad()
        y_pred_train = model(train_X)
        tr_loss = loss_function(y_pred_train, train_Y)
        tr_loss.backward()
        optimizer.step()

        # The former `model.hidden = model.init_hidden()` call was dropped: it
        # assigned None, and forward() never reads model.hidden anyway.
        train_loss += tr_loss.item()

    # Evaluation
    model.eval()
    with torch.no_grad():
        for start in range(0, len(valid_local_X) - valid_batch_size + 1, valid_batch_size):
            end = start + valid_batch_size

            valid_X = valid_local_X[start:end].to(device)
            valid_Y = valid_local_Y[start:end].to(device)

            y_pred_valid = model(valid_X)
            val_loss = loss_function(y_pred_valid, valid_Y)

            valid_loss += val_loss.item()

    print("Train Loss: {}".format(train_loss/tr_steps))
    print("Validation Loss: {}".format(valid_loss/val_steps))

    print("")

In [61]:
# NOTE(review): no cell in this notebook defines prod_prediction, so a fresh
# Restart & Run All failed here with a NameError. It is computed below from
# the current `model` on the prod input windows - confirm this is the model
# the submission is meant to use.
model.eval()
prediction_batches = []
with torch.no_grad():
    # Predict in chunks so the whole test tensor is never on the device at once.
    for start in range(0, len(test_prod_X), 1000):
        batch = test_prod_X[start:start + 1000].to(device)
        prediction_batches.append(model(batch).cpu())
prod_prediction = torch.cat(prediction_batches)

# Undo the min-max scaling of the encoded ids (one column of samples, as the
# scalers were fitted), then round before casting: the inverse transform can
# return e.g. 12.9999999 for code 13, and a bare astype('int') would truncate
# that to the wrong label.
host_codes = hostname_scaler.inverse_transform(
    np.asarray(test_prod_hostname, dtype = 'float').reshape(-1, 1)).reshape(-1)
host = le_hostname.inverse_transform(np.round(host_codes, 0).astype('int'))

series_codes = series_scaler.inverse_transform(
    np.asarray(test_prod_series, dtype = 'float').reshape(-1, 1)).reshape(-1)
series = le_series.inverse_transform(np.round(series_codes, 0).astype('int'))

# Bring the predictions back to the original scale of Mean.
prod_prediction_rescaled = dep_scaler.inverse_transform(prod_prediction.detach().cpu().numpy())
prod_prediction_rescaled = pd.DataFrame(prod_prediction_rescaled.tolist())


Unnamed: 0,hostname,series,0,1,2,3,4,5,6,7,...,158,159,160,161,162,163,164,165,166,167
0,host0001,cpuusagebyproc,19334086.0,2.350618e+03,-10781509.0,4297608.0,-11132187.0,-2509830.000,-9963567.0,-19259364.0,...,-4989732.00,8290387.5,4919952.5,7.590709e+05,-6.986909e+05,-9069351.00,-16457067.0,-6373943.00,6.802010e+05,7514220.0
1,host0001,memoryallocatedbyproc,19349452.0,1.067592e+06,-14507311.0,4320193.5,-11821379.0,-3603418.500,-9910060.0,-19552246.0,...,-4742626.00,9383134.0,6092608.0,-5.033685e+05,-2.214264e+06,-7542612.00,-16569530.0,-7713262.00,4.398079e+05,7454685.0
2,host0003,cpu_1m,19272842.0,-1.295675e+06,-7861702.5,5326936.5,-10635214.0,-1326657.250,-10844782.0,-18852514.0,...,-5127231.00,8217130.5,4773342.0,2.446696e+06,3.382921e+05,-10803404.00,-16213472.0,-5813429.50,9.652679e+05,8071564.5
3,host0003,cpu_5m,19286294.0,-1.106184e+06,-8222510.0,5138257.0,-10692864.0,-1490619.000,-10687412.0,-18909074.0,...,-5112085.00,8179011.5,4745704.0,2.200324e+06,2.182864e+05,-10558297.00,-16254257.0,-5854795.50,9.253196e+05,7973360.0
4,host0003,cpu_5s,19297626.0,-9.250893e+05,-8584578.0,4967558.5,-10752042.0,-1649952.125,-10544218.0,-18964034.0,...,-5096515.50,8155166.0,4732072.5,1.964423e+06,9.522729e+04,-10321467.00,-16291785.0,-5904679.50,8.866071e+05,7883800.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,host9991,out_traffic,12979348.0,1.392256e+06,-17412626.0,4577113.5,-15323000.0,-6896343.500,-8756391.0,-9154179.0,...,-2993160.50,7570613.5,6561495.5,3.735798e+06,-9.177037e+06,-3505958.75,-16209189.0,-6161260.50,-3.844230e+06,9350183.0
9996,host9993,cpuusagebyproc,13529750.0,1.877102e+05,-13326550.0,4643367.0,-15223208.0,-6563252.500,-9221014.0,-8509092.0,...,-3150627.75,5044281.0,4836445.0,5.167997e+06,-8.218451e+06,-5247621.50,-16633006.0,-3954090.75,-3.609289e+06,9778898.0
9997,host9993,memoryallocatedbyproc,12937230.0,1.270049e+06,-17111070.0,4509018.5,-15365384.0,-6897555.500,-8646591.0,-9130939.0,...,-3031546.00,7303636.5,6423015.0,3.894308e+06,-9.056625e+06,-3602915.75,-16257350.0,-5995530.00,-3.829762e+06,9310201.0
9998,host9996,cpu_5s,13956161.0,-7.909587e+05,-11167132.0,5520365.5,-15342992.0,-6264245.500,-10191238.0,-8060753.5,...,-3193108.50,4157952.5,4435628.0,6.455748e+06,-7.861808e+06,-6688998.50,-16852192.0,-3018423.75,-3.378420e+06,10431434.0


In [None]:
# Assemble the submission: the hostname and series labels followed by the
# rescaled prediction columns, joined on the row index.
sub_DF = pd.DataFrame({'hostname' : host, 'series': series}).join(prod_prediction_rescaled)

# Written without index and without a header row.
sub_DF.to_csv('../Submission/sub_4.csv', index = False, header = None)