In [6]:
import pandas as pd
import numpy as np
from Utils import preprocess
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import KFold

In [7]:
train_X = preprocess.light("data/light_train_source_labels.csv")
train_y = preprocess.light("data/light_train_target_labels.csv")

# Convert the data into the appropriate format for the LSTM model
train_X = train_X.reshape(train_X.shape[0], train_X.shape[1], 1)  # Reshape to (samples, timesteps, features)

In [8]:
seed = 1105

In [9]:
k_fold = KFold(n_splits=5, shuffle=False, random_state=None)

# Create the LSTM model
cv_scores=[]
for train, test in k_fold.split(train_X, train_y):
  model = Sequential()
  model.add(LSTM(units=128, input_shape=(train_X.shape[1], 1), return_sequences=True))
  model.add(Dropout(0.2))
  model.add(LSTM(units=64))
  model.add(Dense(units=28, activation='sigmoid'))
  # Compile the model
  model.compile(optimizer='adam', loss='binary_crossentropy')
  # Fit the model on the training data
  model.fit(train_X[train], train_y[train], epochs=5, batch_size=128)
  scores = model.evaluate(train_X[test], train_y[test], verbose=0)
  print(f"{model.metrics_names[1]:.2f}, {scores[1]*100:.2f}")
  cv_scores.append(scores[1]*100)
print("The mean of cross validation score: ", np.mean(cv_scores))




Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


IndexError: list index out of range

In [10]:
print(model.summary())
model.save("LSTM.h5")
model.save_weights("LSTM_weights.h5")

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 1036, 256)         264192    
                                                                 
 dropout (Dropout)           (None, 1036, 256)         0         
                                                                 
 lstm_1 (LSTM)               (None, 64)                82176     
                                                                 
 dense (Dense)               (None, 28)                1820      
                                                                 
Total params: 348,188
Trainable params: 348,188
Non-trainable params: 0
_________________________________________________________________
None


In [12]:
# # Predict the next 28 timeslots
test_x = preprocess.light("data/light_test_source_labels.csv")
test_x = test_x.reshape(test_x.shape[0], test_x.shape[1], 1)

result = model.predict(test_x)
# result = result.flatten()

# Print the predicted timeslots
print(result)

[[-0.21647142  0.34016457  0.08051411 ...  0.2611325   0.05835225
  -0.15089525]
 [-0.20734006  0.29530123  0.05653251 ...  0.23883344  0.04654413
  -0.1426166 ]
 [-0.22021909  0.30263537  0.05738454 ...  0.2490086   0.04990095
  -0.15191677]
 ...
 [-0.21752898  0.38258404  0.0982713  ...  0.22957045  0.04551012
  -0.15477593]
 [-0.09416587  0.16022019  0.03468331 ...  0.13581805  0.03453499
  -0.0733959 ]
 [-0.13713595  0.22456016  0.04702961 ...  0.17070274  0.03565332
  -0.09722348]]


In [13]:
display(result.shape)
display(type(result))
result = result.transpose()
display(result)

(7616, 28)

numpy.ndarray

array([[-0.21647142, -0.20734006, -0.22021909, ..., -0.21752898,
        -0.09416587, -0.13713595],
       [ 0.34016457,  0.29530123,  0.30263537, ...,  0.38258404,
         0.16022019,  0.22456016],
       [ 0.08051411,  0.05653251,  0.05738454, ...,  0.0982713 ,
         0.03468331,  0.04702961],
       ...,
       [ 0.2611325 ,  0.23883344,  0.2490086 , ...,  0.22957045,
         0.13581805,  0.17070274],
       [ 0.05835225,  0.04654413,  0.04990095, ...,  0.04551012,
         0.03453499,  0.03565332],
       [-0.15089525, -0.1426166 , -0.15191677, ..., -0.15477593,
        -0.0733959 , -0.09722348]], dtype=float32)

In [14]:
sample = pd.read_csv("data/sample.csv")
display(sample)

Unnamed: 0,user_id,time_slot_0,time_slot_1,time_slot_2,time_slot_3,time_slot_4,time_slot_5,time_slot_6,time_slot_7,time_slot_8,...,time_slot_18,time_slot_19,time_slot_20,time_slot_21,time_slot_22,time_slot_23,time_slot_24,time_slot_25,time_slot_26,time_slot_27
0,30460,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,30461,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,30462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,30463,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,30464,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7611,38071,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7612,38072,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7613,38073,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7614,38074,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
answer = sample
# answer.iloc[:,1:] = result
for i in range(1,29):
    answer.iloc[:,i] = result[i-1]
display(answer)

Unnamed: 0,user_id,time_slot_0,time_slot_1,time_slot_2,time_slot_3,time_slot_4,time_slot_5,time_slot_6,time_slot_7,time_slot_8,...,time_slot_18,time_slot_19,time_slot_20,time_slot_21,time_slot_22,time_slot_23,time_slot_24,time_slot_25,time_slot_26,time_slot_27
0,30460,-0.216471,0.340165,0.080514,0.061438,0.240802,0.323679,-0.063382,0.053328,0.186519,...,0.045913,-0.101672,0.144971,0.253313,0.049612,0.041902,0.135521,0.261133,0.058352,-0.150895
1,30461,-0.207340,0.295301,0.056533,0.040361,0.225084,0.295786,-0.046166,0.041057,0.181033,...,0.037762,-0.096238,0.143178,0.247243,0.041338,0.030208,0.137661,0.238833,0.046544,-0.142617
2,30462,-0.220219,0.302635,0.057385,0.041283,0.231681,0.307635,-0.046390,0.043494,0.193194,...,0.042650,-0.098533,0.155204,0.261081,0.042869,0.029807,0.149981,0.249009,0.049901,-0.151917
3,30463,-0.094305,0.160369,0.034700,0.021408,0.120997,0.162074,-0.028551,0.020881,0.069279,...,0.029779,-0.041565,0.060555,0.118113,0.030032,0.019842,0.059846,0.135971,0.034550,-0.073453
4,30464,-0.116521,0.182560,0.037828,0.024946,0.138090,0.188678,-0.029257,0.023672,0.085096,...,0.030278,-0.052401,0.072404,0.143986,0.032071,0.019257,0.072256,0.160364,0.038160,-0.083441
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7611,38071,-0.094913,0.161016,0.034775,0.021463,0.121515,0.162775,-0.028580,0.020979,0.069665,...,0.029733,-0.041871,0.060830,0.118799,0.030093,0.019845,0.060221,0.136643,0.034621,-0.073708
7612,38072,-0.230000,0.391142,0.101063,0.067251,0.278388,0.355726,-0.081069,0.061157,0.238783,...,0.049111,-0.114463,0.174712,0.260817,0.049371,0.054745,0.148936,0.236854,0.047931,-0.163367
7613,38073,-0.217529,0.382584,0.098271,0.065185,0.271813,0.341868,-0.079918,0.058856,0.224125,...,0.043990,-0.111596,0.162225,0.248407,0.047603,0.054995,0.138592,0.229570,0.045510,-0.154776
7614,38074,-0.094166,0.160220,0.034683,0.021396,0.120878,0.161913,-0.028544,0.020859,0.069190,...,0.029791,-0.041494,0.060492,0.117957,0.030017,0.019841,0.059760,0.135818,0.034535,-0.073396


In [16]:
answer.to_csv("submit.csv", index=False)