In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

# Load the training metadata and drop exact duplicate rows.
dataset = pd.read_csv('train.csv')
df_train = dataset.drop_duplicates()
# Keep columns 0 and 3 plus the last six columns. Slice the de-duplicated
# frame (not `dataset`) so the drop_duplicates() above actually takes effect.
df_train = df_train.iloc[:, [0, 3] + list(range(-6, 0))]
df_test = pd.read_csv('test.csv')

# Select relevant columns
selected_columns_train = [
    'spectrogram_id',
    'seizure_vote',
    'lpd_vote',
    'gpd_vote',
    'lrda_vote',
    'grda_vote',
    'other_vote',
]
selected_data_train = df_train[selected_columns_train]

selected_columns_test = ['spectrogram_id', 'eeg_id', 'patient_id']
selected_data_test = df_test[selected_columns_test]

# Separate X_train and y_train: the only input feature is the spectrogram id,
# the targets are the six per-class vote columns.
# NOTE(review): an identifier is presumably not a meaningful predictor on its
# own -- confirm whether real spectrogram features were meant to be used here.
X_train = selected_data_train[['spectrogram_id']]
y_train = selected_data_train[
    ['seizure_vote', 'lpd_vote', 'gpd_vote', 'lrda_vote', 'grda_vote', 'other_vote']
]

# Standardize the input feature. The scaler is fitted on the training data
# only and reused unchanged for the test data further down.
scaler_X = StandardScaler()
X_train_standardized = scaler_X.fit_transform(X_train)

# Integer class label per row = index of the vote column with the most votes.
# This must be computed on the RAW votes: standardizing each vote column
# separately shifts and rescales the columns relative to each other and so
# changes which column is the arg-max.
y_train_labels = np.argmax(y_train.to_numpy(), axis=1)
num_classes = y_train.shape[1]

# Reshape the data for LSTM input: (samples, timesteps, 1)
X_train_standardized_reshaped = X_train_standardized.reshape(
    (X_train_standardized.shape[0], X_train_standardized.shape[1], 1)
)

# Build the RNN model
model = Sequential()
model.add(LSTM(128, input_shape=(X_train_standardized_reshaped.shape[1], 1)))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
# Softmax output: one probability per vote class.
model.add(Dense(num_classes, activation='softmax'))

# Compile the model. The sparse loss expects integer class labels, which is
# what y_train_labels holds.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train_standardized_reshaped, y_train_labels, epochs=10, batch_size=32)

# Build the test features from the already-loaded test frame so the rows stay
# aligned with the 'eeg_id' column written to the submission below.
X_test = df_test[['spectrogram_id']]

# Standardize X_test using the same scaler_X
X_test_standardized = scaler_X.transform(X_test)

# Reshape the data for LSTM input
X_test_standardized_reshaped = X_test_standardized.reshape(
    (X_test_standardized.shape[0], X_test_standardized.shape[1], 1)
)

# Make predictions on the test data. The output layer is a softmax, so these
# are already class probabilities: no inverse scaling and no second softmax.
predictions = model.predict(X_test_standardized_reshaped)

# Renormalize in float64 so each row sums to 1 despite float32 rounding.
predictions_softmax = np.asarray(predictions, dtype=np.float64)
predictions_softmax = predictions_softmax / predictions_softmax.sum(axis=1, keepdims=True)

# Validate explicitly instead of with `assert`, which is removed under -O.
predictions_sum = predictions_softmax.sum(axis=1)
if not np.allclose(predictions_sum, 1.0, atol=1e-5):
    raise ValueError("Predicted probabilities do not sum to 1 for each row")

# Assemble the submission: one row per test record, one column per class.
submission_df = pd.DataFrame({
    'eeg_id': df_test['eeg_id'],
    'seizure_vote': predictions_softmax[:, 0],
    'lpd_vote': predictions_softmax[:, 1],
    'gpd_vote': predictions_softmax[:, 2],
    'lrda_vote': predictions_softmax[:, 3],
    'grda_vote': predictions_softmax[:, 4],
    'other_vote': predictions_softmax[:, 5],
})

# Save the submission DataFrame as a CSV file
submission_df.to_csv("submission.csv", index=False)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
