# What does the data look like?

In [None]:
import pandas as pd

# Read the first repetition of each condition (dp, norm, sp) for a quick look.
df_data_dp, df_data_norm, df_data_sp = map(
    pd.read_csv,
    ("/content/dp_rep1.csv", "/content/norm_rep1.csv", "/content/sp_rep1.csv"),
)

# Shown as one tuple: the last rows of the dp file, the first rows of the other two.
df_data_dp.tail(), df_data_norm.head(), df_data_sp.head()

# Training Data

In [None]:
import pandas as pd
import numpy as np

def transform_dataframe(df, feature_col=0, label_col=4):
    """Flatten one recording into a single row of samples followed by its label.

    Parameters
    ----------
    df : pandas.DataFrame
        One recording. Column ``feature_col`` supplies the samples (one per
        row); column ``label_col`` supplies the label, read from the first
        row only.
    feature_col : int, default 0
        Positional index of the column holding the samples.
    label_col : int, default 4
        Positional index of the column holding the label.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, len(df) + 1)``: the samples in their original
        order, then the label as the last element. NumPy chooses one common
        dtype for the row, so a text label makes every element a string and
        callers have to cast the sample columns back to float.

    Raises
    ------
    IndexError
        If ``df`` has no rows or does not have the requested column positions.
    """
    # Column -> one row of shape (1, n_samples).
    samples = df.iloc[:, feature_col].values.reshape(1, -1)

    # Only the first row's label is used; it is applied to the whole recording.
    label = df.iloc[0, label_col]

    return np.hstack((samples, [[label]]))

# Training split: repetitions 1-8 of each condition.
train_filepaths = [
    # dp
    '/content/dp_rep1.csv', '/content/dp_rep2.csv', '/content/dp_rep3.csv', '/content/dp_rep4.csv',
    '/content/dp_rep5.csv', '/content/dp_rep6.csv', '/content/dp_rep7.csv', '/content/dp_rep8.csv',
    # norm
    '/content/norm_rep1.csv', '/content/norm_rep2.csv', '/content/norm_rep3.csv', '/content/norm_rep4.csv',
    '/content/norm_rep5.csv', '/content/norm_rep6.csv', '/content/norm_rep7.csv', '/content/norm_rep8.csv',
    # sp
    '/content/sp_rep1.csv', '/content/sp_rep2.csv', '/content/sp_rep3.csv', '/content/sp_rep4.csv',
    '/content/sp_rep5.csv', '/content/sp_rep6.csv', '/content/sp_rep7.csv', '/content/sp_rep8.csv',
]

# Turn every CSV into one row, then stack the rows into a single 2-D array.
transformed_dfs = list(
    map(lambda path: transform_dataframe(pd.read_csv(path)), train_filepaths)
)
result = np.vstack(transformed_dfs)

result.shape

# One-hot encode Tempo

In [None]:
from sklearn.preprocessing import OneHotEncoder

# Split the stacked rows into the samples and the label (last column).
data = result[:, :-1]
labels = result[:, -1].reshape(-1, 1)  # OneHotEncoder expects a 2-D array

# The `sparse` keyword was renamed to `sparse_output` in scikit-learn 1.2 and
# removed in 1.4. Try the current name first and fall back for older installs,
# so the encoder returns a dense array either way.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
labels_one_hot = encoder.fit_transform(labels)

# Number of one-hot columns actually produced, instead of a hard-coded 3.
n_classes = labels_one_hot.shape[1]

# Cast the samples to float before joining them with the float one-hot columns.
result = np.hstack((data.astype(np.float64), labels_one_hot))

# Shuffle the rows in place with a fixed seed so a re-run gives the same order.
np.random.default_rng(42).shuffle(result)

x_train_tempo = result[:, :-n_classes]
y_train_tempo = result[:, -n_classes:]

x_train_tempo.shape, y_train_tempo.shape

# Validation Set


In [None]:
import pandas as pd
import numpy as np

def transform_dataframe(df, feature_col=0, label_col=4):
    """Flatten one recording into a single row of samples followed by its label.

    Parameters
    ----------
    df : pandas.DataFrame
        One recording. Column ``feature_col`` supplies the samples (one per
        row); column ``label_col`` supplies the label, read from the first
        row only.
    feature_col : int, default 0
        Positional index of the column holding the samples.
    label_col : int, default 4
        Positional index of the column holding the label.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, len(df) + 1)``: the samples in their original
        order, then the label as the last element. NumPy chooses one common
        dtype for the row, so a text label makes every element a string and
        callers have to cast the sample columns back to float.

    Raises
    ------
    IndexError
        If ``df`` has no rows or does not have the requested column positions.
    """
    # Column -> one row of shape (1, n_samples).
    samples = df.iloc[:, feature_col].values.reshape(1, -1)

    # Only the first row's label is used; it is applied to the whole recording.
    label = df.iloc[0, label_col]

    return np.hstack((samples, [[label]]))

# Validation split: repetitions 9 and 10 of each condition.
validation_filepaths = [
    '/content/dp_rep9.csv', '/content/dp_rep10.csv',
    '/content/norm_rep9.csv', '/content/norm_rep10.csv',
    '/content/sp_rep9.csv', '/content/sp_rep10.csv',
]

# Turn every CSV into one row, then stack the rows into a single 2-D array.
transformed_dfs = list(
    map(lambda path: transform_dataframe(pd.read_csv(path)), validation_filepaths)
)
result = np.vstack(transformed_dfs)

result.shape

# One-hot encode Tempo

In [None]:
from sklearn.preprocessing import OneHotEncoder

# Split the stacked rows into the samples and the label (last column).
data = result[:, :-1]
labels = result[:, -1].reshape(-1, 1)  # the encoder expects a 2-D array

# Reuse the encoder fitted in the training cell instead of fitting a new one,
# so the one-hot columns keep the training order and width even if this split
# does not contain every class. An unseen label raises rather than being
# silently mis-encoded.
labels_one_hot = encoder.transform(labels)

# Number of one-hot columns actually produced, instead of a hard-coded 3.
n_classes = labels_one_hot.shape[1]

# Cast the samples to float before joining them with the float one-hot columns.
result = np.hstack((data.astype(np.float64), labels_one_hot))

# Shuffle the rows in place with a fixed seed so a re-run gives the same order.
np.random.default_rng(42).shuffle(result)

x_validation_tempo = result[:, :-n_classes]
y_validation_tempo = result[:, -n_classes:]

x_validation_tempo.shape, y_validation_tempo.shape

# Testing Data

In [None]:
import pandas as pd
import numpy as np

def transform_dataframe(df, feature_col=0, label_col=4):
    """Flatten one recording into a single row of samples followed by its label.

    Parameters
    ----------
    df : pandas.DataFrame
        One recording. Column ``feature_col`` supplies the samples (one per
        row); column ``label_col`` supplies the label, read from the first
        row only.
    feature_col : int, default 0
        Positional index of the column holding the samples.
    label_col : int, default 4
        Positional index of the column holding the label.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, len(df) + 1)``: the samples in their original
        order, then the label as the last element. NumPy chooses one common
        dtype for the row, so a text label makes every element a string and
        callers have to cast the sample columns back to float.

    Raises
    ------
    IndexError
        If ``df`` has no rows or does not have the requested column positions.
    """
    # Column -> one row of shape (1, n_samples).
    samples = df.iloc[:, feature_col].values.reshape(1, -1)

    # Only the first row's label is used; it is applied to the whole recording.
    label = df.iloc[0, label_col]

    return np.hstack((samples, [[label]]))

# Test split: repetitions 11 and 12 of each condition.
test_filepaths = [
    '/content/dp_rep11.csv', '/content/dp_rep12.csv',
    '/content/norm_rep11.csv', '/content/norm_rep12.csv',
    '/content/sp_rep11.csv', '/content/sp_rep12.csv',
]

# Turn every CSV into one row, then stack the rows into a single 2-D array.
transformed_dfs = list(
    map(lambda path: transform_dataframe(pd.read_csv(path)), test_filepaths)
)
result = np.vstack(transformed_dfs)

result.shape

# One-hot encode Tempo

In [None]:
from sklearn.preprocessing import OneHotEncoder

# Split the stacked rows into the samples and the label (last column).
data = result[:, :-1]
labels = result[:, -1].reshape(-1, 1)  # the encoder expects a 2-D array

# Reuse the already-fitted encoder from the earlier cells instead of fitting a
# new one, so the one-hot columns keep the same order and width even if this
# split does not contain every class. An unseen label raises rather than being
# silently mis-encoded.
labels_one_hot = encoder.transform(labels)

# Number of one-hot columns actually produced, instead of a hard-coded 3.
n_classes = labels_one_hot.shape[1]

# Cast the samples to float before joining them with the float one-hot columns.
result = np.hstack((data.astype(np.float64), labels_one_hot))

# Shuffle the rows in place with a fixed seed so a re-run gives the same order.
np.random.default_rng(42).shuffle(result)

x_test_tempo = result[:, :-n_classes]
y_test_tempo = result[:, -n_classes:]

x_test_tempo.shape, y_test_tempo.shape

# Standardize the Training, Validation and Testing Data

In [None]:
def row_standardize(data):
    """Standardize each row of a 2-D array to zero mean and (about) unit spread.

    The mean and standard deviation are computed per row (axis 1). A constant
    of 1e-10 is added to the standard deviation so a constant row divides by a
    tiny number instead of zero.
    """
    mu = np.mean(data, axis=1, keepdims=True)
    sigma = np.std(data, axis=1, keepdims=True)
    centered = data - mu
    return centered / (sigma + 1e-10)

# Standardize every split row by row; each row uses only its own statistics.
x_train_tempo_std, x_validation_tempo_std, x_test_tempo_std = map(
    row_standardize,
    (x_train_tempo, x_validation_tempo, x_test_tempo),
)

x_train_tempo_std.shape, x_validation_tempo_std.shape, x_test_tempo_std.shape

# Model v4.2


In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_addons as tfa

# Size the network from the prepared arrays rather than hard-coding the
# input width and class count.
n_features = x_train_tempo_std.shape[1]
n_classes = y_train_tempo.shape[1]

# Fully connected classifier: two ReLU hidden layers, each followed by 50%
# dropout, then a softmax output with one unit per class.
model = keras.Sequential([
    layers.Input(shape=(n_features,)),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(n_classes, activation='softmax'),
])

# Labels are one-hot encoded, hence categorical cross-entropy. The only
# tracked metric is the macro-averaged F1 score.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=[tfa.metrics.F1Score(num_classes=n_classes, average='macro')])

# model summary
model.summary()

print(x_train_tempo_std.shape, y_train_tempo.shape, x_test_tempo_std.shape, y_test_tempo.shape)

# Train the model, reporting loss and macro F1 on the validation split each epoch.
history = model.fit(
    x_train_tempo_std, y_train_tempo,
    epochs=50,
    batch_size=8,
    validation_data=(x_validation_tempo_std, y_validation_tempo),
    verbose=1
)

# Evaluating the model

In [None]:
# The model is compiled with a single metric (macro-averaged F1), so evaluate()
# returns [loss, macro F1]. The *_accuracy variable names are kept so existing
# references still work, but the values they hold are F1 scores.
train_loss, train_accuracy = model.evaluate(x_train_tempo_std, y_train_tempo)
print(f"Training macro F1: {train_accuracy * 100:.2f}%")

# Evaluate the model on the held-out test set (not the validation set).
test_loss, test_accuracy = model.evaluate(x_test_tempo_std, y_test_tempo)
print(f"Test macro F1: {test_accuracy * 100:.2f}%")


# Confusion Matrix

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Most probable class index for every test row, and the true class index
# recovered from the one-hot labels.
y_pred = model.predict(x_test_tempo_std)
y_pred_classes = y_pred.argmax(axis=1)
y_true = y_test_tempo.argmax(axis=1)

# Rows are true classes, columns are predicted classes.
confusion_mtx = tf.math.confusion_matrix(y_true, y_pred_classes)

# NOTE(review): the tick labels assume the one-hot column order is
# Deadpan, Normal, Speed - confirm against encoder.categories_.
class_names = ['Deadpan', 'Normal', 'Speed']

plt.figure(figsize=(10, 8))
sns.heatmap(
    confusion_mtx,
    annot=True,
    fmt='g',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

# Prepare and Standardize Prediction Data

In [None]:
pred_filepaths = [
    '/content/dp_rep12.csv'
]

# Read each CSV, flatten it to one row, and stack the rows.
transformed_dfs = [transform_dataframe(pd.read_csv(filepath)) for filepath in pred_filepaths]
result_p = np.vstack(transformed_dfs)

# Split the stacked rows into the samples and the label (last column).
data = result_p[:, :-1]
labels = result_p[:, -1].reshape(-1, 1)  # the encoder expects a 2-D array

# Reuse the already-fitted encoder. Fitting a fresh encoder on a single file
# would see only one class and emit a single column instead of one per class.
labels_one_hot = encoder.transform(labels)
n_classes = labels_one_hot.shape[1]

# Cast the samples to float before joining them with the float one-hot columns.
# The rows are deliberately not shuffled, so row i of the predictions
# corresponds to pred_filepaths[i].
result = np.hstack((data.astype(np.float64), labels_one_hot))

# Slice the encoded float array (not the raw result_p), so every sample column
# is kept and the label columns are the one-hot encoding.
x_pred_tempo = result[:, :-n_classes]
y_pred_tempo = result[:, -n_classes:]

x_pred_tempo.shape, y_pred_tempo.shape

def row_standardize(data):
    """Standardize each row of a 2-D array to zero mean and (about) unit spread.

    The mean and standard deviation are computed per row (axis 1). A constant
    of 1e-10 is added to the standard deviation so a constant row divides by a
    tiny number instead of zero.
    """
    mu = np.mean(data, axis=1, keepdims=True)
    sigma = np.std(data, axis=1, keepdims=True)
    centered = data - mu
    return centered / (sigma + 1e-10)

# Apply the same per-row standardization used for the training, validation and
# test splits. The result overwrites x_pred_tempo, which is the array later
# passed to model.predict.
x_pred_tempo = row_standardize(x_pred_tempo)

x_pred_tempo.shape

# Prediction

In [None]:
# Run the trained model on the standardized prediction rows. The output layer
# is a softmax, so each printed row holds one score per class.
predictions = model.predict(x_pred_tempo)
print(predictions)