In [235]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [236]:
num_channels = 3
max_sequence_length = 1000  # Set maximum sequence length

def dataPrep(folderPath, num_filler_copies=142):
    """Load every ``.npy`` file in ``folderPath`` into one fixed-length array.

    Each file is loaded with ``np.load``; every row of the loaded array is cut
    to at most ``max_sequence_length`` values and left-padded with zeros up to
    that length. The per-file results are stacked and axes 1 and 2 are swapped,
    which yields ``(n_samples, max_sequence_length, rows_per_file)``; the
    notebook output shows ``(36110, 1000, 3)`` for the training folder.

    Parameters
    ----------
    folderPath : str
        Directory that contains the ``.npy`` files.
    num_filler_copies : int, optional
        How many extra copies of the FIRST loaded file are appended after the
        real data. The default of 142 keeps the original behaviour (35968
        files + 142 = 36110 rows, the row count of the label table).
        NOTE(review): these rows are duplicates, not real samples. Confirm
        that the label table really has 142 more rows than there are files
        and decide whether the labels should be trimmed instead.

    Returns
    -------
    numpy.ndarray
        ``float32`` array with one entry per file plus the filler copies.

    Raises
    ------
    ValueError
        If ``folderPath`` contains no ``.npy`` file.
    """
    # Get a list of all .npy files in the folder.
    # NOTE(review): os.listdir order is not guaranteed; if rows must line up
    # with a separately stored label table, confirm the ordering assumption.
    file_list = [file for file in os.listdir(folderPath) if file.endswith('.npy')]
    print(len(file_list))
    if not file_list:
        raise ValueError(f"no .npy files found in {folderPath!r}")

    # Initialize list to store padded data
    padded_data_list = []

    # Process each .npy file
    for file_name in file_list:
        # Fix: read from the folder passed in, not from the notebook-level
        # variable folder_pathTrain (which may be undefined or point elsewhere).
        data = np.load(os.path.join(folderPath, file_name))

        # Keep the first max_sequence_length values, then zero-pad on the left
        truncated_data = [seq[:max_sequence_length] for seq in data]
        padded_data = pad_sequences(truncated_data, maxlen=max_sequence_length, padding='pre', dtype='float32')

        padded_data_list.append(padded_data)

    # Append filler copies of the first sample (see num_filler_copies above)
    padded_data_list.extend([padded_data_list[0]] * num_filler_copies)

    # Stack the per-file arrays along a new leading sample axis
    padded_data_all = np.stack(padded_data_list, axis=0)

    # Swap axes to match model input shape (None, 1000, 3)
    padded_data_all = np.swapaxes(padded_data_all, 1, 2)
    return padded_data_all

In [237]:
# Folder holding the training .npy files.
folder_pathTrain="C:\\Users\\91629\\Desktop\\MF_train_Data"
# Pass the variable instead of repeating the path literal, so the location is
# defined in one place only.
# NOTE: despite its name, test_data holds the arrays loaded from the TRAINING
# folder; later cells use it as the training input.
test_data = dataPrep(folder_pathTrain)

35968


In [238]:
# Shape of the array built from MF_train_Data (stored under the name test_data).
print(test_data.shape)

(36110, 1000, 3)


In [239]:
# Load the label table for the training arrays.
# NOTE(review): unpickling can execute arbitrary code; only load this file if
# it comes from a trusted source.
labels=pd.read_pickle("C:\\Users\\91629\\Downloads\\train_data_mf2_label.pkl")

In [240]:
# Compare the input and label shapes; the leading (sample) dimension of both
# is what model.fit pairs up row by row.
print(test_data.shape)
print(labels.shape)

(36110, 1000, 3)
(36110, 677)


In [261]:
# 1-D CNN: two Conv1D + MaxPooling1D stages, then a dense head.
model = Sequential([
    Conv1D(32, kernel_size=3, activation='relu', input_shape=(max_sequence_length, num_channels)),
    MaxPooling1D(pool_size=2),
    Conv1D(64, kernel_size=3, activation='relu'),  # stray " /" after this line removed (syntax error)
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    # NOTE(review): the label table printed earlier has shape (36110, 677),
    # but this layer emits a single value per sample. Confirm whether the
    # target should be one binary value per sample or a 677-way multi-label
    # output (e.g. Dense(labels.shape[1], activation='sigmoid')). Later cells
    # currently rely on the (N, 1) prediction shape, so it is left unchanged.
    Dense(1, activation='sigmoid')  # Assuming binary classification, adjust output units for multiclass
])

# The extra model.add(Flatten()) that followed the Dense(1) output was removed:
# flattening a (None, 1) tensor is a no-op.

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
# NOTE(review): per the Keras docs, validation_split takes the validation rows
# from the END of the arrays; confirm that tail is representative data.
model.fit(test_data, labels, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1ef23486230>

In [262]:
num_channels = 3
max_sequence_length = 1000  # Set maximum sequence length

def dataPrep1(folderPath, num_filler_copies=1):
    """Load every ``.npy`` file in ``folderPath`` into one fixed-length array.

    Same procedure as ``dataPrep``: each row of each loaded file is cut to at
    most ``max_sequence_length`` values and left-padded with zeros, the
    per-file arrays are stacked, and axes 1 and 2 are swapped. The notebook
    output shows a result of shape ``(1137, 1000, 3)`` for the test folder.

    Parameters
    ----------
    folderPath : str
        Directory that contains the ``.npy`` files.
    num_filler_copies : int, optional
        How many extra copies of the FIRST loaded file are appended after the
        real data. The default of 1 keeps the original behaviour.
        NOTE(review): the appended row is a duplicate, not a real sample;
        confirm it is only there to match the label table's row count.

    Returns
    -------
    numpy.ndarray
        ``float32`` array with one entry per file plus the filler copies.

    Raises
    ------
    ValueError
        If ``folderPath`` contains no ``.npy`` file.
    """
    # Get a list of all .npy files in the folder.
    # NOTE(review): os.listdir order is not guaranteed; confirm it matches the
    # row order of the label table.
    file_list = [file for file in os.listdir(folderPath) if file.endswith('.npy')]
    if not file_list:
        raise ValueError(f"no .npy files found in {folderPath!r}")

    # Initialize list to store padded data
    padded_data_list = []

    # Process each .npy file
    for file_name in file_list:
        # Fix: read from the folder passed in, not from the notebook-level
        # variable folder_pathTest (undefined until a later cell has run).
        data = np.load(os.path.join(folderPath, file_name))

        # Keep the first max_sequence_length values, then zero-pad on the left
        truncated_data = [seq[:max_sequence_length] for seq in data]
        padded_data = pad_sequences(truncated_data, maxlen=max_sequence_length, padding='pre', dtype='float32')

        padded_data_list.append(padded_data)

    # Append filler copies of the first sample (see num_filler_copies above)
    padded_data_list.extend([padded_data_list[0]] * num_filler_copies)

    # Stack the per-file arrays along a new leading sample axis
    padded_data_all = np.stack(padded_data_list, axis=0)

    # Swap axes to match model input shape (None, 1000, 3)
    padded_data_all = np.swapaxes(padded_data_all, 1, 2)
    return padded_data_all

In [263]:
# Folder holding the held-out test .npy files; t_data is the padded array
# built from it by dataPrep1.
folder_pathTest="C:\\Users\\91629\\Desktop\\MF_test_Data"
t_data=dataPrep1(folder_pathTest)

In [264]:
# Load the label table for the test arrays.
# NOTE(review): unpickling can execute arbitrary code; only load this file if
# it comes from a trusted source.
label1=pd.read_pickle("C:\\Users\\91629\\Downloads\\test_data_mf2_label.pkl")

In [265]:
# Shape of the test input array (includes the filler copy added by dataPrep1).
t_data.shape

(1137, 1000, 3)

In [266]:
# Shape of the test label table; its row count should equal t_data.shape[0].
label1.shape

(1137, 677)

In [267]:
# Evaluate the fitted model on both datasets; each result is [loss, accuracy]
# because the model was compiled with metrics=['accuracy'].
# NOTE(review): the names are swapped relative to the data they describe:
#   train_metrics <- t_data / label1   (arrays loaded from MF_test_Data)
#   test_metrics  <- test_data / labels (arrays loaded from MF_train_Data)
train_metrics=model.evaluate(t_data,label1, verbose=0)
test_metrics=model.evaluate(test_data,labels, verbose=0)

In [331]:
# train_metrics comes from model.evaluate(t_data, label1), i.e. the arrays
# loaded from MF_test_Data, and is measured after training, so label it as the
# test-set evaluation loss.
print("evaluation loss on test data (MF_test_Data):",train_metrics[0])

loss during training of train_data: 0.06834214925765991


In [332]:
# train_metrics comes from model.evaluate(t_data, label1), i.e. the arrays
# loaded from MF_test_Data, so label it as the test-set evaluation accuracy.
print("evaluation accuracy on test data (MF_test_Data):",train_metrics[1])

accuracy during training of train_data: 0.987538754940033


In [333]:
# test_metrics comes from model.evaluate(test_data, labels), i.e. the arrays
# loaded from MF_train_Data; it is a post-training evaluation, not a value
# recorded during training.
print("evaluation loss on training data (MF_train_Data):",test_metrics[0])

loss during training of train_data: 0.08559269458055496


In [334]:
# test_metrics comes from model.evaluate(test_data, labels), i.e. the arrays
# loaded from MF_train_Data; it is a post-training evaluation, not a value
# recorded during training.
print("evaluation accuracy on training data (MF_train_Data):",test_metrics[1])

accuracy during training of train_data: 0.9830892086029053


In [222]:
# Model outputs for the test arrays (sigmoid values, one row per sample).
# NOTE(review): this cell's execution count (In [222]) is lower than that of
# the cell that builds and fits the model (In [261]), so the stored predictions
# may come from an earlier model; re-run top to bottom on a fresh kernel.
predicted_data=model.predict(t_data)



In [250]:
# Test labels as a plain NumPy array (same rows and columns as label1).
actual_data=label1.values

In [251]:
# Display the raw (un-thresholded) outputs returned by model.predict(t_data).
predicted_data

array([[0.00958697],
       [0.01512731],
       [0.01298973],
       ...,
       [0.01383911],
       [0.01317876],
       [0.00958697]], dtype=float32)

In [268]:
# Display the test label matrix for visual comparison with predicted_data.
actual_data

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [329]:
# Apply threshold (0.5)
# Binarise the model outputs at 0.5 and drop the singleton axes in one pass,
# leaving a flat vector of 0/1 predictions.
y_pred = (predicted_data >= 0.5).astype(int).squeeze()

# Show the resulting hard labels
print(y_pred)


[0 0 0 ... 0 0 0]


In [322]:
# Collapse the label table to one value per row: 1 when the row sum is
# positive, otherwise 0.
# NOTE(review): presumably "sample has at least one positive label"; confirm
# this is the target the single-output model is meant to predict, since the
# model itself was fitted on the full label table, not on this reduction.
binary_labels = (label1.sum(axis=1) > 0).astype(int)

In [323]:
# Compare the collapsed test labels with the thresholded predictions.
# NOTE(review): accuracy_score, precision_score, recall_score and f1_score are
# not imported anywhere in the visible notebook; they are presumably the
# sklearn.metrics functions. Add that import to the imports cell, otherwise
# this cell raises NameError on a fresh kernel.
accuracy = accuracy_score(binary_labels,y_pred)
precision = precision_score(binary_labels,y_pred)
recall = recall_score(binary_labels,y_pred)
f1 = f1_score(binary_labels,y_pred)

In [324]:
# Display accuracy from accuracy_score(binary_labels, y_pred).
# NOTE(review): the metrics cell ran as In [323] but the cell that assigns
# y_pred shows In [329], so this value may have been computed from a different
# y_pred than the one printed above; re-run all cells in order to confirm.
accuracy

0.9885664028144239

In [325]:
# Display precision from precision_score(binary_labels, y_pred).
precision

1.0

In [326]:
# Display recall from recall_score(binary_labels, y_pred).
recall

0.9885664028144239

In [307]:
# Display the F1 score from f1_score(binary_labels, y_pred).
f1

0.994250331711632