<a href="https://colab.research.google.com/github/medazizfoudhaili/project-graph/blob/main/first_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Step 1: Upload your dataset
from google.colab import files
import pandas as pd
import io
import numpy as np
import ast

uploaded = files.upload()

# Load the CSV
filename = list(uploaded.keys())[0]
data = pd.read_csv(io.BytesIO(uploaded[filename]))
print("Original data shape:", data.shape)

# Step 2: Filter out rows with 'Unknown' in P_oR_R
data = data[data['P_oR_R'].isin(['P','R'])]
print("Filtered data shape:", data.shape)
print(data['P_oR_R'].value_counts())

# Step 3: Columns to use
sensor_cols = ['ax','ay','az','gx','gy','gz']

# Step 4: Convert string lists to numeric arrays with padding
MAX_LIST_LEN = 10  # max elements per list in a cell

def parse_and_pad(cell):
    arr = np.array(ast.literal_eval(cell), dtype=np.float32)
    if len(arr) < MAX_LIST_LEN:
        arr = np.pad(arr, (0, MAX_LIST_LEN - len(arr)), 'constant', constant_values=0)
    else:
        arr = arr[:MAX_LIST_LEN]
    return arr

for col in sensor_cols:
    data[col] = data[col].apply(parse_and_pad)

# Step 5: Build sequences
TIME_STEPS = 10
FEATURES = len(sensor_cols) * MAX_LIST_LEN

X_rows = []
y_rows = []

for i in range(0, len(data) - TIME_STEPS + 1, TIME_STEPS):
    seq_list = []
    valid = True
    for t in range(TIME_STEPS):
        row_features = []
        for col in sensor_cols:
            row_features.extend(data[col].iloc[i+t])
        if len(row_features) != FEATURES:
            valid = False
            break
        seq_list.append(row_features)
    if valid:
        X_rows.append(seq_list)
        y_rows.append(0 if data['P_oR_R'].iloc[i + TIME_STEPS - 1]=='P' else 1)  # Encode labels

X = np.array(X_rows, dtype=np.float32)
y = np.array(y_rows)
print("X shape:", X.shape)
print("y shape:", y.shape)
print("Unique labels:", np.unique(y))

# Step 6: Build a CNN-LSTM model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout

model = Sequential([
    Conv1D(32, kernel_size=3, activation='relu', input_shape=(TIME_STEPS, FEATURES)),
    MaxPooling1D(pool_size=2),
    LSTM(50, return_sequences=False),
    Dropout(0.3),
    Dense(1, activation='sigmoid')  # binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Step 7: Train the model
history = model.fit(X, y, epochs=10, batch_size=16)

# Step 8: Evaluate
loss, acc = model.evaluate(X, y)
print("Accuracy:", acc)

# Confusion matrix
from sklearn.metrics import confusion_matrix
y_pred = (model.predict(X) > 0.5).astype(int)
cm = confusion_matrix(y, y_pred)
print("Confusion Matrix:\n", cm)


Saving NV06Data_readyToTrainwith.csv to NV06Data_readyToTrainwith (5).csv
Original data shape: (10254, 11)
Filtered data shape: (4525, 11)
P_oR_R
P    3395
R    1130
Name: count, dtype: int64
X shape: (452, 10, 60)
y shape: (452,)
Unique labels: [0 1]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.6558 - loss: 0.6308
Epoch 2/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7433 - loss: 0.5661
Epoch 3/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7597 - loss: 0.5190
Epoch 4/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7378 - loss: 0.5530
Epoch 5/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8020 - loss: 0.4591
Epoch 6/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7909 - loss: 0.4940
Epoch 7/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7832 - loss: 0.4659
Epoch 8/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8039 - loss: 0.4164
Epoch 9/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[