<a href="https://colab.research.google.com/github/sp2005-im/Machine-Learning-Basics-Regression-And-Classification/blob/main/Microfluids-and-ML/CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Sri Rama Jayam
#Checks
!pip show tensorflow

Name: tensorflow
Version: 2.18.0
Summary: TensorFlow is an open source machine learning framework for everyone.
Home-page: https://www.tensorflow.org/
Author: Google Inc.
Author-email: packages@tensorflow.org
License: Apache 2.0
Location: /usr/local/lib/python3.11/dist-packages
Requires: absl-py, astunparse, flatbuffers, gast, google-pasta, grpcio, h5py, keras, libclang, ml-dtypes, numpy, opt-einsum, packaging, protobuf, requests, setuptools, six, tensorboard, tensorflow-io-gcs-filesystem, termcolor, typing-extensions, wrapt
Required-by: dopamine_rl, tensorflow-text, tf_keras


In [2]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
#Sri Rama Jayam
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, GlobalAveragePooling1D, Dense, concatenate
from tensorflow.keras.layers import BatchNormalization, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

In [3]:
def load_data(directory, cell_type):
  """Load every CSV trajectory found in `directory` and tag it with `cell_type`.

  Files are visited in sorted filename order so the returned lists (and any
  seeded split derived from them) are reproducible; `os.listdir` on its own
  yields entries in arbitrary order.

  Args:
    directory: Folder holding one CSV per trajectory. Every CSV must provide
      the columns X, Y, Z, velocity_x, velocity_y and velocity_z.
    cell_type: Label recorded once for each file that is loaded.

  Returns:
    A `(position_data, velocity_data, labels)` tuple of three index-aligned
    lists. `position_data[i]` and `velocity_data[i]` are arrays with one row
    per CSV row and three columns; `labels[i]` is `cell_type`.

  Raises:
    KeyError: If a CSV is missing one of the required columns.
  """
  position_data = []
  velocity_data = []
  labels = []
  for filename in sorted(os.listdir(directory)):
    # Anything that is not a CSV (notes, checkpoints, sub-folders) is ignored.
    if not filename.endswith('.csv'):
      continue
    df = pd.read_csv(os.path.join(directory, filename))
    position_data.append(df[['X', 'Y', 'Z']].values)
    velocity_data.append(df[['velocity_x', 'velocity_y', 'velocity_z']].values)
    labels.append(cell_type)
  return position_data, velocity_data, labels


In [4]:
# Soft cells are labelled 0, rigid ("hard_dir") cells are labelled 1.
soft_position, soft_velocity, soft_labels = load_data(
    directory='/content/drive/MyDrive/NewTimeSeriesDataForCells/soft_dir',
    cell_type=0,
)
rigid_position, rigid_velocity, rigid_labels = load_data(
    directory='/content/drive/MyDrive/NewTimeSeriesDataForCells/hard_dir',
    cell_type=1,
)

In [5]:
# Sanity check: labels must be a plain Python list, because the next step
# joins the two classes with `+` (list concatenation, not element-wise add).
type(soft_labels)

list

In [6]:
# Merge both classes (soft samples first) while keeping the three lists
# index-aligned: entry i of each list describes the same trajectory.
all_position_data = [*soft_position, *rigid_position]
all_velocity_data = [*soft_velocity, *rigid_velocity]
all_labels = [*soft_labels, *rigid_labels]

In [7]:
# Sanity check: each merged position trajectory is still a NumPy array.
type(all_position_data[0])

numpy.ndarray

In [8]:
# Sanity check: each merged velocity trajectory is still a NumPy array.
type(all_velocity_data[0])

numpy.ndarray

In [9]:
# Bring every trajectory to a common length: the longest sequence found in
# either modality. Shorter sequences are padded at the end ('post').
max_length = max(len(seq) for seq in all_position_data + all_velocity_data)
pad_options = dict(maxlen=max_length, dtype='float32', padding='post', truncating='post')
padded_position_data = pad_sequences(all_position_data, **pad_options)
padded_velocity_data = pad_sequences(all_velocity_data, **pad_options)

In [10]:
# Number of padded sequences; there is one per CSV file that was loaded.
len(padded_position_data)

590

In [11]:
# Sanity check: padding should yield one stacked NumPy array, not a list.
type(padded_position_data)

numpy.ndarray

In [12]:
# Spot-check one padded position sequence: expected shape is (max_length, 3).
padded_position_data[8].shape

(431, 3)

In [13]:
# Spot-check one padded velocity sequence: expected shape is (max_length, 3).
padded_velocity_data[0].shape

(431, 3)

In [14]:
# One scaler object per modality (positions and velocities are scaled separately).
position_scaler, velocity_scaler = StandardScaler(), StandardScaler()

In [15]:
# Standardise each trajectory on its own statistics (per-feature zero mean,
# unit variance). The scalers are fitted on the *unpadded* rows only: fitting
# on the padded array would fold the trailing zeros into the mean/std and
# turn the padding itself into a non-zero constant that depends on the
# trajectory length. Padded timesteps therefore stay exactly 0 in the output.
# The scaler objects are re-fitted per trajectory, so afterwards they only
# hold the statistics of the last sequence processed.
normalized_position_data = np.zeros_like(padded_position_data)
normalized_velocity_data = np.zeros_like(padded_velocity_data)
for idx, (pos_seq, vel_seq) in enumerate(zip(all_position_data, all_velocity_data)):
  if len(pos_seq):  # an empty trajectory has nothing to fit on; leave zeros
    normalized_position_data[idx, :len(pos_seq)] = position_scaler.fit_transform(pos_seq)
  if len(vel_seq):
    normalized_velocity_data[idx, :len(vel_seq)] = velocity_scaler.fit_transform(vel_seq)

In [16]:
# Expect (n_sequences, max_length, 3) for the position tensor.
normalized_position_data.shape

(590, 431, 3)

In [17]:
# Expect (n_sequences, max_length, 3) for the velocity tensor, matching the positions.
normalized_velocity_data.shape

(590, 431, 3)

In [18]:
# One-hot encode the integer class ids (0 = soft, 1 = rigid).
labels = to_categorical(np.asarray(all_labels))

In [19]:
# Expect (n_sequences, 2): one one-hot column per class.
labels.shape

(590, 2)

In [20]:
# 80 % of the trajectories for training, 20 % held out for testing. All three
# arrays go through a single call so positions, velocities and labels stay
# index-aligned after shuffling.
split_arrays = train_test_split(
    normalized_position_data,
    normalized_velocity_data,
    labels,
    test_size=0.2,
    random_state=42,
)
(X_pos_train, X_pos_test,
 X_vel_train, X_vel_test,
 y_train, y_test) = split_arrays

In [21]:
def _conv_branch(branch_input):
    """Build one Conv1D feature extractor and return its pooled feature tensor.

    Three Conv1D stages (32 -> 64 -> 128 filters, kernel size 3, 'same'
    padding, ReLU), each followed by batch normalisation. Max-pooling and
    dropout sit between the second and third stage, and global average
    pooling collapses the time axis at the end.
    """
    x = Conv1D(32, kernel_size=3, padding='same', activation='relu')(branch_input)
    x = BatchNormalization()(x)
    x = Conv1D(64, kernel_size=3, padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(2)(x)
    x = Dropout(0.2)(x)
    x = Conv1D(128, kernel_size=3, padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    return GlobalAveragePooling1D()(x)


def create_model(input_shape):
    """Build the two-input CNN classifier (uncompiled).

    Args:
        input_shape: Shape of one sample, shared by the position and the
            velocity input.

    Returns:
        A Keras `Model` taking `[position_input, velocity_input]` and
        producing a 2-way softmax.
    """
    position_input = Input(shape=input_shape)
    velocity_input = Input(shape=input_shape)

    # Identical architecture, independent weights, for each modality.
    # The position branch is built first so layer creation order is unchanged.
    x_pos = _conv_branch(position_input)
    x_vel = _conv_branch(velocity_input)

    # Combine features
    combined = concatenate([x_pos, x_vel])
    combined = Dropout(0.3)(combined)

    # Dense head; batch normalisation and dropout after each hidden layer.
    x = Dense(256, activation='relu')(combined)
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)
    x = Dense(128, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)
    output = Dense(2, activation='softmax')(x)

    return Model(inputs=[position_input, velocity_input], outputs=output)


def create_all_outputs_model(model):
  """Return a model with `model`'s inputs whose outputs are every layer's output.

  The outputs follow the order of `model.layers`.
  """
  every_layer_output = []
  for layer in model.layers:
    every_layer_output.append(layer.output)
  return Model(inputs=model.inputs, outputs=every_layer_output)



In [22]:
# Seed Python, NumPy and TensorFlow before any weights are created so that
# initialisation and the other stochastic parts of training are repeatable
# across "Restart & Run All" (to the extent the backend ops are deterministic).
tf.keras.utils.set_random_seed(42)

# Every sample is max_length timesteps with 3 features per timestep.
input_shape = (max_length, 3)
model = create_model(input_shape)

In [23]:
# One-hot targets + softmax output -> categorical cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

In [24]:
class TestAccuracyCallback(tf.keras.callbacks.Callback):
  """Evaluate the model on a fixed data set after every epoch.

  The accuracy of each epoch is printed and appended to `test_accuracies`
  (index 0 is the first epoch).

  Args:
    test_data: `(X_pos_test, X_vel_test, y_test)` tuple passed to
      `model.evaluate` as `[X_pos_test, X_vel_test], y_test`.
  """

  def __init__(self, test_data):
    # Let the Keras base class set up its own state before adding ours.
    super().__init__()
    self.test_data = test_data
    self.test_accuracies = []

  def on_epoch_end(self, epoch, logs = None):
    """Run the evaluation and record the accuracy for this epoch."""
    X_pos_test, X_vel_test, y_test = self.test_data
    # evaluate() is unpacked as (loss, accuracy); this assumes the model was
    # compiled with exactly one metric, accuracy.
    test_loss, test_accuracy = self.model.evaluate([X_pos_test, X_vel_test], y_test, verbose = 0)
    self.test_accuracies.append(test_accuracy)
    # `epoch` is zero-based; report it one-based like the Keras progress bar.
    print(f'\n Test accuracy at epoch {epoch+1}: {test_accuracy:.4f}')

# Track accuracy on the held-out test split after every training epoch.
test_accuracy_callback = TestAccuracyCallback(
    test_data=(X_pos_test, X_vel_test, y_test),
)


In [25]:
# Fit on the training split; 20 % of it is set aside by Keras for validation,
# and the callback additionally reports test-set accuracy each epoch.
cnn_history = model.fit(
    x=[X_pos_train, X_vel_train],
    y=y_train,
    batch_size=32,
    epochs=400,
    validation_split=0.2,
    callbacks=[test_accuracy_callback],
    verbose=1,
)

# Companion model that exposes the output of every layer of the trained network.
all_outputs_model = create_all_outputs_model(model)

Epoch 1/400
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 675ms/step - accuracy: 0.4848 - loss: 1.1051
 Test accuracy at epoch 1: 0.5254
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 892ms/step - accuracy: 0.4857 - loss: 1.1021 - val_accuracy: 0.5684 - val_loss: 0.6896
Epoch 2/400
[1m 9/12[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 13ms/step - accuracy: 0.5793 - loss: 0.9224
 Test accuracy at epoch 2: 0.5593
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 30ms/step - accuracy: 0.5846 - loss: 0.9078 - val_accuracy: 0.6526 - val_loss: 0.6914
Epoch 3/400
[1m 9/12[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 13ms/step - accuracy: 0.5605 - loss: 0.8036
 Test accuracy at epoch 3: 0.4237
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.5708 - loss: 0.7808 - val_accuracy: 0.4632 - val_loss: 0.6932
Epoch 4/400
[1m 9/12[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 13ms/step