# In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.utils import Sequence
from concurrent.futures import ThreadPoolExecutor
import logging

# Configure the root logger at DEBUG level so the logging.debug() progress
# messages emitted during training and evaluation are actually shown.
logging.basicConfig(level=logging.DEBUG)

class VideoLSTMClassifier:
    """Binary per-video classifier: one LSTM layer followed by a sigmoid unit.

    Both DataFrames hold one row per frame.  The columns ``capture_id``
    (groups frames into videos), ``frame_id`` (orders frames within a video)
    and ``label`` (one label per video) are treated as metadata; every other
    column is used as a feature.
    """

    # Columns that identify or annotate a frame; everything else is a feature.
    _NON_FEATURE_COLUMNS = ("capture_id", "frame_id", "label")

    def __init__(self, train_df, test_df, n_features, lstm_units=50):
        """Store the data and build the (untrained) model.

        Args:
            train_df: Frame-level DataFrame used by :meth:`train`.
            test_df: Frame-level DataFrame used by :meth:`evaluate` and
                :meth:`multi_threaded_predict`.
            n_features: Number of feature columns per frame; becomes the last
                dimension of the LSTM input shape.
            lstm_units: Number of units in the LSTM layer.
        """
        self.train_df = train_df
        self.test_df = test_df
        self.n_features = n_features
        self.lstm_units = lstm_units
        self.model = self.build_model()

    def build_model(self):
        """Build and compile the LSTM -> Dense(sigmoid) model.

        Returns:
            The compiled Keras ``Sequential`` model.
        """
        model = Sequential()
        # The time dimension is None so videos may have different frame counts.
        model.add(LSTM(self.lstm_units, activation='relu', input_shape=(None, self.n_features)))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        return model

    @classmethod
    def _to_batch(cls, frames):
        """Turn the (already ordered) rows of one video into a model input.

        Metadata columns are dropped if present, so the same helper serves
        labelled training data and unlabelled data passed to :meth:`predict`.

        Returns:
            Array of shape ``(1, n_frames, n_feature_columns)``.
        """
        features = frames.drop(columns=list(cls._NON_FEATURE_COLUMNS), errors="ignore")
        return features.to_numpy()[np.newaxis, :, :]

    def generator(self, df):
        """Yield ``(X, y)`` for one video at a time, cycling over ``df`` forever.

        ``X`` has shape ``(1, n_frames, n_feature_columns)`` with frames sorted
        by ``frame_id``; ``y`` has shape ``(1, 1)`` and holds the label of the
        video's first frame (each video is assumed to carry a single label).

        Raises:
            ValueError: On first iteration if ``df`` contains no videos.
                Without this check the endless loop below would spin forever
                without ever yielding.
        """
        if df["capture_id"].nunique() == 0:
            raise ValueError("df contains no videos (no non-null capture_id values)")

        # Group once; the GroupBy object can be iterated again on every pass.
        videos = df.groupby("capture_id")
        while True:
            for _, group in videos:
                ordered = group.sort_values(by="frame_id")  # chronological order
                label = ordered["label"].values[0]
                yield self._to_batch(ordered), np.array([[label]])

    def train(self, epochs=10):
        """Fit the model, using one video per step and all videos per epoch."""
        logging.debug("Training started...")
        train_gen = self.generator(self.train_df)
        n_samples = self.train_df["capture_id"].nunique()
        self.model.fit(train_gen, steps_per_epoch=n_samples, epochs=epochs)
        logging.debug("Training completed.")

    def evaluate(self):
        """Evaluate on every video in ``test_df``.

        Returns:
            The value returned by ``model.evaluate``; index 0 is logged as the
            loss and index 1 as the accuracy.
        """
        logging.debug("Evaluation started...")
        test_gen = self.generator(self.test_df)
        n_samples = self.test_df["capture_id"].nunique()
        results = self.model.evaluate(test_gen, steps=n_samples)
        logging.debug("Loss: %s, Accuracy: %s", results[0], results[1])
        return results

    def predict(self, video_data):
        """Run the model on one video.

        Args:
            video_data: Either an array already shaped for the model, which is
                passed through untouched, or a DataFrame holding the frames of
                one video.  A DataFrame is sorted by ``frame_id`` (when that
                column exists), stripped of metadata columns and given a
                leading batch axis, mirroring what :meth:`generator` feeds the
                model during training.

        Returns:
            The value returned by ``model.predict``.
        """
        if isinstance(video_data, pd.DataFrame):
            if "frame_id" in video_data.columns:
                video_data = video_data.sort_values(by="frame_id")
            video_data = self._to_batch(video_data)
        return self.model.predict(video_data)

    def multi_threaded_predict(self, num_threads=4):
        """Predict every video in ``test_df`` using a pool of threads.

        Args:
            num_threads: Maximum number of worker threads.

        Returns:
            List with one prediction per video, in the order ``groupby``
            yields the ``capture_id`` groups.
        """
        # NOTE(review): assumes model.predict may be called from several
        # threads at once -- confirm for the Keras backend in use.
        video_groups = [group for _, group in self.test_df.groupby("capture_id")]
        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            predictions = list(executor.map(self.predict, video_groups))
        return predictions

# Example usage.
# `train_data` and `test_data` must already exist as frame-level DataFrames
# before this runs; they are not created in this cell.
# Three columns (capture_id, frame_id, label) are not features, hence "- 3".
classifier = VideoLSTMClassifier(
    train_df=train_data,
    test_df=test_data,
    n_features=train_data.shape[1] - 3,
)
classifier.train(epochs=5)
classifier.evaluate()
predictions = classifier.multi_threaded_predict(num_threads=8)

# Output of the cell above when `train_data` has not been defined beforehand:
# NameError: name 'train_data' is not defined