In [3]:
import pandas as pd
import numpy as np
import h5py
import cv2
import numpy as np

In [4]:
subject = "m294"

In [5]:
dataframes = {}

label_list = [0,1,2,3,4] 

n_folds = 5

for label in label_list:
    filename = "Data/"+str(subject)+"/run0"+str(label)+"/Brain_Imaging_Data.h5"
    file = h5py.File(filename,'r')
    dataframes[label] = file['Data']

# Compute segments

In [6]:
# Function to segment the dataset into smaller chunks
def segment_data(data, segment_size = 80):
    segments = []

    for i in range(75):
        start_idx = int(i* segment_size)
        end_idx = start_idx + segment_size
        segment = data[start_idx:end_idx, :, :]
        segments.append(list(segment))

    return np.array(segments)

segments = {}

for label in label_list:
    segments[label] = segment_data(dataframes[label])


# Features

## Color histograms

In [8]:
import cv2
import numpy as np

# Function to extract color histograms from a video segment
def extract_color_histograms(segment, is_grayscale=True):
    histograms = []
    for frame in segment:
        if is_grayscale:
            hist = cv2.calcHist([frame.astype(np.uint8)], [0], None, [256], [0, 256])
            histograms.append(hist.flatten())
        else:
            # Convert frame to HSV color space
            hsv_frame = cv2.cvtColor(frame.astype(np.uint8), cv2.COLOR_BGR2HSV)

            # Calculate histogram for each channel (Hue, Saturation, Value)
            hist_hue = cv2.calcHist([hsv_frame], [0], None, [256], [0, 256])
            hist_saturation = cv2.calcHist([hsv_frame], [1], None, [256], [0, 256])
            hist_value = cv2.calcHist([hsv_frame], [2], None, [256], [0, 256])

            # Concatenate histograms into a single feature vector
            histogram = np.concatenate((hist_hue.flatten(), hist_saturation.flatten(), hist_value.flatten()))
            histograms.append(histogram)

    return np.array(histograms)


features = {}
for label in label_list:
    features[label] = []
    for segment in segments[label]:
        histograms = extract_color_histograms(segment)
        features[label].append(histograms)
    
    features[label] = np.array(features[label])




In [30]:
# Take average

def average_features_over_frames(features):

    averaged_features = []

    for segment in features:
        average_array = np.mean(segment, axis=0)
        averaged_features.append(average_array)

    return averaged_features
    
hist_features = {}

for label in label_list:
    hist_features[label] = average_features_over_frames(features[label])

## Optical flow

In [9]:
# Function to extract optical flow features from a grayscale video segment
def extract_optical_flow_features(segment):
    features = []
    prev_frame = None
    for frame in segment:
        if prev_frame is not None:
            # Calculate optical flow
            flow = cv2.calcOpticalFlowFarneback(prev_frame, frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)

            # Flatten and append optical flow as feature
            features.append(flow.flatten())

        prev_frame = frame

    return np.array(features)

optical_flow_features = {}
for label in label_list:
    optical_flow_features[label] = []
    for segment in segments[label]:
        histograms = extract_color_histograms(segment)
        optical_flow_features[label].append(histograms)
    
    optical_flow_features[label] = np.array(optical_flow_features[label])


In [33]:
for label in label_list:
    optical_flow_features[label] = average_features_over_frames(optical_flow_features[label])

## Spatiotemporal features

In [10]:
# Function to extract 3D HOG features from a video segment
def extract_spatiotemporal_features(segment):
    features = []
    for frame in segment:
        # Convert the frame to grayscale
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        
        # Duplicate the single-channel grayscale image to simulate a color image
        frame_color = cv2.merge((frame_gray, frame_gray, frame_gray))

        # Calculate HOG features for each frame
        hog = cv2.HOGDescriptor()
        hist = hog.compute(frame_color)

        features.append(hist.flatten())

    return np.array(features)

spatiotemporal_features = {}
for label in label_list:
    spatiotemporal_features[label] = []
    for segment in segments[label]:
        histograms = extract_color_histograms(segment)
        spatiotemporal_features[label].append(histograms)
    
    spatiotemporal_features[label] = np.array(spatiotemporal_features[label])


In [34]:
for label in label_list:
        spatiotemporal_features[label] = average_features_over_frames(spatiotemporal_features[label])

# Create Feature DF and save

In [35]:
def choose_column_in_matrix(matrix, i):
    return [row[i] for row in matrix]

In [36]:
def create_feature_df(histogram_features, optical_flow_features, spatiotemporal_features, label):
    """
    Create DataFrame for each label from features

    Parameters:
    - label (int): Label for which we want to create a dataframe. 1, 3, 5 or 7.

    Returns:
    - Feature DataFrame (DataFrame)
    """
    
    feature_df = pd.DataFrame()

    for dim_idx in range(256):
        feature_df["Color_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
    feature_df["Optical Flow"] = optical_flow_features
    feature_df["Spatiotemporal"] = spatiotemporal_features

    # Label
    feature_df["Label"] = label



    return feature_df

In [37]:
dataframes= {}

for label in label_list:
    dataframes[label] = create_feature_df(features[label],  \
                optical_flow_features[label], spatiotemporal_features[label], label)
    

IndexError: index 80 is out of bounds for axis 0 with size 80

In [None]:
# Concatenate and save features of training persistence diagrams
feature_df = pd.concat([dataframes[0], dataframes[1], dataframes[2], dataframes[3], dataframes[4]], ignore_index=True)

feature_df.to_csv("Features/"+str(subject)+"/Traditional_Features.csv")