In [37]:
import pandas as pd
import numpy as np
import h5py
import cv2
import numpy as np

In [38]:
subject = "m292"

In [39]:
dataframes = {}

label_list = [0,1,2,3,4] 

n_folds = 5

for label in label_list:
    filename = "Data/"+str(subject)+"/run0"+str(label)+"/Brain_Imaging_Data.h5"
    file = h5py.File(filename,'r')
    dataframes[label] = file['Data']

# Compute segments

In [40]:
# Function to segment the dataset into smaller chunks
def segment_data(data, segment_size = 80):
    segments = []

    for i in range(75):
        start_idx = int(i* segment_size)
        end_idx = start_idx + segment_size
        segment = data[start_idx:end_idx, :, :]
        segments.append(list(segment))

    return np.array(segments)

segments = {}

for label in label_list:
    segments[label] = segment_data(dataframes[label])


# Features

## Color histograms

In [None]:
# TODO probably do not make sense for black & white images?

In [41]:
# Function to extract color histograms from a video segment
def extract_color_histograms(segment, is_grayscale=True):
    histograms = []
    for frame in segment:
        if is_grayscale:
            hist = cv2.calcHist([frame.astype(np.uint8)], [0], None, [256], [0, 256])
            histograms.append(hist.flatten())
        else:
            # Convert frame to HSV color space
            hsv_frame = cv2.cvtColor(frame.astype(np.uint8), cv2.COLOR_BGR2HSV)

            # Calculate histogram for each channel (Hue, Saturation, Value)
            hist_hue = cv2.calcHist([hsv_frame], [0], None, [256], [0, 256])
            hist_saturation = cv2.calcHist([hsv_frame], [1], None, [256], [0, 256])
            hist_value = cv2.calcHist([hsv_frame], [2], None, [256], [0, 256])

            # Concatenate histograms into a single feature vector
            histogram = np.concatenate((hist_hue.flatten(), hist_saturation.flatten(), hist_value.flatten()))
            histograms.append(histogram)

    return np.array(histograms)


features = {}
for label in label_list:
    features[label] = []
    for segment in segments[label]:
        histograms = extract_color_histograms(segment)
        features[label].append(histograms)
    
    features[label] = np.array(features[label])




In [42]:
# Take average

def average_features_over_frames(features):

    averaged_features = []

    for segment in features:
        average_array = np.mean(segment, axis=0)
        averaged_features.append(average_array)

    return averaged_features
    
hist_features = {}

for label in label_list:
    hist_features[label] = average_features_over_frames(features[label])

## Optical flow

In [43]:
# Function to extract optical flow features from a grayscale video segment
def extract_optical_flow_features(segment):
    features = []
    prev_frame = None
    for frame in segment:
        if prev_frame is not None:
            # Calculate optical flow
            flow = cv2.calcOpticalFlowFarneback(prev_frame, frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)

            # Flatten and append optical flow as feature
            features.append(flow.flatten())

        prev_frame = frame

    return np.array(features)

optical_flow_features = {}
for label in label_list:
    optical_flow_features[label] = []
    for segment in segments[label]:
        histograms = extract_color_histograms(segment)
        optical_flow_features[label].append(histograms)
    
    optical_flow_features[label] = np.array(optical_flow_features[label])


In [44]:
for label in label_list:
    optical_flow_features[label] = average_features_over_frames(optical_flow_features[label])

## Spatiotemporal features

In [45]:
# Function to extract 3D HOG features from a video segment
def extract_spatiotemporal_features(segment):
    features = []
    for frame in segment:
        # Convert the frame to grayscale
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        
        # Duplicate the single-channel grayscale image to simulate a color image
        frame_color = cv2.merge((frame_gray, frame_gray, frame_gray))

        # Calculate HOG features for each frame
        hog = cv2.HOGDescriptor()
        hist = hog.compute(frame_color)

        features.append(hist.flatten())

    return np.array(features)

spatiotemporal_features = {}

for label in label_list:
    spatiotemporal_features[label] = []
    for segment in segments[label]:
        histograms = extract_color_histograms(segment)
        spatiotemporal_features[label].append(histograms)
    
    spatiotemporal_features[label] = np.array(spatiotemporal_features[label])


In [46]:
for label in label_list:
        spatiotemporal_features[label] = average_features_over_frames(spatiotemporal_features[label])

# Create Feature DF and save

In [83]:
def choose_column_in_matrix(matrix, i):
    return [row[i] for row in matrix]

In [73]:
histogram_features = features[0]

for dim_idx in range(1):
    print(len(choose_column_in_matrix(histogram_features, dim_idx)))


TypeError: object of type 'numpy.float32' has no len()

In [113]:
def create_feature_df_all_labels(histogram_features_dict, optical_flow_features_dict, spatiotemporal_features_dict, label_list):
    """
    Create DataFrame for each label from features

    Parameters:
    - histogram_features_dict (dict): Dictionary containing histogram features for each label.
    - optical_flow_features_dict (dict): Dictionary containing optical flow features for each label.
    - spatiotemporal_features_dict (dict): Dictionary containing spatiotemporal features for each label.
    - label_list (list): List of labels for which we want to create dataframes.

    Returns:
    - Feature DataFrame (DataFrame)
    """
    
    feature_dfs = []
    
    for label in label_list:
        histogram_features = histogram_features_dict[label]
        optical_flow_features = optical_flow_features_dict[label]
        spatiotemporal_features = spatiotemporal_features_dict[label]
        
        feature_df = pd.DataFrame()
        
        for dim_idx in range(256):
            feature_df["Color_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)

        for dim_idx in range(256):
            feature_df["Optical_Flow_"+str(dim_idx)] = choose_column_in_matrix(optical_flow_features, dim_idx)

        for dim_idx in range(256):
            feature_df["Spatiotemporal_Features_"+str(dim_idx)] = choose_column_in_matrix(spatiotemporal_features, dim_idx)

        # Label
        feature_df["Label"] = label
        
        feature_dfs.append(feature_df)
    
    # Concatenate dataframes for all labels
    concatenated_df = pd.concat(feature_dfs, ignore_index=True)
    
    return concatenated_df

dataframes = create_feature_df_all_labels(hist_features, optical_flow_features, spatiotemporal_features, label_list)

  feature_df["Color_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["Color_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["Color_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["Color_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["Color_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["Color_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["Color_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["Color_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["Color_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["Color_Histograms_Dim_"+str(dim_i

In [115]:
dataframes.to_csv("Features/"+str(subject)+"/Traditional_Features.csv")