In [1]:
import pandas as pd
import numpy as np
import h5py
import cv2
import numpy as np
from skimage.feature import local_binary_pattern#, greycomatrix, greycoprops
from sklearn.decomposition import PCA

In [2]:
subject = "m292"

In [3]:
dataframes = {}

label_list = [0,1,2,3,4] 

n_folds = 5

for label in label_list:
    filename = "Data/"+str(subject)+"/run0"+str(label)+"/Brain_Imaging_Data.h5"
    file = h5py.File(filename,'r')
    dataframes[label] = file['Data']

# Compute segments & Define functions

In [4]:
# TODO try different number of bins than 256

In [5]:
# Function to segment the dataset into smaller chunks

def segment_data(data, segment_size = 80):
    """
    Segment the dataset into smaller chunks.

    Parameters:
    - data (ndarray): Input data to be segmented.
    - segment_size (int): Size of each segment.

    Returns:
    - segments (list): List of segmented data.
    """
    
    segments = []

    for i in range(75):
        start_idx = int(i* segment_size)
        end_idx = start_idx + segment_size
        segment = data[start_idx:end_idx, :, :]
        segments.append(list(segment))

    return np.array(segments)

segments = {}

for label in label_list:
    segments[label] = segment_data(dataframes[label])

In [6]:
def average_features_over_frames(features):
    """ Takes average of each feature bin over all 80 images
    that are within one 4s interval
    """

    averaged_features = []

    for segment in features:
        average_array = np.mean(segment, axis=0)
        averaged_features.append(average_array)

    return averaged_features

In [7]:
def compute_and_average_features(func, label_list = label_list, segments = segments, **kwargs):
    """
    Compute and average features using the provided function.

    Parameters:
    - label_list (list): List of labels.
    - segments (dict): Dictionary of segments corresponding to each label.
    - func (function): Function to compute features from a segment.
    - kwargs: Additional keyword arguments to pass to the function.

    Returns:
    - features (dict): Dictionary containing averaged features for each label.
    """
    features = {}

    for label in label_list:
        label_features = []
        for segment in segments[label]:
            feature = func(segment, **kwargs)
            label_features.append(feature)

        label_features = np.array(label_features)
        label_features = average_features_over_frames(label_features)
        features[label] = label_features

    return features

# Features

## Intensity histograms

In [8]:
def extract_intensity_histograms(segment, is_grayscale=True):
    histograms = []
    for frame in segment:
        if is_grayscale:
            hist = cv2.calcHist([frame.astype(np.uint8)], [0], None, [256], [0, 256])
            histograms.append(hist.flatten())
        else:
            # Convert frame to grayscale
            gray_frame = cv2.cvtColor(frame.astype(np.uint8), cv2.COLOR_BGR2GRAY)

            # Calculate intensity histogram
            hist_intensity = cv2.calcHist([gray_frame], [0], None, [256], [0, 256])

            # Flatten and append histogram
            histograms.append(hist_intensity.flatten())

    return np.array(histograms)

hist_features = compute_and_average_features(extract_intensity_histograms)

## Optical flow

In [9]:
# Function to extract optical flow features from a grayscale video segment
def extract_optical_flow_features(segment, n_components=75):
    features = []
    prev_frame = None
    for frame in segment:
        if prev_frame is not None:
            # Calculate optical flow
            flow = cv2.calcOpticalFlowFarneback(prev_frame, frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)

            # Flatten and append optical flow as feature
            features.append(flow.flatten())

        prev_frame = frame

    # Convert features to numpy array
    features = np.array(features)

    # Apply PCA to reduce dimensionality
    pca = PCA(n_components=n_components)
    features_reduced = pca.fit_transform(features)

    return features_reduced


optical_flow_features = compute_and_average_features(extract_optical_flow_features)

## Spatiotemporal features

In [10]:
"""Computationally to inefficient
def extract_spatiotemporal_features(segment):
    Extract spatiotemporal features from a video segment.

    Parameters:
    - segment (list): List of grayscale frames representing the video segment.

    Returns:
    - features (ndarray): Array containing spatiotemporal features.
    
    features = []
    for frame in segment:
        # Ensure frame is in the correct data type (CV_8U)
        frame = frame.astype(np.uint8)
        
        # Calculate HOG features for the frame
        hog = cv2.HOGDescriptor()
        hist = hog.compute(frame)

        features.append(hist.flatten())

    return np.array(features)

spatiotemporal_features = compute_and_average_features(extract_spatiotemporal_features)

"""


'Computationally to inefficient\ndef extract_spatiotemporal_features(segment):\n    Extract spatiotemporal features from a video segment.\n\n    Parameters:\n    - segment (list): List of grayscale frames representing the video segment.\n\n    Returns:\n    - features (ndarray): Array containing spatiotemporal features.\n    \n    features = []\n    for frame in segment:\n        # Ensure frame is in the correct data type (CV_8U)\n        frame = frame.astype(np.uint8)\n        \n        # Calculate HOG features for the frame\n        hog = cv2.HOGDescriptor()\n        hist = hog.compute(frame)\n\n        features.append(hist.flatten())\n\n    return np.array(features)\n\nspatiotemporal_features = compute_and_average_features(extract_spatiotemporal_features)\n\n'

# Texture features

## Local Binary Patterns (LBP)

In [11]:
def extract_lbp_features(segment):
    features = []
    for frame in segment:
        # Compute LBP
        lbp = local_binary_pattern(frame, 8, 1, method='uniform')
        
        # Calculate histogram of LBP
        hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, 59), range=(0, 58))

        # Normalize histogram
        hist = hist.astype("float")
        hist /= (hist.sum() + 1e-7)

        # Flatten and append LBP histogram as feature
        features.append(hist)

    return np.array(features)


lbp_features = compute_and_average_features(extract_lbp_features)



## Gray-Level Co-occurrence Matrix (GLCM)

In [12]:
""" Import Issues

def extract_glcm_features(segment, distances=[1], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4], symmetric=True, normed=True):
    features = []
    for frame in segment:
        # Compute GLCM
        glcm = greycomatrix(frame, distances=distances, angles=angles, symmetric=symmetric, normed=normed)

        # Calculate GLCM properties
        properties = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation']
        glcm_props = [greycoprops(glcm, prop).ravel() for prop in properties]

        # Flatten and append GLCM properties as features
        features.append(np.hstack(glcm_props))

    return np.array(features)


glcm_features = compute_and_average_features(extract_glcm_features)

"""

" Import Issues\n\ndef extract_glcm_features(segment, distances=[1], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4], symmetric=True, normed=True):\n    features = []\n    for frame in segment:\n        # Compute GLCM\n        glcm = greycomatrix(frame, distances=distances, angles=angles, symmetric=symmetric, normed=normed)\n\n        # Calculate GLCM properties\n        properties = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation']\n        glcm_props = [greycoprops(glcm, prop).ravel() for prop in properties]\n\n        # Flatten and append GLCM properties as features\n        features.append(np.hstack(glcm_props))\n\n    return np.array(features)\n\n\nglcm_features = compute_and_average_features(extract_glcm_features)\n\n"

# Edge features

In [13]:
def extract_edge_features(segment, threshold1=100, threshold2=200):
    features = []
    for frame in segment:
        # Ensure frame is in the correct data type (CV_8U)
        frame = frame.astype(np.uint8)
        
        # Apply Canny edge detection
        edges = cv2.Canny(frame, threshold1, threshold2)
        
        # Calculate histogram of edge pixels
        hist, _ = np.histogram(edges.ravel(), bins=2, range=(0, 255))

        # Normalize histogram
        hist = hist.astype("float")
        hist /= (hist.sum() + 1e-7)

        # Append edge histogram as feature
        features.append(hist)

    return np.array(features)

edge_features = compute_and_average_features(extract_edge_features)

# Corner features

In [14]:
def extract_corner_features(segment, max_corners=100, quality_level=0.01, min_distance=10):
    features = []
    for frame in segment:
        # Ensure frame is in the correct data type (CV_8U)
        frame = frame.astype(np.uint8)
        
        # Detect corners using Harris corner detector
        corners = cv2.goodFeaturesToTrack(frame, maxCorners=max_corners, qualityLevel=quality_level, minDistance=min_distance)

        # If corners are found, append them as features
        if corners is not None:
            corners = np.int0(corners)
            features.append(corners)

    return np.array(features)


corner_features = compute_and_average_features(extract_corner_features)

  corners = np.int0(corners)


# Create Feature DF and save

In [15]:
def choose_column_in_matrix(matrix, i):
    return [row[i] for row in matrix]

In [16]:
def create_feature_df_all_labels(histogram_features_dict, optical_flow_features_dict, lbp_features_dict, edge_features_dict, corner_features_dict, label_list):
    """
    Create DataFrame for each label from features

    Parameters:
    - histogram_features_dict (dict): Dictionary containing histogram features for each label.
    - optical_flow_features_dict (dict): Dictionary containing optical flow features for each label.
    - spatiotemporal_features_dict (dict): Dictionary containing spatiotemporal features for each label.
    - label_list (list): List of labels for which we want to create dataframes.

    Returns:
    - Feature DataFrame (DataFrame)
    """
    
    feature_dfs = []
    
    for label in label_list:
        histogram_features = histogram_features_dict[label]
        optical_flow_features = optical_flow_features_dict[label]
        lbp_features = lbp_features_dict[label]
        edge_features = edge_features_dict[label]
        corner_features = corner_features_dict[label]
        
        feature_df = pd.DataFrame()
        
        for dim_idx in range(256):
            feature_df["Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)

        for dim_idx in range(75):
            feature_df["Optical_Flow_"+str(dim_idx)] = choose_column_in_matrix(optical_flow_features, dim_idx)

        for dim_idx in range(58):
            feature_df["LBP_Features_"+str(dim_idx)] = choose_column_in_matrix(lbp_features, dim_idx)

        for dim_idx in range(2):
            feature_df["Edge_Features_"+str(dim_idx)] = choose_column_in_matrix(edge_features, dim_idx)

        for dim_idx in range(100):
            for col_idx in range(2):
                feature_df["Corner_Features_"+str(dim_idx)+"_"+str(col_idx)] = [row[0][col_idx] for row in choose_column_in_matrix(corner_features, dim_idx)]

        # Label
        feature_df["Label"] = label
        
        feature_dfs.append(feature_df)
    
    # Concatenate dataframes for all labels
    concatenated_df = pd.concat(feature_dfs, ignore_index=True)
    
    return concatenated_df

dataframes = create_feature_df_all_labels(hist_features, optical_flow_features, lbp_features, edge_features, corner_features, label_list)

  feature_df["Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_

In [17]:
dataframes.to_csv("Features/"+str(subject)+"/Traditional_Features.csv")