In [1]:
import pandas as pd
import numpy as np
import h5py
import cv2
import numpy as np
from skimage.feature import local_binary_pattern#, greycomatrix, greycoprops
from sklearn.decomposition import PCA
from scipy.spatial import ConvexHull
from scipy.spatial.distance import pdist, squareform
from scipy.stats import skew, kurtosis, mode, entropy


In [2]:
subject = "m300"

In [3]:
dataframes = {}

label_list = [0,1,2,3,4] 

for label in label_list:
    filename = "Data/"+str(subject)+"/run0"+str(label)+"/Brain_Imaging_Data.h5"
    file = h5py.File(filename,'r')
    dataframes[label] = file['Data']

# Compute segments & Define functions

In [4]:
# Function to segment the dataset into smaller chunks

def segment_data(data, segment_size = 80):
    """
    Segment the dataset into smaller chunks.

    Parameters:
    - data (ndarray): Input data to be segmented.
    - segment_size (int): Size of each segment.

    Returns:
    - segments (list): List of segmented data.
    """
    
    segments = []

    for i in range(75):
        start_idx = int(i* segment_size)
        end_idx = start_idx + segment_size
        segment = data[start_idx:end_idx, :, :]
        segments.append(list(segment))

    return np.array(segments)

segments = {}

for label in label_list:
    segments[label] = segment_data(dataframes[label])

In [5]:
def average_features_over_frames(features):
    """ Takes average of each feature bin over all 80 images
    that are within one 4s interval
    """

    averaged_features = []

    for segment in features:
        average_array = np.mean(segment, axis=0)
        averaged_features.append(average_array)

    return averaged_features

In [6]:
def compute_and_average_features(func, label_list = label_list, segments = segments, **kwargs):
    """
    Compute and average features using the provided function.

    Parameters:
    - label_list (list): List of labels.
    - segments (dict): Dictionary of segments corresponding to each label.
    - func (function): Function to compute features from a segment.
    - kwargs: Additional keyword arguments to pass to the function.

    Returns:
    - features (dict): Dictionary containing averaged features for each label.
    """
    features = {}

    for label in label_list:
        label_features = []
        for segment in segments[label]:
            feature = func(segment, **kwargs)
            label_features.append(feature)
    
        label_features = np.array(label_features)
        label_features = average_features_over_frames(label_features)
        features[label] = label_features

    return features

# Features

## Corner Detection

In [7]:
def extract_keypoints(segment):
    keypoints = []
    for frame in segment:
        # Initialize the FAST detector
        fast = cv2.FastFeatureDetector_create()
        # Detect keypoints
        keypoints_for_frame = fast.detect(frame.astype(np.uint8), None)
        # Get coordinates and sizes of keypoints
        keypoints_for_frame = [[kp.pt[0], kp.pt[1]] for kp in keypoints_for_frame[:500]] # cut to lenght 500
        keypoints.append(keypoints_for_frame)

    return keypoints

keypoint_features = compute_and_average_features(extract_keypoints)

In [8]:
def extract_corner_features(segment, max_corners=100, quality_level=0.01, min_distance=10):
    features = []
    for frame in segment:
        # Ensure frame is in the correct data type (CV_8U)
        frame = frame.astype(np.uint8)
        
        # Detect corners using Harris corner detector
        corners = cv2.goodFeaturesToTrack(frame, maxCorners=max_corners, qualityLevel=quality_level, minDistance=min_distance)

        # If corners are found, append them as features
        if corners is not None:
            corners = np.int0(corners)
            features.append(corners[:, 0])

    return np.array(features)


corner_features = compute_and_average_features(extract_corner_features)

  corners = np.int0(corners)


In [9]:
def compute_corner_features(frames):
    # Initialize dictionary for features
    features = {
        "corner_counts": [],
        "mean_corner_position_xs": [],
        "mean_corner_position_ys": [],
        "corner_densities": [],
        "convex_hull_areas": [],
        "bounding_box_areas": [],
        "sd_positions_xs": [],
        "sd_positions_ys": [],
        "mean_distances": [],
        "sd_distances": [],
        "mean_angles": [],
        "sd_angles": []
    }

    for frame in frames:
        if frame.size == 0:
            # Handle case where there are no corners
            features["corner_counts"].append(0)
            features["mean_corner_position_xs"].append(0)
            features["mean_corner_position_ys"].append(0)
            features["corner_densities"].append(0)
            features["convex_hull_areas"].append(0)
            features["bounding_box_areas"].append(0)
            features["sd_positions_xs"].append(0)
            features["sd_positions_ys"].append(0)
            features["mean_distances"].append(0)
            features["sd_distances"].append(0)
            features["mean_angles"].append(0)
            features["sd_angles"].append(0)
            continue

        # Corner count
        num_corners = frame.shape[0]
        features["corner_counts"].append(num_corners)

        # Corner positions
        positions = frame[:, :2]

        # Mean and standard deviation of corner positions
        mean_position = np.mean(positions, axis=0)
        features["mean_corner_position_xs"].append(mean_position[0])
        features["mean_corner_position_ys"].append(mean_position[1])
        std_position = np.std(positions, axis=0)
        features["sd_positions_xs"].append(std_position[0])
        features["sd_positions_ys"].append(std_position[1])

        # Corner density
        image_area = 160 * 120  # Assuming a fixed image size, adjust if necessary
        corner_density = num_corners / image_area
        features["corner_densities"].append(corner_density)

        # Convex hull area
        if num_corners > 2:
            hull = ConvexHull(positions)
            convex_hull_area = hull.area
        else:
            convex_hull_area = 0
        features["convex_hull_areas"].append(convex_hull_area)

        # Bounding box area
        min_x, min_y = np.min(positions, axis=0)
        max_x, max_y = np.max(positions, axis=0)
        bounding_box_area = (max_x - min_x) * (max_y - min_y)
        features["bounding_box_areas"].append(bounding_box_area)

        # Distance statistics
        if num_corners > 1:
            dists = pdist(positions)
            mean_distance = np.mean(dists)
            std_distance = np.std(dists)
        else:
            mean_distance = std_distance = 0
        features["mean_distances"].append(mean_distance)
        features["sd_distances"].append(std_distance)

        # Angle statistics
        if num_corners > 1:
            angles = []
            for i in range(num_corners):
                for j in range(i + 1, num_corners):
                    dx = positions[j, 0] - positions[i, 0]
                    dy = positions[j, 1] - positions[i, 1]
                    angle = np.arctan2(dy, dx) * 180 / np.pi
                    angles.append(angle)
            mean_angle = np.mean(angles)
            std_angle = np.std(angles)
        else:
            mean_angle = std_angle = 0
        features["mean_angles"].append(mean_angle)
        features["sd_angles"].append(std_angle)

    return features
    
FAST_corner_statistics = {}
most_prominent_corner_statistics = {}

for label in label_list:
    FAST_corner_statistics[label] = compute_corner_features(keypoint_features[label])
    most_prominent_corner_statistics[label] = compute_corner_features(corner_features[label])

## Intensity histograms

In [10]:
def extract_intensity_histograms(segment, is_grayscale=True):
    histograms = []
    for frame in segment:
        if is_grayscale:
            hist = cv2.calcHist([frame.astype(np.uint8)], [0], None, [256], [0, 256])
            histograms.append(hist.flatten())
        else:
            # Convert frame to grayscale
            gray_frame = cv2.cvtColor(frame.astype(np.uint8), cv2.COLOR_BGR2GRAY)

            # Calculate intensity histogram
            hist_intensity = cv2.calcHist([gray_frame], [0], None, [256], [0, 256])

            # Flatten and append histogram
            histograms.append(hist_intensity.flatten())

    return np.array(histograms)

hist_features = compute_and_average_features(extract_intensity_histograms)

In [11]:
def compute_histogram_features(histograms, size=256):
    # Initialize dictionary for features
    features = {
        "mean_intensities": [],
        "sds": [],
        "skews": [],
        "kurtoses": [],
        "entropies": [],
        "energies": [],
        "contrasts": [],
        "modes": [],
        "medians": [],
        "percentiles_25": [],
        "percentiles_75": [],
        "percentiles_90": []
    }

    for hist in histograms:
        # Normalize the histogram
        hist_norm = hist / hist.sum()

        # Mean Intensity
        mean_intensity = np.mean(hist_norm)
        features["mean_intensities"].append(mean_intensity)

        # Standard Deviation (SD)
        sd = np.std(hist_norm)
        features["sds"].append(sd)

        # Skewness
        skewness = skew(hist_norm)
        features["skews"].append(skewness)

        # Kurtosis
        kurt = kurtosis(hist_norm)
        features["kurtoses"].append(kurt)

        # Entropy
        ent = entropy(hist_norm)
        features["entropies"].append(ent)

        # Energy
        energy = np.sum(hist_norm ** 2)
        features["energies"].append(energy)

        # Contrast
        contrast = np.sum((np.arange(size) - mean_intensity) ** 2 * hist_norm)
        features["contrasts"].append(contrast)

        # Mode
        mode_value = mode(hist_norm)[0]
        features["modes"].append(mode_value)

        # Median
        cumulative_hist = np.cumsum(hist_norm)
        median = np.searchsorted(cumulative_hist, 0.5)
        features["medians"].append(median)

        # 25th Percentile
        percentile_25 = np.searchsorted(cumulative_hist, 0.25)
        features["percentiles_25"].append(percentile_25)

        # 75th Percentile
        percentile_75 = np.searchsorted(cumulative_hist, 0.75)
        features["percentiles_75"].append(percentile_75)

        # 90th Percentile
        percentile_90 = np.searchsorted(cumulative_hist, 0.90)
        features["percentiles_90"].append(percentile_90)

    return features


histogram_statistics = {}

for label in label_list:
    histogram_statistics[label] = compute_histogram_features(hist_features[label], 256)

## Optical flow as a dissimilarity measure

In [12]:
# Function to extract optical flow features from a grayscale video segment
def extract_optical_flow_features(segment):
    features = []
    prev_frame = None
    for frame in segment:
        if prev_frame is not None:
            # Calculate optical flow
            flow = cv2.calcOpticalFlowFarneback(prev_frame, frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)

            # Flatten and append optical flow as feature
            features.append(flow.flatten())

        prev_frame = frame

    # Convert features to numpy array
    features = np.array(features)

    return features


optical_flow_features = compute_and_average_features(extract_optical_flow_features)

In [13]:
# Use these as direct coordinates
def reduce_optical_flow_features(features, n_components=20):
    
    # Apply PCA to reduce dimensionality
    pca = PCA(n_components=n_components)
    features_reduced = pca.fit_transform(features)

    return features_reduced


optical_flow_features_reduced = {}

for label in label_list:
    optical_flow_features_reduced[label] = reduce_optical_flow_features(optical_flow_features[label])

In [14]:
def compute_flow_statistics(optical_flows, num_bins=10):
    # Initialize dictionary for features
    features = {
        "mean_flow_magnitudes": [],
        "sd_flow_magnitudes": [],
        "mean_flow_angles": [],
        "sd_flow_angles": [],
        "mean_flow_gradient_magnitudes": [],
        "sd_flow_gradient_magnitudes": [],
        "hist_flow_magnitudes": [],
        "hist_flow_angles": []
    }

    for flow in optical_flows:
        if flow.size == 0:
            # Handle case where there are no flow vectors
            features["mean_flow_magnitudes"].append(0)
            features["sd_flow_magnitudes"].append(0)
            features["mean_flow_angles"].append(0)
            features["sd_flow_angles"].append(0)
            features["mean_flow_gradient_magnitudes"].append(0)
            features["sd_flow_gradient_magnitudes"].append(0)
            features["hist_flow_magnitudes"].append(np.zeros(num_bins))
            features["hist_flow_angles"].append(np.zeros(num_bins))
            continue
        
        # Assuming flow is a 1D array where each pair of elements represents a flow vector
        num_vectors = flow.size // 2
        flow = flow.reshape((num_vectors, 2))  # Reshape into (num_vectors, 2)
        
        # Compute magnitudes and angles
        magnitudes = np.linalg.norm(flow, axis=1)
        angles = np.arctan2(flow[:, 1], flow[:, 0]) * 180 / np.pi
        
        # Mean and standard deviation of flow magnitudes
        features["mean_flow_magnitudes"].append(np.mean(magnitudes))
        features["sd_flow_magnitudes"].append(np.std(magnitudes))
        
        # Mean and standard deviation of flow angles
        features["mean_flow_angles"].append(np.mean(angles))
        features["sd_flow_angles"].append(np.std(angles))
        
        # Compute gradients
        gradients = np.gradient(flow, axis=0)
        gradient_magnitudes = np.linalg.norm(gradients, axis=1)
        
        # Mean and standard deviation of flow gradient magnitudes
        features["mean_flow_gradient_magnitudes"].append(np.mean(gradient_magnitudes))
        features["sd_flow_gradient_magnitudes"].append(np.std(gradient_magnitudes))
        
        # Histograms of flow magnitudes and angles
        hist_magnitude, _ = np.histogram(magnitudes, bins=num_bins, range=(0, np.max(magnitudes)))
        hist_angle, _ = np.histogram(angles, bins=num_bins, range=(-180, 180))
        
        features["hist_flow_magnitudes"].append(hist_magnitude)
        features["hist_flow_angles"].append(hist_angle)
    
    return features

flow_statistics = {}

for label in label_list:
    flow_statistics[label] = compute_flow_statistics(optical_flow_features[label])

## Texture feature - Local Binary Patterns (LBP)

In [15]:
def extract_lbp_features(segment):
    features = []
    for frame in segment:
        # Compute LBP
        lbp = local_binary_pattern(frame, 8, 1, method='uniform')
        
        # Calculate histogram of LBP
        hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, 59), range=(0, 58))

        # Normalize histogram
        hist = hist.astype("float")
        hist /= (hist.sum() + 1e-7)

        # Flatten and append LBP histogram as feature
        features.append(hist)

    return np.array(features)


lbp_features = compute_and_average_features(extract_lbp_features)



In [16]:
local_binary_pattern_statistics = {}

for label in label_list:
    local_binary_pattern_statistics[label] = compute_histogram_features(lbp_features[label], 58)

## Edge features

In [17]:
def extract_edge_features(segment, threshold1=100, threshold2=200):
    features = []
    for frame in segment:
        # Ensure frame is in the correct data type (CV_8U)
        frame = frame.astype(np.uint8)
        
        # Apply Canny edge detection
        edges = cv2.Canny(frame, threshold1, threshold2)
        
        # Calculate histogram of edge pixels
        hist, _ = np.histogram(edges.ravel(), bins=50, range=(0, 255))

        # Normalize histogram
        hist = hist.astype("float")
        hist /= (hist.sum() + 1e-7)

        # Append edge histogram as feature
        features.append(hist)

    return np.array(features)

edge_features = compute_and_average_features(extract_edge_features)

In [18]:
edge_histogram_statistics = {}

for label in label_list:
    edge_histogram_statistics[label] = compute_histogram_features(edge_features[label], 50)

# Create Feature DF and save

In [19]:
def choose_column_in_matrix(matrix, i):
    return [row[i] for row in matrix]

In [20]:
def create_direct_coordinate_feature_df_all_labels(histogram_features_dict, optical_flow_features_dict, lbp_features_dict, edge_features_dict, corner_features_dict, keypoint_features_dict, label_list):
    """
    Create DataFrame for each label from features

    Parameters:
    - histogram_features_dict (dict): Dictionary containing histogram features for each label.
    - optical_flow_features_dict (dict): Dictionary containing optical flow features for each label.
    - spatiotemporal_features_dict (dict): Dictionary containing spatiotemporal features for each label.
    - label_list (list): List of labels for which we want to create dataframes.

    Returns:
    - Feature DataFrame (DataFrame)
    """
    
    feature_dfs = []
    
    for label in label_list:
        histogram_features = histogram_features_dict[label]
        optical_flow_features = optical_flow_features_dict[label]
        lbp_features = lbp_features_dict[label]
        edge_features = edge_features_dict[label]
        corner_features = corner_features_dict[label]
        prominent_corner_features = keypoint_features_dict[label]

        
        feature_df = pd.DataFrame()
        
        for dim_idx in range(256):
            feature_df["BI_Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)

        for dim_idx in range(20):
            feature_df["BI_Optical_Flow_"+str(dim_idx)] = choose_column_in_matrix(optical_flow_features, dim_idx)

        for dim_idx in range(58):
            feature_df["BI_LBP_Features_"+str(dim_idx)] = choose_column_in_matrix(lbp_features, dim_idx)

        for dim_idx in range(20):
            feature_df["BI_Edge_Features_"+str(dim_idx)] = choose_column_in_matrix(edge_features, dim_idx)

        for dim_idx in range(100):
            for col_idx in range(2):
                feature_df["BI_FAST_Corner_Features_"+str(dim_idx)+"_"+str(col_idx)] = [row[col_idx] for row in choose_column_in_matrix(corner_features, dim_idx)]

        for dim_idx in range(500):
            for col_idx in range(2):
                feature_df["BI_Prominent_Corner_Features_"+str(dim_idx)+"_"+str(col_idx)] = [row[col_idx] for row in choose_column_in_matrix(prominent_corner_features, dim_idx)]

        # Label
        feature_df["Label"] = label
        
        feature_dfs.append(feature_df)
    
    # Concatenate dataframes for all labels
    concatenated_df = pd.concat(feature_dfs, ignore_index=True)
    
    return concatenated_df

feature_df = create_direct_coordinate_feature_df_all_labels(hist_features, optical_flow_features_reduced, lbp_features, edge_features, corner_features, keypoint_features, label_list)


  feature_df["BI_Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["BI_Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["BI_Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["BI_Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["BI_Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["BI_Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["BI_Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["BI_Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_features, dim_idx)
  feature_df["BI_Intensity_Histograms_Dim_"+str(dim_idx)] = choose_column_in_matrix(histogram_fe

In [21]:
feature_df.to_csv("Features/"+str(subject)+"/Traditional_Features_Direct_Coordinates.csv")

In [22]:
def create_statistics_feature_df_all_labels(FAST_corner_statistics, most_prominent_corner_statistics, histogram_statistics, flow_statistics, local_binary_pattern_statistics, edge_histogram_statistics, label_list):
    """
    Create DataFrame for each label from features

    Parameters:
    - histogram_features_dict (dict): Dictionary containing histogram features for each label.
    - optical_flow_features_dict (dict): Dictionary containing optical flow features for each label.
    - spatiotemporal_features_dict (dict): Dictionary containing spatiotemporal features for each label.
    - label_list (list): List of labels for which we want to create dataframes.

    Returns:
    - Feature DataFrame (DataFrame)
    """
    
    feature_dfs = []
    
    for label in label_list:
        histogram_features = histogram_statistics[label]
        optical_flow_features = flow_statistics[label]
        lbp_features = local_binary_pattern_statistics[label]
        edge_features = edge_histogram_statistics[label]
        corner_features = FAST_corner_statistics[label]
        prominent_corner_statistics = most_prominent_corner_statistics[label]
        
        feature_df = pd.DataFrame()
        
        for statistics_name in histogram_features.keys():
            feature_df["BI_Intensity_Histograms_"+str(statistics_name)] = histogram_features[statistics_name]

        for statistics_name in optical_flow_features.keys():
            if statistics_name == "hist_flow_magnitudes" or statistics_name == "hist_flow_angles":
                for coord_idx in range(len(optical_flow_features[statistics_name][0])):

                    feature_df["BI_Optical_Flow_"+str(statistics_name)+"_Coord_"+str(coord_idx)] = [entry[coord_idx] for entry in optical_flow_features[statistics_name]]
            else:
                feature_df["BI_Optical_Flow_"+str(statistics_name)] = optical_flow_features[statistics_name]

        for statistics_name in lbp_features.keys():
            feature_df["BI_LBP_Features_"+str(statistics_name)] = lbp_features[statistics_name]

        for statistics_name in edge_features.keys():
            feature_df["BI_Edge_Features_"+str(statistics_name)] = edge_features[statistics_name]

        for statistics_name in corner_features.keys():
            feature_df["BI_FAST_Corner_Features_"+str(statistics_name)] = corner_features[statistics_name]

        for statistics_name in prominent_corner_statistics.keys():
            feature_df["BI_Prominent_Corner_Features_"+str(statistics_name)] = prominent_corner_statistics[statistics_name]

        # Label
        feature_df["Label"] = label
        
        feature_dfs.append(feature_df)
    
    # Concatenate dataframes for all labels
    concatenated_df = pd.concat(feature_dfs, ignore_index=True)
    
    return concatenated_df

feature_df = create_statistics_feature_df_all_labels(FAST_corner_statistics, most_prominent_corner_statistics, histogram_statistics, flow_statistics, local_binary_pattern_statistics, edge_histogram_statistics, label_list)


In [23]:
feature_df.to_csv("Features/"+str(subject)+"/Traditional_Features_Statistics.csv")