In [50]:
import os
import cv2
import numpy as np
import pandas as pd
from glob import glob
from tqdm import tqdm
from pathlib import Path  


In [51]:

# Feature extraction functions (modified for per-frame calculations)
def calculate_optical_flow_per_frame(video_path):
    """Compute dense optical-flow statistics between consecutive video frames.

    Every frame is converted to grayscale and compared with the previous one
    using Farneback dense optical flow. For each consecutive pair the mean and
    the standard deviation over all elements of the returned flow array are
    recorded.

    Args:
        video_path: Path of the video file, handed to ``cv2.VideoCapture``.

    Returns:
        A list of ``(mean_flow, std_flow)`` tuples, one per consecutive frame
        pair (a video with N readable frames yields N - 1 entries). The list
        is empty when the first frame cannot be read.
    """
    cap = cv2.VideoCapture(video_path)
    flow_values = []

    # try/finally guarantees the capture is released on every exit path,
    # including the early return and any OpenCV error raised mid-video.
    try:
        ret, prev_frame = cap.read()
        if not ret:
            return flow_values

        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # Positional arguments after `None` (no initial flow), in order:
            # pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags.
            flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None,
                                                0.5, 3, 15, 3, 5, 1.2, 0)
            # NOTE(review): the statistics are taken over the raw flow values,
            # which are presumably signed x/y displacements, so opposite
            # motions can cancel in the mean - confirm this is intended
            # (vs. using the flow magnitude).
            flow_values.append((np.mean(flow), np.std(flow)))
            prev_gray = gray
    finally:
        cap.release()

    return flow_values


In [52]:

def calculate_edge_ratio_per_frame(video_path):
    """Compute, for every frame, the fraction of pixels marked as Canny edges.

    Args:
        video_path: Path of the video file, handed to ``cv2.VideoCapture``.

    Returns:
        A list with one ratio per readable frame: the number of non-zero
        pixels in the Canny edge map divided by ``height * width`` of the
        frame (``0`` for a frame that has no pixels). The list is empty when
        no frame can be read.
    """
    cap = cv2.VideoCapture(video_path)
    edge_ratios = []

    # Release the capture even if OpenCV raises while processing a frame.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Canny is run directly on the decoded frame with hysteresis
            # thresholds 100 (lower) and 200 (upper).
            edges = cv2.Canny(frame, 100, 200)
            edge_pixels = np.count_nonzero(edges)
            total_pixels = frame.shape[0] * frame.shape[1]
            edge_ratios.append(edge_pixels / total_pixels if total_pixels != 0 else 0)
    finally:
        cap.release()

    return edge_ratios
 

In [53]:


def count_keypoints_per_frame(video_path):
    """Count the SIFT keypoints detected in every frame of a video.

    Args:
        video_path: Path of the video file, handed to ``cv2.VideoCapture``.

    Returns:
        A list with one integer per readable frame: the number of keypoints
        SIFT detects on the grayscale version of that frame. The list is
        empty when no frame can be read.
    """
    # Build the detector before opening the video so that a failure here
    # cannot leave a capture handle open.
    sift = cv2.SIFT_create()
    cap = cv2.VideoCapture(video_path)
    keypoints_per_frame = []

    # Release the capture even if OpenCV raises while processing a frame.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # Only the keypoint count is needed, so use detect() and skip the
            # descriptor computation that detectAndCompute() would perform.
            keypoints = sift.detect(gray, None)
            keypoints_per_frame.append(len(keypoints))
    finally:
        cap.release()

    return keypoints_per_frame


In [54]:


def extract_fft_peaks_per_frame(video_path, exclude_dc=False):
    """Return the largest 2-D FFT magnitude of every frame of a video.

    Each frame is converted to grayscale, transformed with ``np.fft.fft2`` and
    the maximum absolute value of the spectrum is recorded.

    Args:
        video_path: Path of the video file, handed to ``cv2.VideoCapture``.
        exclude_dc: When True, the zero-frequency (DC) coefficient is ignored
            when searching for the peak. Defaults to False, which keeps the
            full-spectrum maximum. For non-negative pixel data the
            full-spectrum maximum is always the DC coefficient (the sum of
            all pixel values), so pass True to obtain the strongest
            non-constant frequency component instead.

    Returns:
        A list with one peak magnitude per readable frame. The list is empty
        when no frame can be read.
    """
    cap = cv2.VideoCapture(video_path)
    peak_values = []

    # Release the capture even if processing a frame raises.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # No fftshift needed: shifting only reorders the coefficients and
            # therefore cannot change their maximum.
            fft_magnitude = np.abs(np.fft.fft2(gray))
            if exclude_dc:
                # In the unshifted spectrum the DC term sits at index [0, 0].
                fft_magnitude[0, 0] = 0.0
            peak_values.append(np.max(fft_magnitude))
    finally:
        cap.release()

    return peak_values


In [55]:
def extract_features_for_videos(video_paths, state, segment):
    """Build one feature row per frame for every video in ``video_paths``.

    For each video the four per-frame extractors defined in this file are run
    (optical flow, edge ratio, SIFT keypoint count, FFT peak). Lists shorter
    than the longest one are padded at the end with zeros so that all four
    can be indexed by frame number. The optical-flow extractor yields one
    entry per consecutive frame pair, so the last frame of a video receives
    the ``(0, 0)`` padding.

    Args:
        video_paths: Iterable of video file paths.
        state: Label stored in the last column of every row; also shown in
            the progress-bar description.
        segment: Name shown in the progress-bar description only.

    Returns:
        A list of rows, each laid out as
        ``[video_name, frame_index, mean_flow, std_flow, edge_ratio,
        keypoint_count, fft_peak, view, state]``. ``view`` is ``'angle'`` when
        the lower-cased file name contains ``'angle'`` and ``'front'``
        otherwise.
    """
    data = []
    for video_path in tqdm(video_paths, desc=f"Processing {state} - {segment}"):
        video_name = os.path.basename(video_path)

        # Determine view from filename
        view = 'angle' if 'angle' in video_name.lower() else 'front'

        # Extract per-frame features
        flow_values = calculate_optical_flow_per_frame(video_path)
        edge_ratios = calculate_edge_ratio_per_frame(video_path)
        keypoint_counts = count_keypoints_per_frame(video_path)
        fft_peaks = extract_fft_peaks_per_frame(video_path)

        # Pad every list up to the length of the longest one
        max_frames = max(len(flow_values), len(edge_ratios), len(keypoint_counts), len(fft_peaks))
        flow_values.extend([(0, 0)] * (max_frames - len(flow_values)))
        edge_ratios.extend([0] * (max_frames - len(edge_ratios)))
        keypoint_counts.extend([0] * (max_frames - len(keypoint_counts)))
        fft_peaks.extend([0] * (max_frames - len(fft_peaks)))

        # The row count comes from the already-computed feature lists, so the
        # video does not have to be decoded one more time just to count frames.
        per_frame = zip(flow_values, edge_ratios, keypoint_counts, fft_peaks)
        for frame_index, ((mean_flow, std_flow), edge_ratio, keypoint_count, fft_peak) in enumerate(per_frame):
            data.append([video_name, frame_index, mean_flow, std_flow,
                         edge_ratio, keypoint_count, fft_peak, view, state])

    return data


In [None]:

# Directory setup
base_dir = Path('evm_segmented_videos')
output_dir = Path('Datasets')
states = ['Bearing_fault', 'Normal_state', 'Unbalance_weight']
segments = ['segmented_5', 'segmented_5_overlap', 'segmented_10',
            'segmented_10_overlap', 'segmented_15', 'segmented_15_overlap']

# CSV header; the order must match the row layout built by
# extract_features_for_videos.
columns = ['video_name', 'frame_index', 'mean_flow', 'std_flow', 'edge_ratio', 'keypoint_count',
           'fft_peak', 'view', 'state']

# Create one output folder per state up front, even if no videos are found.
for state in states:
    (output_dir / state).mkdir(parents=True, exist_ok=True)

# Process and generate CSVs: one CSV per (state, segment) folder that
# contains at least one .avi file.
for state in states:
    for segment in segments:
        segment_path = base_dir / state / segment
        # glob() returns files in a filesystem-dependent order; sorting makes
        # the row order of the generated CSV reproducible across runs.
        video_paths = sorted(glob(str(segment_path / "*.avi")))
        if not video_paths:
            continue
        segment_data = extract_features_for_videos(video_paths, state, segment)
        # Ensure the directory exists before saving
        segment_output_dir = output_dir / state / segment
        segment_output_dir.mkdir(parents=True, exist_ok=True)
        if segment_data:
            df = pd.DataFrame(segment_data, columns=columns)
            output_csv = segment_output_dir / f"{segment}_features.csv"
            df.to_csv(output_csv, index=False)

print("✅ Feature extraction and CSV generation for each time slice completed!")

Processing Bearing_fault - segmented_5: 100%|██████████| 36/36 [01:39<00:00,  2.78s/it]
Processing Bearing_fault - segmented_5_overlap:  64%|██████▍   | 46/72 [02:32<01:10,  2.70s/it]