# In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from glob import glob
from tqdm import tqdm
from pathlib import Path

# Function to calculate optical flow
def calculate_optical_flow(video_path):
    """Return the mean and standard deviation of dense optical flow in a video.

    Farneback optical flow is computed between each pair of consecutive
    frames. The statistics are taken over every element of every flow field,
    so the horizontal and vertical components are pooled together.

    The flow fields are reduced one at a time instead of being stacked into a
    single array, so memory use does not grow with the number of frames.

    Args:
        video_path: Path of the video, as accepted by ``cv2.VideoCapture``.

    Returns:
        A ``(mean, std)`` tuple of floats (population standard deviation, the
        same convention as ``np.std``). Both values are ``nan`` when fewer
        than two frames can be read, because no flow can be computed.
    """
    nan = float("nan")
    cap = cv2.VideoCapture(video_path)
    try:
        ret, prev_frame = cap.read()
        if not ret:
            # Unreadable or empty video: there is nothing to measure.
            return nan, nan
        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

        # Running count, mean and sum of squared deviations over all flow
        # elements seen so far.
        count = 0
        mean = 0.0
        m2 = 0.0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # Positional arguments after the output placeholder (None):
            # pyr_scale=0.5, levels=3, winsize=15, iterations=3, poly_n=5,
            # poly_sigma=1.2, flags=0.
            flow = cv2.calcOpticalFlowFarneback(
                prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0
            )
            prev_gray = gray

            # Merge this frame's statistics into the running totals using the
            # pairwise (Chan et al.) update, accumulated in float64.
            flow64 = flow.astype(np.float64)
            n_b = flow64.size
            mean_b = float(flow64.mean())
            m2_b = float(flow64.var()) * n_b

            total = count + n_b
            delta = mean_b - mean
            mean += delta * n_b / total
            m2 += m2_b + delta * delta * count * n_b / total
            count = total
    finally:
        # Release the capture even if OpenCV raises part-way through.
        cap.release()

    if count == 0:
        # Only one frame was readable, so there were no frame pairs.
        return nan, nan
    return float(mean), float(np.sqrt(m2 / count))

# Function to calculate edge ratio (simplified as the ratio of edge pixels to total pixels)
def calculate_edge_ratio(video_path):
    """Return the fraction of pixels that Canny marks as edges in a video.

    Every readable frame, including the first, is passed through
    ``cv2.Canny`` with thresholds 100 and 200. The result is the number of
    edge pixels divided by the number of pixels in the edge maps, summed over
    all frames, so it always lies between 0 and 1.

    Args:
        video_path: Path of the video, as accepted by ``cv2.VideoCapture``.

    Returns:
        The edge-pixel ratio as a float, or ``nan`` when no frame can be read.
    """
    edge_pixels = 0
    total_pixels = 0
    cap = cv2.VideoCapture(video_path)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            edges = cv2.Canny(frame, 100, 200)
            # Count the edge pixels. Summing the map would add up the
            # non-zero marker value of each edge pixel instead of counting it.
            edge_pixels += int(np.count_nonzero(edges))
            # Use the single-channel edge map for the denominator; the colour
            # frame's ``size`` would count every pixel once per channel.
            total_pixels += edges.size
    finally:
        # Release the capture even if OpenCV raises part-way through.
        cap.release()

    if total_pixels == 0:
        return float("nan")
    return edge_pixels / total_pixels

# Function to count keypoints (using SIFT)
def count_keypoints(video_path):
    """Return the total number of SIFT keypoints detected across a video.

    Every readable frame, including the first, is converted to grayscale and
    searched for SIFT keypoints; the per-frame counts are summed.

    Args:
        video_path: Path of the video, as accepted by ``cv2.VideoCapture``.

    Returns:
        The summed keypoint count as an int. It is 0 when no frame can be
        read.
    """
    sift = cv2.SIFT_create()
    keypoints = 0
    cap = cv2.VideoCapture(video_path)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # Only the number of keypoints is needed, so skip the descriptor
            # computation that detectAndCompute would also perform.
            keypoints += len(sift.detect(gray, None))
    finally:
        # Release the capture even if OpenCV raises part-way through.
        cap.release()
    return keypoints

# Function to extract FFT peaks (example: take the peak values of frequency components)
def extract_fft_peaks(video_path, num_peaks=3):
    """Return the peak 2-D FFT magnitude of the first frames of a video.

    For each of the first ``num_peaks`` readable frames, the frame is
    converted to grayscale and the largest magnitude in its 2-D FFT is
    recorded. Reading stops as soon as enough frames have been processed,
    because later frames never contribute to the result.

    NOTE: for a non-negative image the largest FFT magnitude is the
    zero-frequency term, which equals the sum of all pixel values. Each value
    therefore reflects overall frame brightness rather than a periodic
    component.

    Args:
        video_path: Path of the video, as accepted by ``cv2.VideoCapture``.
        num_peaks: Number of values to return. Defaults to 3.

    Returns:
        A list of exactly ``num_peaks`` values, one per frame in frame order.
        Missing entries are ``nan`` when the video has fewer readable frames,
        so callers always get a fixed-length result.
    """
    fft_peaks = []
    cap = cv2.VideoCapture(video_path)
    try:
        while len(fft_peaks) < num_peaks:
            ret, frame = cap.read()
            if not ret:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            magnitude = np.abs(np.fft.fft2(gray))
            fft_peaks.append(np.max(magnitude))  # Max peak in FFT
    finally:
        # Release the capture even if OpenCV raises part-way through.
        cap.release()

    # Pad short videos so the result always has the requested length.
    fft_peaks.extend([float("nan")] * (num_peaks - len(fft_peaks)))
    return fft_peaks

# Main feature extraction function
def extract_features_for_videos(video_paths, state, segment):
    """Build one feature row per video.

    Args:
        video_paths: Iterable of video file paths to process.
        state: Label stored in each row's ``state`` position.
        segment: Label stored in each row's ``segment`` position.

    Returns:
        A list of rows. Each row is a list of exactly ten items:
        ``[video_name, mean_flow, std_flow, edge_ratio, keypoint_count,
        fft_peak1, fft_peak2, fft_peak3, state, segment]``.
    """
    # The output table has exactly three FFT columns, so each row must carry
    # exactly three FFT values whatever the extractor returns.
    n_fft_columns = 3
    data = []
    for video_path in tqdm(video_paths):
        video_name = os.path.basename(video_path)

        # Extract features
        mean_flow, std_flow = calculate_optical_flow(video_path)
        edge_ratio_value = calculate_edge_ratio(video_path)
        keypoint_count = count_keypoints(video_path)

        # Truncate or NaN-pad the peaks so every row has the same width.
        # A short row would make DataFrame construction fail later.
        fft_peaks = list(extract_fft_peaks(video_path))[:n_fft_columns]
        fft_peaks += [float("nan")] * (n_fft_columns - len(fft_peaks))

        # Add row for this video
        data.append([
            video_name, mean_flow, std_flow, edge_ratio_value, keypoint_count,
            *fft_peaks, state, segment,
        ])

    return data

# Directory structure setup
base_dir = Path('evm_segmented_videos')  # Base directory where your video segments are located
output_dir = Path('Datasets')  # Directory where final datasets will be saved

# Machine states and segmentation variants; each pair gets its own CSV
states = ['Bearing_fault', 'Normal_state', 'Unbalance_weight']
segments = [
    'segmented_5', 'segmented_5_overlap',
    'segmented_10', 'segmented_10_overlap',
    'segmented_15', 'segmented_15_overlap',
]

# Column layout of every generated CSV (must match the feature row order)
columns = [
    'video_name', 'mean_flow', 'std_flow', 'edge_ratio', 'keypoint_count',
    'fft_peak1', 'fft_peak2', 'fft_peak3', 'state', 'segment',
]

# Create subdirectories for each state
for state in states:
    state_dir = output_dir / state
    state_dir.mkdir(parents=True, exist_ok=True)

# Loop through each state and segment type to process videos and generate CSVs
for state in states:
    state_path = base_dir / state
    for segment in segments:
        segment_path = state_path / segment
        # glob returns files in arbitrary order; sort so the CSV rows come out
        # in the same order on every run and every platform.
        video_paths = sorted(glob(str(segment_path / "*.avi")))

        # Extract features for all videos in this segment and state
        data = extract_features_for_videos(video_paths, state, segment)

        # Create DataFrame from extracted data
        df = pd.DataFrame(data, columns=columns)

        # Save DataFrame to CSV in the appropriate directory
        output_csv = output_dir / state / f"{segment}_combined_features.csv"
        df.to_csv(output_csv, index=False)

print("Feature extraction and CSV generation completed!")
