In [2]:
import cv2
import numpy as np
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
# # Define a function to extract features from a video
# def extract_features(video_path):
#     cap = cv2.VideoCapture(video_path)
    
#     features = []
    
#     while cap.isOpened():
#         ret, frame = cap.read()
#         if not ret:
#             break
        
#         # Example: Extract color histogram
#         hist = cv2.calcHist([frame], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
#         hist = cv2.normalize(hist, hist).flatten()
        
#         features.append(hist)
    
#     cap.release()
    
#     # Return the mean histogram for the video
#     return np.mean(features, axis=0)

In [4]:
# Dataset locations: one folder of genuine clips and one folder of
# manipulated clips. Both are absolute paths on the author's machine.
real_videos_path, fake_videos_path = (
    r'C:\Users\Suyash Tambe\Desktop\Deepfakevideo\train_samples\REAL',
    r'C:\Users\Suyash Tambe\Desktop\Deepfakevideo\train_samples\FAKE',
)

In [5]:
# data = []

# # Process real videos
# for video_name in os.listdir(real_videos_path):
#     video_path = os.path.join(real_videos_path, video_name)
#     features = extract_features(video_path)
#     data.append(features.tolist() + ['real'])  # Append features and label

# # Process fake videos
# for video_name in os.listdir(fake_videos_path):
#     video_path = os.path.join(fake_videos_path, video_name)
#     features = extract_features(video_path)
#     data.append(features.tolist() + ['fake'])  # Append features and label

# # Create a DataFrame
# columns = [f'feature_{i}' for i in range(len(data[0]) - 1)] + ['label']
# df = pd.DataFrame(data, columns=columns)

# # Save the features to a CSV file
# df.to_csv('video_features.csv', index=False)

In [6]:
def extract_optical_flow(video_path):
    """Return the average dense optical-flow magnitude of a video.

    Consecutive frames are converted to grayscale and compared with
    Farneback dense optical flow; the per-pixel flow magnitude is averaged
    for every frame pair, and those per-pair averages are averaged again
    over the whole video.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        The mean of the per-frame-pair mean flow magnitudes, or ``0`` when
        the video yields fewer than two frames (including when the first
        frame cannot be read at all).
    """
    cap = cv2.VideoCapture(video_path)
    flow_magnitudes = []

    try:
        ret, prev_frame = cap.read()
        if not ret:
            # No first frame (unreadable file, non-video entry, empty clip):
            # cvtColor would raise on the missing frame, so report "no motion".
            return 0
        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

        while True:
            ret, curr_frame = cap.read()
            if not ret:
                break
            curr_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)

            # Positional arguments after the frames are, in order:
            # flow=None, pyr_scale=0.5, levels=3, winsize=15, iterations=3,
            # poly_n=5, poly_sigma=1.2, flags=0.
            flow = cv2.calcOpticalFlowFarneback(
                prev_gray, curr_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0
            )
            # Only the magnitude of the (dx, dy) flow field is used; the
            # angle returned alongside it is discarded.
            magnitude, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
            flow_magnitudes.append(np.mean(magnitude))

            prev_gray = curr_gray
    finally:
        # Release the capture even if decoding or the flow computation fails.
        cap.release()

    return np.mean(flow_magnitudes) if flow_magnitudes else 0

In [7]:
def extract_statistics(video_path):
    """Compute grayscale intensity statistics over every frame of a video.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        A three-element list ``[mean_intensity, std_intensity,
        median_intensity]`` of floats:

        * ``mean_intensity`` - mean over all pixels of all frames.
        * ``std_intensity`` - population standard deviation over all pixels
          of all frames, computed as ``sqrt(E[x^2] - E[x]^2)``.
        * ``median_intensity`` - median of the per-frame medians (not the
          median over all pixels of the video).

        All three are ``0.0`` when no frame could be read.
    """
    cap = cv2.VideoCapture(video_path)

    # Running totals kept as Python ints so they cannot overflow.
    total_intensity_sum = 0
    total_intensity_square_sum = 0
    total_pixel_count = 0
    median_intensity_values = []

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Widen before squaring: squaring in the frame's own 8-bit dtype
            # wraps modulo 256 and corrupts the sum of squares.
            wide = gray_frame.astype(np.int64)
            total_intensity_sum += int(wide.sum())
            total_intensity_square_sum += int(np.square(wide).sum())
            total_pixel_count += gray_frame.size
            median_intensity_values.append(np.median(gray_frame))
    finally:
        # Release the capture even if decoding a frame raises.
        cap.release()

    if not total_pixel_count:
        return [0.0, 0.0, 0.0]

    mean_intensity = total_intensity_sum / total_pixel_count
    variance_intensity = total_intensity_square_sum / total_pixel_count - mean_intensity ** 2
    # Floating-point rounding can push a near-zero variance slightly below
    # zero; clamp so the square root never yields NaN.
    std_intensity = float(np.sqrt(max(variance_intensity, 0.0)))
    median_intensity = float(np.median(median_intensity_values))

    return [float(mean_intensity), std_intensity, median_intensity]

In [8]:
# Accumulate one feature row per video; the class label is the last element.
data = []

# Visit the REAL folder, then the FAKE folder, tagging each row accordingly.
for label, videos_dir in (('real', real_videos_path), ('fake', fake_videos_path)):
    for video_name in os.listdir(videos_dir):
        video_path = os.path.join(videos_dir, video_name)

        # Each extractor opens and decodes the video on its own.
        optical_flow = extract_optical_flow(video_path)
        stats = extract_statistics(video_path)

        # Row layout: intensity statistics, then optical flow, then label.
        data.append([*stats, optical_flow, label])

# Column names follow the row layout built above.
columns = ['mean_intensity', 'std_intensity', 'median_intensity', 'optical_flow', 'label']
df = pd.DataFrame(data, columns=columns)

# Persist the feature table next to the notebook, without the index column.
df.to_csv('2_video_features_with_statistics.csv', index=False)

KeyboardInterrupt: 

In [None]:
# Plotting: a 2x2 grid of axes-level plots, each drawn on an explicit Axes
# so that titles, labels and layout apply to the intended panel.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Histogram of Mean Intensity
sns.histplot(df['mean_intensity'], bins=20, kde=True, ax=axes[0, 0])
axes[0, 0].set_title('Distribution of Mean Intensity')
axes[0, 0].set_xlabel('Mean Intensity')

# Histogram of Standard Deviation of Intensity
sns.histplot(df['std_intensity'], bins=20, kde=True, ax=axes[0, 1])
axes[0, 1].set_title('Distribution of Standard Deviation of Intensity')
axes[0, 1].set_xlabel('Standard Deviation of Intensity')

# Box Plot for Mean Intensity by Label
sns.boxplot(x='label', y='mean_intensity', data=df, ax=axes[1, 0])
axes[1, 0].set_title('Mean Intensity by Video Type')
axes[1, 0].set_xlabel('Video Type')
axes[1, 0].set_ylabel('Mean Intensity')

# Box Plot for Optical Flow by Label
sns.boxplot(x='label', y='optical_flow', data=df, ax=axes[1, 1])
axes[1, 1].set_title('Optical Flow by Video Type')
axes[1, 1].set_xlabel('Video Type')
axes[1, 1].set_ylabel('Optical Flow')

fig.tight_layout()
plt.show()

# Pairplot to visualize relationships.
# pairplot is a figure-level function: it builds its own figure and cannot
# be placed into a subplot slot, so it is drawn separately from the grid.
sns.pairplot(df, hue='label', vars=['mean_intensity', 'std_intensity', 'optical_flow'])
plt.show()

In [None]:

def extract_optical_flow(video_path):
    """Return the mean and variance of a video's dense optical-flow magnitude.

    Consecutive frames are converted to grayscale and compared with
    Farneback dense optical flow. The per-pixel flow magnitude is averaged
    for every frame pair; the mean and variance are then taken over those
    per-pair averages.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        A tuple ``(mean, variance)`` of the per-frame-pair mean flow
        magnitudes. Both are ``0`` when the video yields fewer than two
        frames (including when the first frame cannot be read at all).
    """
    cap = cv2.VideoCapture(video_path)
    flow_magnitudes = []

    try:
        ret, prev_frame = cap.read()
        if not ret:
            # No first frame (unreadable file, non-video entry, empty clip):
            # cvtColor would raise on the missing frame, so report "no motion".
            return 0, 0
        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

        while True:
            ret, curr_frame = cap.read()
            if not ret:
                break
            curr_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)

            # Positional arguments after the frames are, in order:
            # flow=None, pyr_scale=0.5, levels=3, winsize=15, iterations=3,
            # poly_n=5, poly_sigma=1.2, flags=0.
            flow = cv2.calcOpticalFlowFarneback(
                prev_gray, curr_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0
            )
            # Only the magnitude of the (dx, dy) flow field is used; the
            # angle returned alongside it is discarded.
            magnitude, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
            flow_magnitudes.append(np.mean(magnitude))

            prev_gray = curr_gray
    finally:
        # Release the capture even if decoding or the flow computation fails.
        cap.release()

    if not flow_magnitudes:
        return 0, 0
    return np.mean(flow_magnitudes), np.var(flow_magnitudes)

def extract_intensity_statistics(video_path):
    """Return the mean grayscale intensity over all pixels of all frames.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        Sum of all grayscale pixel values divided by the number of pixels
        seen, or ``0`` when no frame could be read.
    """
    capture = cv2.VideoCapture(video_path)

    intensity_total = 0
    pixels_seen = 0

    # Keep pulling frames until the capture closes or a read fails.
    while capture.isOpened():
        grabbed, bgr_frame = capture.read()
        if not grabbed:
            break

        gray = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2GRAY)

        # Running totals for the final average.
        intensity_total = intensity_total + np.sum(gray)
        pixels_seen = pixels_seen + gray.size

    capture.release()

    # Guard against division by zero for videos with no readable frames.
    if pixels_seen > 0:
        return intensity_total / pixels_seen
    return 0

# Dataset locations: one folder of genuine clips, one of manipulated clips.
real_videos_path, fake_videos_path = (
    r"C:\Users\Suyash Tambe\Desktop\Deepfakevideo\train_samples\REAL",
    r"C:\Users\Suyash Tambe\Desktop\Deepfakevideo\train_samples\FAKE",
)

# Accumulate one feature row per video; the class label is the last element.
data = []

# Visit the REAL folder, then the FAKE folder, tagging each row accordingly.
for label, videos_dir in (('real', real_videos_path), ('fake', fake_videos_path)):
    for video_name in os.listdir(videos_dir):
        video_path = os.path.join(videos_dir, video_name)

        # Each extractor opens and decodes the video on its own.
        mean_intensity = extract_intensity_statistics(video_path)
        optical_flow_mean, optical_flow_variance = extract_optical_flow(video_path)

        data.append([mean_intensity, optical_flow_mean, optical_flow_variance, label])

# Column names follow the row layout built above.
columns = ['mean_intensity', 'optical_flow_mean' , 'optical_flow_variance', 'label']
df = pd.DataFrame(data, columns=columns)

# Persist the feature table next to the notebook, without the index column.
df.to_csv('2_video_features_statistical.csv', index=False)

