In [1]:
import os
import warnings

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array


In [2]:
# Read the frame-to-label mapping. The CSV carries no header row, so the
# column names are supplied explicitly.
labels_df = pd.read_csv(
    'video_frames.csv',
    header=None,
    names=['frame_name', 'label'],
)

# Preview the first rows to confirm the columns parsed as expected
print(labels_df.head())



        frame_name label
0  000_frame_0.jpg  real
1  000_frame_1.jpg  real
2  000_frame_2.jpg  real
3  000_frame_3.jpg  real
4  000_frame_4.jpg  real


In [4]:
def preprocess_image(image_path, target_size=(150, 150)):
    """Load one image from disk, resize it, and rescale its pixel values.

    Args:
        image_path: Path of the image file to read.
        target_size: Size passed to ``load_img`` for resizing.

    Returns:
        The array produced by ``img_to_array`` with every value divided by
        255.0 (maps 8-bit pixel values into the 0-1 range).
    """
    # Decode + resize, then convert to an array in a single step
    pixels = img_to_array(load_img(image_path, target_size=target_size))
    return pixels / 255.0

def load_dataset_in_batches(frames_folder, labels_df, batch_size=1000, target_size=(150, 150)):
    """Load every labelled frame that exists on disk into two NumPy arrays.

    Rows of ``labels_df`` are walked in order, ``batch_size`` rows at a time.
    Each frame found under ``frames_folder`` is run through
    ``preprocess_image`` and paired with a one-hot label. Frames whose file
    is missing are skipped; a single warning reports how many were dropped.

    Args:
        frames_folder: Directory containing the frame image files.
        labels_df: DataFrame with a ``frame_name`` column (file name relative
            to ``frames_folder``) and a ``label`` column.
        batch_size: Number of label rows handled per chunk; must be positive.
        target_size: Size forwarded to ``preprocess_image`` for resizing.

    Returns:
        Tuple ``(X, y)``. ``X`` stacks the preprocessed images along a new
        leading axis. ``y`` has one row per loaded image: ``[1, 0]`` when the
        label equals ``'real'`` and ``[0, 1]`` for any other value. When no
        image could be loaded, both arrays have zero rows.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError(f'batch_size must be a positive integer, got {batch_size}')

    # Pull the two columns out once; indexing the frame with .iloc on every
    # row builds a new Series each time and is far slower.
    frame_names = labels_df['frame_name'].tolist()
    labels = labels_df['label'].tolist()
    num_samples = len(frame_names)

    X_chunks, y_chunks = [], []
    missing = 0

    for start in range(0, num_samples, batch_size):
        end = start + batch_size  # slicing clamps to the list length
        batch_X, batch_y = [], []

        for frame_name, label in zip(frame_names[start:end], labels[start:end]):
            image_path = os.path.join(frames_folder, frame_name)
            if not os.path.exists(image_path):
                missing += 1
                continue
            batch_X.append(preprocess_image(image_path, target_size=target_size))
            # NOTE: every label other than 'real' is treated as the second class
            batch_y.append([1, 0] if label == 'real' else [0, 1])

        # A chunk in which every file is missing would become a 1-D empty
        # array and make the final concatenate fail, so only keep non-empty
        # chunks.
        if batch_X:
            X_chunks.append(np.stack(batch_X))
            y_chunks.append(np.array(batch_y))

    if missing:
        warnings.warn(
            f'{missing} of {num_samples} frames listed in labels_df were not '
            f'found under {frames_folder!r} and were skipped'
        )

    if not X_chunks:
        # Nothing loaded: return correctly ranked empty arrays instead of
        # letting np.concatenate raise on an empty list. The trailing 3
        # assumes preprocess_image yields 3-channel images and float32 data
        # (as the loaded-shape output of this notebook shows) - confirm if
        # preprocess_image is changed.
        return (
            np.empty((0, *target_size, 3), dtype=np.float32),
            np.empty((0, 2), dtype=int),
        )

    return np.concatenate(X_chunks), np.concatenate(y_chunks)


# Build the image and label arrays from the extracted frames directory
frames_folder = 'frames_dataset'
X, y = load_dataset_in_batches(
    frames_folder=frames_folder,
    labels_df=labels_df,
)

# Report how many images were actually loaded and the resulting array shape
print(f'Loaded {len(X)} images with shape: {X.shape}')


Loaded 30000 images with shape: (30000, 150, 150, 3)


In [6]:
import pickle

# Persist the arrays as a single (X, y) tuple so later sessions can skip the
# image-loading step.
# The protocol is set explicitly because X is large (30000 x 150 x 150 x 3
# values per the load output above): pickle protocols below 4 cannot
# serialize objects over 4 GiB, and on Python 3.8+ protocol 5 lets NumPy
# write its buffer without an extra in-memory copy.
# NOTE: files written with protocol 5 need Python 3.8+ to read back.
# NOTE(security): only unpickle this file from a trusted location;
# pickle.load can execute arbitrary code.
with open('dataset.pkl', 'wb') as f:
    pickle.dump((X, y), f, protocol=pickle.HIGHEST_PROTOCOL)