In [None]:
import os
import numpy as np
import pandas as pd
import cv2
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

In [None]:
# Dataset locations, relative to the notebook's working directory.
train_data_path = 'cifake dataset/train'
test_data_path = 'cifake dataset/test'

# Accumulators for the image arrays and their class labels. They are reset
# here so that re-running this cell never appends the same samples twice.
data = []
labels = []
skipped_train_files = []  # paths that OpenCV could not decode

# Every sub-directory of the train directory is treated as one class and its
# name becomes the label (expected to be 'REAL' or 'FAKE').
# os.listdir returns entries in arbitrary order, so both levels are sorted to
# make the sample order -- and the seeded split derived from it -- reproducible.
for folder in sorted(os.listdir(train_data_path)):
    folder_path = os.path.join(train_data_path, folder)
    if not os.path.isdir(folder_path):
        # Stray files next to the class folders are not classes; listing them
        # as directories would raise.
        continue
    label = folder

    for filename in sorted(os.listdir(folder_path)):
        image_path = os.path.join(folder_path, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None rather
            # than raising; passing that to cv2.resize would crash.
            skipped_train_files.append(image_path)
            continue
        # Images are expected to be 32x32 already; resizing enforces it.
        image = cv2.resize(image, (32, 32))
        data.append(image)
        labels.append(label)

if skipped_train_files:
    print(f"Skipped {len(skipped_train_files)} unreadable training file(s), "
          f"e.g. {skipped_train_files[0]}")

In [None]:
# Stack the per-image arrays and the labels into NumPy arrays.
# np.asarray returns an existing array unchanged, so re-running this cell is harmless.
data = np.asarray(data)
labels = np.asarray(labels)

# Encode the string labels as integers. The encoded values get their own name
# instead of overwriting `labels`: if `labels` were replaced, re-running this
# cell would refit the encoder on integers and inverse_transform would no
# longer return the original class names.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Split the data into training and validation sets (80% train, 20% validation).
# Stratifying keeps the class proportions identical in both subsets.
X_train, X_val, y_train, y_val = train_test_split(
    data,
    encoded_labels,
    test_size=0.2,
    random_state=42,
    stratify=encoded_labels,
)

In [None]:
# Create an SVM classifier with a linear kernel (other kernels such as 'rbf'
# can be substituted here).
# NOTE(review): pixel values are passed to the SVM unscaled -- consider
# normalising them if training is slow or accuracy is poor.
svm_classifier = SVC(kernel='linear', random_state=42)

# Flatten every image into one feature vector. The feature count is derived
# from the array itself instead of hard-coding 32*32*3, and the row count is
# pinned to the number of images so a shape mismatch raises instead of
# silently regrouping pixels into the wrong number of rows.
n_features = int(np.prod(X_train.shape[1:]))
X_train_flat = X_train.reshape(len(X_train), n_features)
X_val_flat = X_val.reshape(len(X_val), n_features)

# Train the classifier on the training data
svm_classifier.fit(X_train_flat, y_train)
print("Model Trained")

# Make predictions on the validation data
val_predictions = svm_classifier.predict(X_val_flat)

# Calculate accuracy on the validation set
validation_accuracy = accuracy_score(y_val, val_predictions)
print("Validation Accuracy:", validation_accuracy)

### TEST DATA

In [None]:
# Load and preprocess the test data.
# The accumulators are reset here so that re-running this cell never appends
# the same samples twice.
test_data = []
test_labels = []
skipped_test_files = []  # paths that OpenCV could not decode

# Every sub-directory of the test directory is treated as one class and its
# name becomes the label (expected to be 'REAL' or 'FAKE').
# os.listdir returns entries in arbitrary order, so both levels are sorted to
# keep the sample order reproducible.
for folder in sorted(os.listdir(test_data_path)):
    folder_path = os.path.join(test_data_path, folder)
    if not os.path.isdir(folder_path):
        # Stray files next to the class folders are not classes; listing them
        # as directories would raise.
        continue
    label = folder

    for filename in sorted(os.listdir(folder_path)):
        image_path = os.path.join(folder_path, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None rather
            # than raising; passing that to cv2.resize would crash.
            skipped_test_files.append(image_path)
            continue
        # Images are expected to be 32x32 already; resizing enforces it.
        image = cv2.resize(image, (32, 32))
        test_data.append(image)
        test_labels.append(label)

if skipped_test_files:
    print(f"Skipped {len(skipped_test_files)} unreadable test file(s), "
          f"e.g. {skipped_test_files[0]}")

In [None]:
# Stack the list of test images into a single NumPy array
test_data = np.array(test_data)

# Flatten each image into one row of features and let the trained SVM
# predict the (integer-encoded) class for every row
flat_test_images = test_data.reshape(-1, 32 * 32 * 3)
encoded_test_predictions = svm_classifier.predict(flat_test_images)

# Translate the integer predictions back into the original label strings so
# they can be compared with the folder-name labels collected while loading
test_predictions = label_encoder.inverse_transform(encoded_test_predictions)

# Fraction of test images whose predicted label matches the true label
test_accuracy = accuracy_score(test_labels, test_predictions)
print("Test Accuracy:", test_accuracy)