In [1]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'test-data:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F4975033%2F8368843%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240509%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240509T200616Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D6722861583630e7501ea684c0608cc5da591fb6673454ba542eeb0ad709516ced3587b85fd64e039ea35a575a787df8a606436ca8f3c25f17a7d8502a6994f0fd0d93ca3de6a8863881e8508d025609c67a977abd74dd95ca5f189d74a086be9328412742b57e30ed79ff0c8a0aada3aa3bdd2346f33abb93ca442925f0ce56d30cdf976fe6e562c659282707bf773b203cf96b1fd7299a512501ae0d301d6f27485b2c035fcc32ca2885258ffef7345b6231403d186d4a067c6afc531403c5412d572cf78ba99bd36ef1a12bce61573ec1fbddf27afc9566b4f0890fdabc2beeed64ecc2c8bcca3ae2787312124bbd6d2e2dafe9c503b602cdd381378746abc,training-data:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F4975042%2F8368854%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240509%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240509T200616Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D22e7373d6051b09c78cb0d41495ebce5263f70aafa62b8abc52ae14b78f3fa9e6bda1a57f6fad50909ecada1c9a998eeebb82065407ea29d0ae59d6e0d88780f76b27da96bd29b73bf00d5b3b716a5428a6779f21fe985e16b9f9770bbf33de812bb39273cc7bbe9371e3b6f721b5a56413d98fd3d505566e8620300fa7e7866c0590694e13e6375f48e9f344eb1ad73acfffbbf9d040a3512bd9fd199a27655f09cb20293a8b378ddb8cd74953f9c9bab4af9441738c346626f2ece3e00b3902d1127a9e15fbbd6d887876458b51f5c0a8cb759524fb8c8ef88b4b85444b9a18f9300d70522884e10be6a2c1388be26a86a3c763136b95c8bb031dcef32a988'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading test-data, 31525870 bytes compressed
Downloaded and uncompressed: test-data
Downloading training-data, 480056651 bytes compressed
Downloaded and uncompressed: training-data
Data source import complete.


In [2]:
import torch
import torchvision
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
import pandas as pd

In [3]:
df_inputs = pd.read_csv('/kaggle/input/training-data/Final_Processed_video/Final_Processed_video/metadata_Training_Inputs.csv')

In [4]:
df_inputs['filePath'] = '/kaggle/input/training-data/Final_Processed_video/Final_Processed_video/' + df_inputs['file_name']

In [5]:
df_inputs

Unnamed: 0.1,Unnamed: 0,file_name,label,split,original,filePath
0,0,aagfhgtpmv,FAKE,train,vudstovrck.mp4,/kaggle/input/training-data/Final_Processed_vi...
1,2,abarnvbtwb,REAL,train,,/kaggle/input/training-data/Final_Processed_vi...
2,4,abqwwspghj,FAKE,train,qzimuostzz.mp4,/kaggle/input/training-data/Final_Processed_vi...
3,5,acifjvzvpm,FAKE,train,kbvibjhfzo.mp4,/kaggle/input/training-data/Final_Processed_vi...
4,6,acqfdwsrhi,FAKE,train,ccfoszqabv.mp4,/kaggle/input/training-data/Final_Processed_vi...
...,...,...,...,...,...,...
317,369,ekcrtigpab,REAL,train,,/kaggle/input/training-data/Final_Processed_vi...
318,373,ellavthztb,REAL,train,,/kaggle/input/training-data/Final_Processed_vi...
319,383,eqnoqyfquo,REAL,train,,/kaggle/input/training-data/Final_Processed_vi...
320,385,erlvuvjsjf,REAL,train,,/kaggle/input/training-data/Final_Processed_vi...


In [6]:
import os
import shutil
import pandas as pd

# Assuming df is your DataFrame with columns 'foldername', 'label'
# df = pd.read_csv('path_to_your_dataframe.csv')
df = df_inputs

# Create the new dataset folder if it doesn't exist
newdataset_path = '/content/Dataset'
os.makedirs(newdataset_path, exist_ok=True)

# Iterate through each row in the DataFrame
for index, row in df.iterrows():
    foldername = row['filePath']
    label = row['label']
    videoFileName = row['file_name']

    # Define the source and destination paths for the image
    image_path = foldername
    destination_folder = os.path.join(newdataset_path, label)

    # Create the label folder if it doesn't exist inside newdataset
    os.makedirs(destination_folder, exist_ok=True)

    try:
        # Move the images to their respective label folders
        image_files = os.listdir(image_path)
        for image_file in image_files:
            source = os.path.join(image_path, image_file)
            destination = os.path.join(destination_folder,  videoFileName+'_'+ image_file)
            shutil.copy(source, destination)
        print(f'Images from {foldername} moved successfully!')
    except FileNotFoundError:
        print(f'Folder {foldername} not found. Skipping...')
        continue

print('Image processing completed!')


Images from /kaggle/input/training-data/Final_Processed_video/Final_Processed_video/aagfhgtpmv moved successfully!
Images from /kaggle/input/training-data/Final_Processed_video/Final_Processed_video/abarnvbtwb moved successfully!
Images from /kaggle/input/training-data/Final_Processed_video/Final_Processed_video/abqwwspghj moved successfully!
Images from /kaggle/input/training-data/Final_Processed_video/Final_Processed_video/acifjvzvpm moved successfully!
Images from /kaggle/input/training-data/Final_Processed_video/Final_Processed_video/acqfdwsrhi moved successfully!
Images from /kaggle/input/training-data/Final_Processed_video/Final_Processed_video/acxnxvbsxk moved successfully!
Images from /kaggle/input/training-data/Final_Processed_video/Final_Processed_video/acxwigylke moved successfully!
Images from /kaggle/input/training-data/Final_Processed_video/Final_Processed_video/aczrgyricp moved successfully!
Images from /kaggle/input/training-data/Final_Processed_video/Final_Processed_vi

In [7]:
df_inputs.head(4)

Unnamed: 0.1,Unnamed: 0,file_name,label,split,original,filePath
0,0,aagfhgtpmv,FAKE,train,vudstovrck.mp4,/kaggle/input/training-data/Final_Processed_vi...
1,2,abarnvbtwb,REAL,train,,/kaggle/input/training-data/Final_Processed_vi...
2,4,abqwwspghj,FAKE,train,qzimuostzz.mp4,/kaggle/input/training-data/Final_Processed_vi...
3,5,acifjvzvpm,FAKE,train,kbvibjhfzo.mp4,/kaggle/input/training-data/Final_Processed_vi...


In [8]:
# import warnings
# warnings.filterwarnings('ignore')

In [9]:
model = models.inception_v3(pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 10),
    nn.Linear(10, 2)
)

Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth
100%|██████████| 104M/104M [00:01<00:00, 78.1MB/s]


In [10]:
transform = transforms.Compose([
    transforms.Resize((299, 299)),  # Resize images to a fixed size
    transforms.ToTensor(),           # Convert images to PyTorch tensors
    # Add more transformations as needed
])

In [12]:
import os

directory = '/content/Dataset/FAKE'
files = os.listdir(directory)

# Filter out files with ".jpg" extension
jpg_files = [file for file in files if file.endswith('.jpg')]

# Delete files with other extensions
for file in files:
    if file not in jpg_files:
        os.remove(os.path.join(directory, file))


In [13]:
from torchvision.datasets import ImageFolder
train_dataset = ImageFolder(root="/content/Dataset/", transform=transform)
test_dataset = ImageFolder(root="/kaggle/input/training-data/Final_Processed_video/Final_Processed_video", transform=transform)

In [14]:
torch.cuda.is_available()

True

In [15]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [16]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'


In [17]:
os.environ['TORCH_USE_CUDA_DSA']='1'

In [18]:
# device='cuda'
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=32)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

model.to(device)
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        # print(inputs.to(device))
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        logits = outputs.logits
        # print(logits)
        # print(labels)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

Epoch 1/10, Loss: 0.1083
Epoch 2/10, Loss: 0.0363
Epoch 3/10, Loss: 0.0253
Epoch 4/10, Loss: 0.0177
Epoch 5/10, Loss: 0.0128
Epoch 6/10, Loss: 0.0107
Epoch 7/10, Loss: 0.0112
Epoch 8/10, Loss: 0.0094
Epoch 9/10, Loss: 0.0060
Epoch 10/10, Loss: 0.0077


In [19]:
# !zip -r /kaggle/working/Fake.zip /kaggle/working/FAKE
# !zip -r /kaggle/working/Real.zip /kaggle/working/REAL

torch.save(model.state_dict(), 'model_weights.pth')


In [20]:
model.eval()  # Switch to evaluation mode
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_dataset:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = correct / total
print(f"Accuracy : {test_accuracy:.2f}")


Accuracy : 95.41
