In [19]:
import os
import pandas as pd
import torch
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
from tqdm import tqdm

In [25]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to match ResNet input
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
def extract_image_features(image_path):
    try:
        img = Image.open(image_path).convert('RGB')  # Load image
        img = transform(img).unsqueeze(0).to(device)  # Preprocess
        with torch.no_grad():
            features = resnet(img)  # Extract features
        return features.cpu().numpy().flatten()  # Convert to numpy array
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return None

In [2]:
base_dir = os.getcwd()
output_dir = os.path.join(base_dir, "..", "..", "output")

file_path1 = os.path.join(output_dir, "text_multimodal.csv")
df1 = pd.read_csv(file_path1)

In [35]:
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')
df2 = df1[df1["Media File"].notna() & df1["Media File"].str.endswith(image_extensions, na=False)]

In [20]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet = models.resnet50(pretrained=True).to(device)
resnet.eval()

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\Hp/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:06<00:00, 16.9MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [36]:
tqdm.pandas()
df2["image_features"] = df2["Media File"].progress_apply(extract_image_features)

100%|██████████| 413/413 [00:55<00:00,  7.44it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["image_features"] = df2["Media File"].progress_apply(extract_image_features)


In [40]:
df1 = df1.merge(df2[['Message_ID', 'image_features']], on='Message_ID', how='left')

In [49]:
cols = ['Message_ID', 'text_embeddings', 'image_features', 'Media File', 'Fake_News_Flag']
df1 = df1[cols]

In [51]:
file_path2 = os.path.join(output_dir, "images_multimodal.csv")
df1.to_csv(file_path2, index=False)