In [18]:
import os
# Configure CUDA device enumeration/visibility before any GPU library is imported.
# NOTE(review): the index list '0,3,6,7' presumes a host with at least eight GPUs —
# confirm it matches the machine this notebook actually runs on.
os.environ.update({
    "CUDA_DEVICE_ORDER": 'PCI_BUS_ID',
    "CUDA_VISIBLE_DEVICES": '0,3,6,7',
})

In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
# --- Load the TSV, keep a small working subset, and split it 80/20. ---
DATA_PATH = '/content/drive/MyDrive/Colab Notebooks/multimodal_test_public.tsv'
N_ROWS = 1000  # only the first N_ROWS rows of the file are used
SPLIT_COLUMNS = [
    'author', 'clean_title', 'domain', 'image_url', 'score',
    'num_comments', 'subreddit', 'upvote_ratio', '2_way_label',
]

df = pd.read_csv(DATA_PATH, sep='\t').head(N_ROWS)
labels = df['2_way_label'].values

# The label column is deliberately kept inside the split frames as well, so a
# label can still be read per row after rows are filtered downstream.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df[SPLIT_COLUMNS],
    labels,
    test_size=0.2,
    random_state=42,
)


In [20]:
pip install transformers



In [21]:
from transformers import BertTokenizer, BertModel
import numpy as np
import torch
import cv2
import numpy as np
import pandas as pd
import requests
from io import BytesIO
from PIL import Image
# Tokenizer and encoder are loaded from the same pretrained checkpoint name.
BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
bert_model = BertModel.from_pretrained(BERT_CHECKPOINT)
def encode_text(author, clean_title, domain):
    """Embed a post's author, title and domain as a single BERT vector.

    The available fields are joined with single spaces, tokenized (truncated
    to at most 512 tokens), passed through ``bert_model`` with gradient
    tracking disabled, and the last hidden states are averaged over the token
    axis.

    Missing fields (``None`` / NaN, as pandas produces for empty cells) are
    skipped. Formatting them directly would inject the literal words "nan" or
    "None" into the text and embed them as if they were real content.

    Args:
        author: Author name, or a missing value.
        clean_title: Post title, or a missing value.
        domain: Source domain, or a missing value.

    Returns:
        A 1-D numpy array holding the mean-pooled last hidden state.
    """
    present_fields = [
        str(value)
        for value in (author, clean_title, domain)
        if not pd.isna(value)
    ]
    text = " ".join(present_fields)

    input_ids = tokenizer.encode(text, return_tensors='pt', max_length=512, truncation=True)
    with torch.no_grad():
        outputs = bert_model(input_ids)

    # mean(dim=1) pools over tokens; squeeze(0) removes only the batch axis of
    # the single encoded sequence.
    return outputs.last_hidden_state.mean(dim=1).squeeze(0).numpy()

def vectorize_image_from_url(url, target_size=(224, 224), timeout=10):
    """Download an image and return it as a normalized RGB float array.

    This is a best-effort loader: any failure (network error, non-2xx status,
    timeout, undecodable payload, invalid URL) is reported on stdout and
    signalled by returning ``None`` rather than raising.

    Args:
        url: Address of the image to fetch.
        target_size: Size passed to ``cv2.resize`` (OpenCV interprets it as
            ``(width, height)``).
        timeout: Seconds passed to ``requests.get`` as its ``timeout``. Without
            it a single unresponsive host can block the caller indefinitely.

    Returns:
        A ``float32`` numpy array with three RGB channels and values scaled
        into ``[0, 1]``, or ``None`` if the image could not be obtained.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn HTTP error statuses into exceptions

        raw_bytes = np.frombuffer(response.content, np.uint8)
        image = cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)

        # imdecode reports failure by returning None instead of raising.
        if image is None:
            raise ValueError("Failed to decode image from URL.")

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV decodes as BGR
        image = cv2.resize(image, target_size)  # Resize to a consistent size
        image_array = image.astype(np.float32) / 255.0

        print(f"Image loaded successfully from URL: {url}")
        return image_array

    except Exception as e:
        # Deliberately broad: one bad URL must not abort a whole batch.
        print(f"Error processing image from URL {url}: {str(e)}")
        return None

In [22]:
# Build index-aligned text embeddings, image arrays and labels for the training
# split. A row is kept only when both the text and the image were produced.
target_size = (224, 224)
train_columns = ['author', 'clean_title', 'domain', 'image_url', '2_way_label']
train_embeddings, X_train_image, train_labels = [], [], []

for author, clean_title, domain, img_url, lbl in zip(*(train_texts[col] for col in train_columns)):
    text_embedding = encode_text(author, clean_title, domain)
    image_embedding = vectorize_image_from_url(img_url, target_size)

    # Drop the whole row if either modality is missing so the lists stay aligned.
    if text_embedding is None or image_embedding is None:
        continue

    train_embeddings.append(text_embedding)
    X_train_image.append(image_embedding)
    train_labels.append(lbl)

# Stack the per-row results into arrays.
train_embeddings = np.array(train_embeddings)
X_train_image = np.array(X_train_image)

Image loaded successfully from URL: https://external-preview.redd.it/8vJo4CT2hrGTrDBTO2QadRJEghv3Q0g_0LHNusu2j7I.jpg?width=320&crop=smart&auto=webp&s=19dff5a883448136a97e0ca91de95aa5c6154314
Image loaded successfully from URL: https://preview.redd.it/d99lzlvypgm31.jpg?width=320&crop=smart&auto=webp&s=6bdf01aa800c65534a1db262ad2bd3776157bb5b
Image loaded successfully from URL: http://i.imgur.com/5DaKRXE.jpg
Image loaded successfully from URL: http://i.imgur.com/tCM5nBp.jpg
Error processing image from URL https://preview.redd.it/o4xqa147u3p21.jpg?width=320&crop=smart&auto=webp&s=1541b068d48caa2fa4273bcb1246548be0840d3e: 404 Client Error: Not Found for url: https://preview.redd.it/o4xqa147u3p21.jpg?width=320&crop=smart&auto=webp&s=1541b068d48caa2fa4273bcb1246548be0840d3e
Image loaded successfully from URL: https://external-preview.redd.it/EIMuK6HWIMrpD312XwL1TwGK8dXZwH1zVhxCytCs9PQ.jpg?width=320&crop=smart&auto=webp&s=e93c9a1b3ecd5d5ffe76b0d1a8ee1b461bb9e4a2
Error processing image from UR

In [23]:
# Build index-aligned text embeddings, image arrays and labels for the test
# split. A row is kept only when both the text and the image were produced.
target_size = (224, 224)
test_columns = ['author', 'clean_title', 'domain', 'image_url', '2_way_label']
test_embeddings, X_test_image, test_labels = [], [], []

for author, clean_title, domain, img_url, lbl in zip(*(test_texts[col] for col in test_columns)):
    text_embedding = encode_text(author, clean_title, domain)
    image_embedding = vectorize_image_from_url(img_url, target_size)

    # Drop the whole row if either modality is missing so the lists stay aligned.
    if text_embedding is None or image_embedding is None:
        continue

    test_embeddings.append(text_embedding)
    X_test_image.append(image_embedding)
    test_labels.append(lbl)

# Stack the per-row results into arrays.
test_embeddings = np.array(test_embeddings)
X_test_image = np.array(X_test_image)

Image loaded successfully from URL: https://external-preview.redd.it/eIhtbfjltQ7gEmRccS1FnziA67GjC35lS2k68QxnnTs.jpg?width=320&crop=smart&auto=webp&s=0199cb661068dfdd0ec033d7366429b7a514dd7b
Image loaded successfully from URL: https://preview.redd.it/cbmjh7y59x901.jpg?width=320&crop=smart&auto=webp&s=36767968e34454a15de2ddd3f1547688b3050f24
Image loaded successfully from URL: https://preview.redd.it/pxv4ih8uwsn31.jpg?width=320&crop=smart&auto=webp&s=c9926eba39ebdd5b2423e78bfa087f09d44dee5b
Image loaded successfully from URL: https://external-preview.redd.it/5YfuQ8huE6qyaC-k59t8H_yBCFMq0Pv3-RcDVW8RO3U.jpg?width=320&crop=smart&auto=webp&s=1a7d79ef30bf4aed7dca5472370c1f6dbf826eb5
Image loaded successfully from URL: https://i.imgur.com/El7PMm9.jpg
Image loaded successfully from URL: https://preview.redd.it/w47v0x3xhhm21.jpg?width=320&crop=smart&auto=webp&s=d81c287387f4e9fd71b63b2b1ece77f2108d88ca
Image loaded successfully from URL: https://preview.redd.it/jnn2djrg2up31.jpg?width=320&crop=s

In [25]:
from tensorflow.keras import layers
# Text branch: the precomputed embedding vector goes through one dense layer.
text_input = layers.Input(shape=(train_embeddings.shape[1],))
text_layer = layers.Dense(128, activation='relu')(text_input)

# Image branch: one convolution, one max-pool, then flatten to a vector.
image_input = layers.Input(shape=(224, 224, 3))
conv_maps = layers.Conv2D(64, (3, 3), activation='relu')(image_input)
pooled_maps = layers.MaxPooling2D((2, 2))(conv_maps)
image_layer = layers.Flatten()(pooled_maps)

# Fuse both branches and map to a single sigmoid unit (binary output).
merged = layers.concatenate([text_layer, image_layer])
output = layers.Dense(1, activation='sigmoid')(merged)

print("-----------", output, sep="\n")

-----------
KerasTensor(type_spec=TensorSpec(shape=(None, 1), dtype=tf.float32, name=None), name='dense_7/Sigmoid:0', description="created by layer 'dense_7'")


In [27]:
from tensorflow import keras
import tensorflow as tf
# Keras expects array targets; the labels were collected as Python lists.
train_labels, test_labels = np.array(train_labels), np.array(test_labels)

# Assemble the two-input model from the graph tensors and configure training.
model = keras.Model(inputs=[text_input, image_input], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Sanity-check that all training arrays share the same leading dimension.
for train_array in (train_embeddings, X_train_image, train_labels):
    print(train_array.shape)

# Train the model (20% of the training arrays are held out for validation).
model.fit(
    [train_embeddings, X_train_image],
    train_labels,
    epochs=5,
    batch_size=32,
    validation_split=0.2,
)

for test_array in (test_embeddings, X_test_image, test_labels):
    print(test_array.shape)

# Evaluate the model; `evaluate` returns [loss, accuracy] for this compile
# configuration, so index 1 is the accuracy metric.
accuracy = model.evaluate([test_embeddings, X_test_image], test_labels)
print(f'Test Accuracy: {accuracy[1]*100:.2f}%')

(768, 768)
(768, 224, 224, 3)
(768,)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
(194, 768)
(194, 224, 224, 3)
(194,)
Test Accuracy: 74.74%
