In [2]:
pip install pytesseract

Collecting pytesseract
  Downloading pytesseract-0.3.10-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.10
Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import cv2
import pytesseract
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import joblib
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import tensorflow.keras as keras

In [2]:
# Step 1: Data Preparation
data_folder = 'C:/Users/Syed Issam Bukhari/Documents/projects all/projects/islamop/Memes'  # Root folder containing all the image folders
# Step 1: Data Preparation
#islamophobic_text_folder = 'C:/Users/Syed Issam Bukhari/Documents/projects all/projects/islamop/Memes/islamophobic text'
#not_islamophobic_text_folder = 'C:/Users/Syed Issam Bukhari/Documents/projects all/projects/islamop/Memes/Non islamophobic text'
#islamophobic_nature_folder = 'C:/Users/Syed Issam Bukhari/Documents/projects all/projects/islamop/Memes/Islamophobic image'
#not_islamophobic_nature_folder = 'C:/Users/Syed Issam Bukhari/Documents/projects all/projects/islamop/Memes/Non Islamophobic image'

In [3]:
# Step 2: Text Detection
def extract_text_from_image(image_path):
    img = cv2.imread(image_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    text = pytesseract.image_to_string(gray)
    return text

In [4]:
# Step 3: Feature Extraction for Text Classification
def extract_text_features(data_folder):
    images = []
    labels = []
    for class_label, class_name in enumerate(os.listdir(data_folder)):
        class_folder = os.path.join(data_folder, class_name)
        if not os.path.isdir(class_folder):
            continue
        for file_name in os.listdir(class_folder):
            file_path = os.path.join(class_folder, file_name)
            image = extract_text_from_image(file_path)
            images.append(image)
            labels.append(class_label)
    vectorizer = TfidfVectorizer()
    text_features = vectorizer.fit_transform(images)
    return text_features, labels

text_features, labels = extract_text_features(data_folder)

In [6]:
# Step 4: Model Training for Text Classification
text_model = SVC(kernel='linear')
text_model.fit(text_features, labels)


In [7]:
# Step 5: Model Evaluation for Text Classification
text_pred = text_model.predict(text_features)

text_accuracy = accuracy_score(labels, text_pred)
text_precision = precision_score(labels, text_pred, average='weighted')
text_recall = recall_score(labels, text_pred, average='weighted')
text_f1_score = f1_score(labels, text_pred, average='weighted')

print("Text Accuracy:", text_accuracy)
print("Text Precision:", text_precision)
print("Text Recall:", text_recall)
print("Text F1-score:", text_f1_score)

Text Accuracy: 0.42925
Text Precision: 0.45541389663364085
Text Recall: 0.42925
Text F1-score: 0.4233844220830174


In [9]:
# Step 6: Save Text Model
text_model_file = 'text_model.pkl'
joblib.dump(text_model, text_model_file)
print("Text Model saved successfully.")

Text Model saved successfully.


In [10]:
# Step 7: Load and Preprocess Images for Nature Classification
def preprocess_image(image_path):
    img = cv2.imread(image_path)
    img = cv2.resize(img, (224, 224))  # Resize image to fit CNN model input shape
    img = img.astype('float32') / 255.0  # Normalize pixel values to [0, 1]
    return img


In [11]:
def load_images(data_folder):
    images = []
    labels = []
    for class_label, class_name in enumerate(os.listdir(data_folder)):
        class_folder = os.path.join(data_folder, class_name)
        if not os.path.isdir(class_folder):
            continue
        for file_name in os.listdir(class_folder):
            file_path = os.path.join(class_folder, file_name)
            image = preprocess_image(file_path)
            images.append(image)
            labels.append(class_label)
    return np.array(images), np.array(labels)

all_images, all_labels = load_images(data_folder)

In [13]:
# Step 8: Model Training for Nature Classification
nature_model = Sequential()
nature_model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
nature_model.add(MaxPooling2D((2, 2)))
nature_model.add(Conv2D(64, (3, 3), activation='relu'))
nature_model.add(MaxPooling2D((2, 2)))
nature_model.add(Conv2D(128, (3, 3), activation='relu'))
nature_model.add(MaxPooling2D((2, 2)))
nature_model.add(Flatten())
nature_model.add(Dense(128, activation='relu'))
nature_model.add(Dense(len(set(all_labels)), activation='softmax'))
# Print the model summary
nature_model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 111, 111, 32)     0         
 2D)                                                             
                                                                 
 conv2d_7 (Conv2D)           (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 54, 54, 64)       0         
 2D)                                                             
                                                                 
 conv2d_8 (Conv2D)           (None, 52, 52, 128)       73856     
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 26, 26, 128)     

In [14]:
nature_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
nature_model.fit(all_images, all_labels, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x173dad12560>

In [15]:
# Step 9: Model Evaluation for Nature Classification
nature_pred = np.argmax(nature_model.predict(all_images), axis=-1)

nature_accuracy = accuracy_score(all_labels, nature_pred)
nature_precision = precision_score(all_labels, nature_pred, average='weighted')
nature_recall = recall_score(all_labels, nature_pred, average='weighted')
nature_f1_score = f1_score(all_labels, nature_pred, average='weighted')

print("Nature Accuracy:", nature_accuracy)
print("Nature Precision:", nature_precision)
print("Nature Recall:", nature_recall)
print("Nature F1-score:", nature_f1_score)


Nature Accuracy: 0.4915
Nature Precision: 0.4946508407171669
Nature Recall: 0.4915
Nature F1-score: 0.3775933200754689


In [17]:
# Step 10: Save Nature Model

nature_model_file = 'nature_model.h5'
nature_model.save(nature_model_file)
print("Nature Model saved successfully.")

Nature Model saved successfully.
