In [12]:
import os
import cv2
import pytesseract
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import joblib
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import openai

In [13]:
# Read the OpenAI API key from the environment instead of hardcoding it, so the
# secret never ends up in the notebook or in version control. The key that was
# previously embedded in this cell has been exposed and must be revoked/rotated.
# Set OPENAI_API_KEY before starting the kernel; if it is unset this assigns None.
openai.api_key = os.environ.get('OPENAI_API_KEY')

In [14]:
# Step 1: Data Preparation
# Root folder of the dataset; the loaders below expect one sub-folder per class.
# Set the MEMES_DATA_DIR environment variable to point at the dataset on another
# machine; the original local path remains the default.
data_folder = os.environ.get(
    'MEMES_DATA_DIR',
    'C:/Users/Syed Issam Bukhari/Documents/projects all/projects/islamop/Memes',
)


In [15]:
# Step 2: Text Detection
def extract_text_from_image(image_path):
    """Run OCR on an image file and return the recognised text.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the
        grayscale version of the image.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None`` for ``image_path``
            (for example the path does not exist or is not a decodable image).
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        raise ValueError(f"Could not read image: {image_path!r}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return pytesseract.image_to_string(gray)

In [16]:
# Step 3: Function to preprocess the image for Nature Classification
def preprocess_image(image_path):
    """Load an image file and prepare it as input for the nature classifier.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A ``float32`` array holding the image resized to 224x224 with pixel
        values divided by 255. The channel order is left exactly as returned
        by ``cv2.imread``.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None`` for ``image_path``
            (for example the path does not exist or is not a decodable image).
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        raise ValueError(f"Could not read image: {image_path!r}")
    resized = cv2.resize(img, (224, 224))
    return resized.astype('float32') / 255.0

In [17]:
# Step 4: Function to perform image classification using CLIP
def classify_image(image_path, text):
    """Request a label for an image/text pair from the OpenAI client.

    The image is loaded through ``preprocess_image`` and submitted together
    with ``text`` to ``openai.Classification.create`` using the model name
    ``"clip-v1"``.

    NOTE(review): confirm that the installed ``openai`` client really offers
    ``Classification.create`` with a ``"clip-v1"`` model and an ``inputs``
    argument, and that it accepts the image array in the form sent here.

    Args:
        image_path: Path of the image file to classify.
        text: Text sent alongside the image.

    Returns:
        The ``'label'`` entry of the first item in the response's
        ``'choices'`` list.
    """
    payload = {
        'images': [preprocess_image(image_path)],
        'text': text
    }
    response = openai.Classification.create(model="clip-v1", inputs=payload)
    first_choice = response['choices'][0]
    return first_choice['label']

In [18]:
# Step 5: Function to extract text features using TF-IDF
def extract_text_features(data_folder, vectorizer_path='vectorizer.pkl'):
    """OCR every image under ``data_folder`` and vectorize the extracted text.

    ``data_folder`` is expected to contain one sub-folder per class; entries
    that are not directories are skipped. Class folders and the files inside
    them are visited in sorted order so the row order of the result is
    reproducible between runs.

    Args:
        data_folder: Root folder holding one sub-folder per class.
        vectorizer_path: Path of the previously fitted vectorizer to load
            with ``joblib``. Defaults to ``'vectorizer.pkl'``.

    Returns:
        A tuple ``(text_features, labels)`` where ``text_features`` is the
        result of ``vectorizer.transform`` on the OCR texts and ``labels`` is
        the list of class folder names, one per processed file.
    """
    texts = []
    labels = []
    for class_name in sorted(os.listdir(data_folder)):
        class_folder = os.path.join(data_folder, class_name)
        if not os.path.isdir(class_folder):
            continue
        for file_name in sorted(os.listdir(class_folder)):
            file_path = os.path.join(class_folder, file_name)
            texts.append(extract_text_from_image(file_path))
            labels.append(class_name)  # the folder name is the label

    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only load vectorizer files that come from a trusted source.
    vectorizer = joblib.load(vectorizer_path)
    return vectorizer.transform(texts), labels

In [19]:
# Step 6: Function to preprocess the text for prediction
def preprocess_text(text):
    """Return ``text`` unchanged.

    Pass-through placeholder: no preprocessing is applied because text
    classification is delegated to CLIP.
    """
    return text

In [20]:
# Step 7: Text model placeholder. An SVC model was used for text classification
# during earlier training; CLIP is used for that now, so no text model is loaded
# and the name is bound to None.
text_model = None  # SVC is no longer used for text classification


In [21]:
# Step 8: Model Training for Nature Classification
def train_nature_model(all_images=None, all_labels=None, epochs=10, batch_size=32):
    """Build and train the CNN used for nature classification.

    Args:
        all_images: Optional array of training images matching the model's
            ``(224, 224, 3)`` input shape. When either ``all_images`` or
            ``all_labels`` is omitted, both are loaded with
            ``load_images(data_folder)`` using the module-level ``data_folder``.
        all_labels: Optional integer class ids, one per image.
        epochs: Number of training epochs (default 10).
        batch_size: Training batch size (default 32).

    Returns:
        The fitted Keras ``Sequential`` model.

    Raises:
        ValueError: If there are no training samples.
    """
    if all_images is None or all_labels is None:
        all_images, all_labels = load_images(data_folder)
    if len(all_labels) == 0:
        raise ValueError("No training images found; cannot train the nature model.")

    # sparse_categorical_crossentropy requires every label id to be smaller
    # than the number of output units. Size the output layer from the largest
    # id rather than the count of distinct ids, so label sets with gaps
    # (e.g. {0, 2, 3}) stay in range.
    num_classes = int(np.max(all_labels)) + 1

    nature_model = Sequential()
    nature_model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
    nature_model.add(MaxPooling2D((2, 2)))
    nature_model.add(Conv2D(64, (3, 3), activation='relu'))
    nature_model.add(MaxPooling2D((2, 2)))
    nature_model.add(Conv2D(128, (3, 3), activation='relu'))
    nature_model.add(MaxPooling2D((2, 2)))
    nature_model.add(Flatten())
    nature_model.add(Dense(128, activation='relu'))
    nature_model.add(Dense(num_classes, activation='softmax'))
    nature_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    nature_model.fit(all_images, all_labels, epochs=epochs, batch_size=batch_size)
    return nature_model

In [22]:
# Step 9: Model Evaluation for Nature Classification
def evaluate_nature_model(nature_model, all_images, all_labels):
    """Print accuracy, precision, recall and F1 for the nature classifier.

    The predicted class of each image is the argmax over the last axis of
    ``nature_model.predict(all_images)``. Precision, recall and F1 use
    ``average='weighted'``. Nothing is returned; the four scores are printed.

    Args:
        nature_model: Model exposing ``predict``.
        all_images: Images passed to ``predict``.
        all_labels: True class ids compared against the predictions.
    """
    class_scores = nature_model.predict(all_images)
    predicted = np.argmax(class_scores, axis=-1)

    # All four metrics are computed before anything is printed.
    weighted = {'average': 'weighted'}
    report = [
        ("Nature Accuracy:", accuracy_score(all_labels, predicted)),
        ("Nature Precision:", precision_score(all_labels, predicted, **weighted)),
        ("Nature Recall:", recall_score(all_labels, predicted, **weighted)),
        ("Nature F1-score:", f1_score(all_labels, predicted, **weighted)),
    ]
    for caption, value in report:
        print(caption, value)

In [23]:
# Step 10: Save Nature Model
def save_nature_model(nature_model, nature_model_file='nature_modelc.h5'):
    """Save the trained nature model to disk and print a confirmation.

    Args:
        nature_model: Model exposing ``save``.
        nature_model_file: Destination path handed to ``nature_model.save``.
            Defaults to ``'nature_modelc.h5'``, the file name this notebook
            has always written.
    """
    nature_model.save(nature_model_file)
    print("Nature Model saved successfully.")

In [None]:
# Step 11: Load and Preprocess Images for Nature Classification
def load_images(data_folder):
    """Load and preprocess every image under ``data_folder`` with integer labels.

    ``data_folder`` is expected to contain one sub-folder per class. Class
    folders are visited in sorted order and each folder that contains at least
    one file receives the next consecutive id, starting at 0. Stray files in
    ``data_folder`` and empty class folders therefore never leave gaps in the
    ids, which keeps the number of distinct labels equal to ``max(label) + 1``.

    Args:
        data_folder: Root folder holding one sub-folder per class.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays: the outputs of
        ``preprocess_image`` stacked together, and the class id of each image.
    """
    class_names = sorted(
        name for name in os.listdir(data_folder)
        if os.path.isdir(os.path.join(data_folder, name))
    )

    images = []
    labels = []
    next_label = 0
    for class_name in class_names:
        class_folder = os.path.join(data_folder, class_name)
        candidates = (
            os.path.join(class_folder, file_name)
            for file_name in sorted(os.listdir(class_folder))
        )
        # Nested directories are not images; only regular files are loaded.
        file_paths = [path for path in candidates if os.path.isfile(path)]
        if not file_paths:
            # An empty class folder must not consume a label id.
            continue
        for file_path in file_paths:
            images.append(preprocess_image(file_path))
            labels.append(next_label)
        next_label += 1
    return np.array(images), np.array(labels)
# Main function
def main():
    """Run the notebook pipeline end to end.

    Steps, all driven by the module-level ``data_folder``:
    extract text features, load the images, train the nature model, print
    its evaluation metrics and save it to disk.
    """
    # Text features: extracted but not consumed by any later step, because
    # text classification is delegated to CLIP and no text model is trained,
    # evaluated or saved here. The matrix is deliberately left as returned;
    # converting an unused sparse matrix to a dense array only costs memory.
    _text_features, _text_labels = extract_text_features(data_folder)

    # Load and preprocess images for nature classification
    all_images, all_labels = load_images(data_folder)

    # Model training for nature classification
    nature_model = train_nature_model()

    # Model evaluation for nature classification
    # NOTE(review): the images loaded above come from the same data_folder the
    # model is trained on, so these metrics measure fit on the training data,
    # not generalisation to unseen images.
    evaluate_nature_model(nature_model, all_images, all_labels)

    # Save nature model
    save_nature_model(nature_model)

# Entry point: run the full pipeline when this code is executed as the
# top-level program (``__name__`` is then "__main__").
if __name__ == "__main__":
    main()