In [2]:
import os
import random

import pandas as pd
import requests
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

In [3]:
# Load the raw meme table from the CSV that sits next to the notebook
source_csv = 'memes.csv'
df = pd.read_csv(source_csv)

In [4]:
# Count missing entries per column (boolean null mask summed column-wise)
null_mask = df.isnull()
missing_values = null_mask.sum()

In [5]:
# Discard every row that has at least one missing value in any column
df = df.dropna(axis=0, how='any')

In [6]:
# Join the two caption columns into one space-separated text feature
caption_top, caption_bottom = df['Text0'], df['Text1']
df['Description'] = caption_top + ' ' + caption_bottom

In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
features = df['Description']
labels = df['Industry']
train_data, test_data, train_target, test_target = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

In [8]:
# TF-IDF text features feeding a multinomial Naive Bayes classifier
vectorizer = TfidfVectorizer()
classifier = MultinomialNB()
model = make_pipeline(vectorizer, classifier)
model.fit(train_data, train_target)

In [9]:
# Lift pandas' row/column truncation so the whole listing is printed
for option_name in ('display.max_rows', 'display.max_columns'):
    pd.set_option(option_name, None)

# Show how many memes each industry label has
industry_counts = df['Industry'].value_counts()
print(industry_counts)

Industry
Entertainment                  142
Art                            142
Programmer                     132
Finance                        131
Food                           130
Education                      129
Business                       129
Fitness                        129
Technology                     129
Travel                         128
Meditation                     128
Student                        128
Photography                    128
Engineer                       128
Beauty                         128
Web Development                127
Influencers                    123
Healthcare                      79
Law                             66
Marketing                       59
Work place                      28
Culinary                        23
Communication                   21
Gardening                       20
DIY                             18
Single                          17
Cybersecurity                   16
Home Improvement                13
Media      

In [10]:

# Make predictions on the held-out test set (computed once and reused below)
predictions = model.predict(test_data)

# Per-class precision / recall / F1. zero_division=0 still reports 0.00 for
# classes that are never predicted, but without the UndefinedMetricWarning
# that scikit-learn otherwise raises for them.
print(classification_report(test_target, predictions, zero_division=0))

# Overall accuracy: fraction of test rows whose predicted industry is correct
accuracy = (predictions == test_target).mean()
print(f"Model Accuracy: {accuracy}")

                          precision    recall  f1-score   support

                Advocate       0.00      0.00      0.00         2
          Allergy Season       0.00      0.00      0.00         1
                     Art       0.35      0.90      0.50        20
              Automotive       0.00      0.00      0.00         1
                   Baker       0.00      0.00      0.00         1
        Bargain Shoppers       0.00      0.00      0.00         1
                  Beauty       0.78      0.97      0.86        29
       Beauty Enthusiast       0.00      0.00      0.00         1
                  Bidder       0.00      0.00      0.00         1
                 Bingers       0.00      0.00      0.00         1
             Book Lovers       0.00      0.00      0.00         1
                Business       0.77      0.89      0.83        27
     Campus Weather Woes       0.00      0.00      0.00         1
             Celebration       0.00      0.00      0.00         1
         

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [11]:

# Single paragraph as input
new_description = "I am coder"

# Predict the industry for the input paragraph
new_prediction = model.predict([new_description])

# Retrieve memes related to the predicted industry
predicted_memes = df[df['Industry'] == new_prediction[0]]

# Randomly select up to 10 memes from the predicted industry. Capping at the
# number of available rows avoids the ValueError that sample() raises when an
# industry has fewer than 10 memes.
selected_memes = predicted_memes.sample(n=min(10, len(predicted_memes)))

# Credentials are read from the environment so they are never stored in the
# notebook; a missing variable fails immediately with a KeyError naming it.
imgflip_username = os.environ['IMGFLIP_USERNAME']
imgflip_password = os.environ['IMGFLIP_PASSWORD']

for index, data in selected_memes.iterrows():
    print(data['Meme_id'])
    # Send the fields as a form-encoded POST body: requests escapes the caption
    # text (spaces, '&', '?', '#') and the password stays out of the URL.
    response = requests.post(
        "https://api.imgflip.com/caption_image",
        data={
            'template_id': str(data['Meme_id']),
            'username': imgflip_username,
            'password': imgflip_password,
            'text0': data['Text0'],
            'text1': data['Text1'],
        },
        timeout=30,  # do not let a stalled connection hang the notebook
    )
    if response.status_code == 200:
        result = response.json()
        if result.get('success'):
            print("API Response:")
            print(result['data']['url'])
        else:
            # The API can signal failure inside the JSON body (success=false),
            # in which case there is no 'data' entry to read.
            print(f"Error: {result.get('error_message')}")
    else:
        # Print an error message if the request was not successful
        print(f"Error: {response.status_code}, {response.text}")


166969924
API Response:
https://i.imgflip.com/8o8dsr.jpg
166969924
API Response:
https://i.imgflip.com/8o8dsx.jpg
166969924
API Response:
https://i.imgflip.com/8o8dsz.jpg
89370399
API Response:
https://i.imgflip.com/8o8dt5.jpg
259237855
API Response:
https://i.imgflip.com/8o8dt8.jpg
259237855
API Response:
https://i.imgflip.com/8o8dte.jpg
259237855
API Response:
https://i.imgflip.com/8o8dtl.jpg
89370399
API Response:
https://i.imgflip.com/8o8dtu.jpg
166969924
API Response:
https://i.imgflip.com/8o8du1.jpg
166969924
API Response:
https://i.imgflip.com/8o8du8.jpg


In [17]:

# # New input description
# new_description = "I am Student"

# # Make predictions for the new input
# new_prediction = model.predict([new_description])

# # Display the predicted industry category
# print(f"Predicted Industry: {new_prediction}")

# # Retrieve memes related to the predicted industry
# predicted_memes = df[df['Industry'] == new_prediction[0]]


# # Display the memes
# for index, data in predicted_memes.iterrows():
#     meme_id = data['Meme_id']
#     text0 = data['Text0']
#     text1 = data['Text1']

#     # Construct the URL
#     url = f"https://api.imgflip.com/caption_image?template_id={meme_id}&username={os.environ['IMGFLIP_USERNAME']}&password={os.environ['IMGFLIP_PASSWORD']}&text0={text0}&text1={text1}"

#     # print(url) # print(data.Meme_id)
#     # Make the API call
#     response = requests.post(url)

#     # Check if the request was successful (status code 200)
#     if response.status_code == 200:
#         # Print the response content (usually in JSON format)
#         print("API Response:")
#         print(response.json()['data']['url'])
#     else:
#         # Print an error message if the request was not successful
#         print(f"Error: {response.status_code}, {response.text}")


In [18]:
import pandas as pd
import random
from nltk import word_tokenize
from collections import defaultdict

def load_meme_dataset(file_path):
    """Read the CSV file at ``file_path`` and return it as a DataFrame."""
    return pd.read_csv(file_path)

def train_markov_chain_model(meme_dataset):
    """Build a first-order Markov chain from the meme text columns.

    For every row, the ``Text0``, ``Text1`` and ``Industry`` values are
    tokenized separately and each adjacent token pair ``(a, b)`` is recorded
    as a transition ``a -> b``. Transitions never span two columns.

    Tokens are lowercased because ``generate_markov_text`` lowercases its seed
    text; with mixed-case keys, capitalised words would never be matched.

    Args:
        meme_dataset: DataFrame providing the ``Text0``, ``Text1`` and
            ``Industry`` columns.

    Returns:
        ``defaultdict(list)`` mapping each lowercase token to the list of
        tokens observed directly after it. Duplicates are kept, so sampling
        uniformly from a list follows the observed transition frequencies.
    """
    model = defaultdict(list)

    # Zipping the columns keeps the row-by-row order of the transitions
    # without the per-row Series construction that iterrows() performs.
    rows = zip(
        meme_dataset['Text0'],
        meme_dataset['Text1'],
        meme_dataset['Industry'],
    )
    for row_texts in rows:
        for text in row_texts:
            if not isinstance(text, str):
                # Missing cells (NaN) and other non-text values cannot be tokenized
                continue
            tokens = [token.lower() for token in word_tokenize(text)]
            for current_token, next_token in zip(tokens, tokens[1:]):
                model[current_token].append(next_token)

    return model

def generate_markov_text(user_input, markov_model, max_length=15):
    """Extend ``user_input`` with words sampled from a Markov chain.

    The input is lowercased and tokenized to form the seed. Up to
    ``max_length`` words are then appended, each one picked with
    ``random.choice`` from the successors stored for the most recent word.
    Generation stops early when the most recent word has no successors.

    Args:
        user_input: Seed text. It is lowercased before lookup, so the seed
            only matches lowercase keys in ``markov_model``.
        markov_model: Mapping from a word to a list of possible next words.
        max_length: Maximum number of words appended to the seed.

    Returns:
        The seed tokens followed by the generated words, joined by single
        spaces. An empty string is returned when the input has no tokens.
    """
    generated_text = word_tokenize(user_input.lower())
    if not generated_text:
        # No seed word to continue from; indexing [-1] would raise IndexError
        return ''

    for _ in range(max_length):
        next_word_options = markov_model.get(generated_text[-1], [])
        if not next_word_options:
            break  # Dead end: nothing was ever observed after this word
        generated_text.append(random.choice(next_word_options))

    return ' '.join(generated_text)


# Train the chain on the cleaned meme table, then extend a sample prompt with it
markov_model = train_markov_chain_model(df)
user_input = "Write a meme on my coding journey"

generated_text = generate_markov_text(user_input, markov_model)
print("Generated Meme-like Text:", generated_text)

Generated Meme-like Text: write a meme on my coding journey ?
