In [1]:
# importing all the necessary packages for the PunBot
from collections import Counter
import spacy
word2vec = spacy.load('en_core_web_lg')
import csv
from nltk.corpus import wordnet
from nltk import pos_tag
from nltk.tokenize import word_tokenize
import random

# this package allows PunBot to be integrated with telegram
import telebot

# importing pre-written functions for entity recognition and response selection 
from user_functions import preprocess, compare_overlap, pos_tag, extract_nouns, compute_similarity

In [3]:
# This class is borrowed from the 2020 GitHub publication by Kamran Janjua, available at https://github.com/kjanjua26/Pyphones/tree/master

from urllib.request import urlopen
import requests
from bs4 import BeautifulSoup
from typing import Dict, List
import re

class Pyphones:
    """Scrape the homophones of a word from homophone.com.

    Attributes:
        word: the word to look up.
        timeout: seconds to wait for each HTTP request.
        url: search URL template taking (page number, query).
        homophones: ``{word: [[homophone, ...], ...]}``, filled in by
            ``get_the_homophones``.
    """

    def __init__(self, word, timeout=10):
        self.word = word
        # Without a timeout a stalled connection would block the caller forever.
        self.timeout = timeout
        self.url = "https://www.homophone.com/search?page={}&type=&q={}"
        self.homophones = {self.word: []}

    def get_the_page(self, page_no=1):
        """
        Download and parse one page of search results.

        Args
            page_no (int): 1-based page number of the result list.

        Returns
            BeautifulSoup: the parsed content of the page.
        """
        from urllib.parse import quote_plus

        # Encode the query so spaces, '&', '#', ... cannot corrupt the query string.
        url = self.url.format(page_no, quote_plus(str(self.word)))
        r = requests.get(url, timeout=self.timeout)
        return BeautifulSoup(r.content, "html.parser")

    @staticmethod
    def _read_page_count(soup):
        """Extract the total page count from an already parsed result page.

        Raises
            IndexError: when the page has no 'col-sm-9' container; callers
                use this to detect a failed lookup.
        """
        pages = soup.find_all('div', attrs={'class': 'col-sm-9'})
        total_pages = pages[0].find('h5').text.split('/')[-1].strip()
        return int(total_pages)

    def get_the_page_nos(self):
        """
        Get the total number of pages

        Returns
            int: the total number of the pages.

        Raises
            IndexError: when the first page has no 'col-sm-9' container.
        """
        return self._read_page_count(self.get_the_page())

    def get_the_homophones(self):
        """
        Get the homophones of the word.

        The stored result is replaced on every call, so calling this method
        repeatedly does not accumulate duplicate groups.

        Returns
            dict: {word: [list_of_homophones]} against each word.

        Raises
            IndexError: when the first page has no 'col-sm-9' container.
        """
        # The first page provides both the page count and the first batch of
        # results, so it is downloaded only once.
        first_page = self.get_the_page(1)
        total_pages = self._read_page_count(first_page)

        found = []
        for page_no in range(1, total_pages + 1):
            soup = first_page if page_no == 1 else self.get_the_page(page_no)
            for well in soup.find_all('div', attrs={'class': 'well well-lg'}):
                anchors = well.find_all('a', attrs={'class': 'btn word-btn'})
                if anchors:
                    found.append([anchor.text for anchor in anchors])

        self.homophones[self.word] = found
        return self.homophones

In [9]:
# I use a database of idioms provided by zaghloul404 (2023), available at https://github.com/zaghloul404/englishidioms/tree/main
# In this section, I remove artifacts from idioms and remove idioms that start with a verb 

def starts_with_verb(idiom):
    """Tell whether the first word of ``idiom`` can be read as a verb.

    The first whitespace-separated word is looked up in WordNet; the idiom
    counts as verb-initial as soon as any of that word's synsets has the
    part of speech 'v'.

    Args:
        idiom (str): the idiom text.

    Returns:
        bool: True if any synset of the first word is a verb, otherwise
        False. An empty or whitespace-only idiom yields False.
    """
    words = idiom.split()
    if not words:
        # Nothing to look up; previously this raised IndexError.
        return False
    return any(synset.pos() == 'v' for synset in wordnet.synsets(words[0]))

def remove_rows_with_verbs_or_brackets(file_path):
    """Clean the idiom column of a CSV file and drop verb-initial idioms.

    The idiom is expected in the third column. The artifact characters
    "†", "*", "(" and ")" are deleted from it (the text between brackets is
    kept), and the row is kept only if ``starts_with_verb`` is False for the
    cleaned idiom.

    Args:
        file_path (str): path of a UTF-8 encoded CSV file with a header row.

    Returns:
        tuple: ``(header, filtered_rows)`` where ``header`` is the first row
        (an empty list for an empty file) and ``filtered_rows`` is the list
        of kept rows with the cleaned idiom in column three.
    """
    artifacts = str.maketrans('', '', "†*()")

    with open(file_path, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        # An empty file has no header; return empty results instead of
        # letting StopIteration escape.
        header = next(reader, [])

        filtered_rows = []
        for row in reader:
            # Blank lines and truncated rows carry no idiom column; skip them
            # rather than failing with IndexError.
            if len(row) < 3:
                continue
            row[2] = row[2].translate(artifacts)
            if not starts_with_verb(row[2]):
                filtered_rows.append(row)

    return header, filtered_rows

# Applying the functions to the idioms
file_path = "idioms.csv" 
# The cleaning function defined in this cell is remove_rows_with_verbs_or_brackets;
# the previously used name (subtitute_brackets) is not defined anywhere in the
# notebook and raises NameError on a fresh kernel.
header, filtered_rows = remove_rows_with_verbs_or_brackets(file_path)

# Write the filtered rows to a new CSV file
with open("filtered_idioms.csv", 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(header) 
    writer.writerows(filtered_rows)

print("Rows removed if they start with a verb. Filtered idioms saved to filtered_idioms.csv")

Rows removed if they start with a verb. Filtered idioms saved to filtered_idioms.csv


In [6]:
# These functions capture grammatical variations in the idioms
# This element aims to improve the quality of PunBot's responses

# Check whether the idiom contains a verb, or starts with a word (e.g. "I", "you", "if") that is treated as leading to the same grammatical structure

def contains_verb(idiom):
    """Tell whether the idiom should be treated as containing a verb.

    An idiom qualifies when its first whitespace-separated word is one of a
    fixed set of starters, or when any token produced by ``word_tokenize``
    receives a tag beginning with 'V' from ``pos_tag``.

    Args:
        idiom (str): the idiom text.

    Returns:
        bool: True if the idiom qualifies, otherwise False.
    """
    clause_starters = ['i', "i've", "i'll", "you", "you'll", "you've", 'if', "there's", "that's", "there're"]

    leading = idiom.split()
    if leading and leading[0].lower() in clause_starters:
        return True

    tagged_tokens = pos_tag(word_tokenize(idiom))
    return any(tag.startswith('V') for _token, tag in tagged_tokens)

# Check if the idiom starts with a not and contains a verb 
def starts_with_not_verb(idiom):
    """Tell whether the idiom starts with 'not' and contains a verb.

    Args:
        idiom (str): the idiom text.

    Returns:
        bool: True when the first whitespace-separated word is 'not'
        (case-insensitive) and at least one token is given a tag beginning
        with 'V' by ``pos_tag``; otherwise False.
    """
    leading = idiom.split()
    if not leading or leading[0].lower() != 'not':
        return False

    tagged_tokens = pos_tag(word_tokenize(idiom))
    return any(tag.startswith('V') for _token, tag in tagged_tokens)

# Check if the idiom starts with cannot, doesn't or don't
def starts_with_cannot_verb(idiom):
    """Tell whether the idiom opens with 'cannot', "doesn't" or "don't".

    Args:
        idiom (str): the idiom text.

    Returns:
        bool: True when the first whitespace-separated word, lower-cased,
        is one of the three negated openers; False otherwise (including for
        an empty idiom).
    """
    negated_openers = ['cannot', "doesn't", "don't"]
    leading = idiom.split()
    return bool(leading) and leading[0].lower() in negated_openers

 # Check if the idiom starts with a
def starts_with_a(idiom):
    """Tell whether the idiom starts with 'a' and contains no verb.

    The previous loop returned True as soon as it met the first token that
    was not tagged as a verb, so the verb check never had any effect once
    the idiom started with 'a'. The check now requires that none of the
    tokens is tagged as a verb.

    Args:
        idiom (str): the idiom text.

    Returns:
        bool: True when the first whitespace-separated word is 'a'
        (case-insensitive) and no token gets a tag beginning with 'V' from
        ``pos_tag``; otherwise False.
    """
    leading = idiom.split()
    if not leading or leading[0].lower() != 'a':
        return False

    tagged_tokens = pos_tag(word_tokenize(idiom))
    return not any(tag.startswith('V') for _token, tag in tagged_tokens)
    
# Check if the idiom contains a to
def contains_to(idiom):
    """Tell whether the idiom contains the word 'to'.

    The match is on the whole word, so idioms that merely contain the
    letters "to" inside another word ("stop", "into", "tomorrow") no longer
    qualify.

    Args:
        idiom (str): the idiom text.

    Returns:
        bool: True if 'to' occurs as a separate word (case-insensitive).
    """
    return re.search(r"\bto\b", idiom, flags=re.IGNORECASE) is not None

In [18]:
# Here I define all fixed elements

# These commands ensure that PunBot does not generate a response when the user wants to 
# leave the chat or expresses satisfaction with the pun

exit_commands = ("quit", "goodbye", "exit", "bye", 'no', 'do not')
positive_commands = ("yes", "funny", "fun", "hilarious", 'thank', 'super', 'of course', 'sure')
hi_commands = ("hi", "hello")

# These pre-defined responses are tailored to the specific requests of the user.
# They are compared with the user's message word by word, so the style keyword in
# each of them must be spelled the way a user would type it.
response_default = "Here is one hilarious joke about {} "
response_a = "Of course I have a provocative pun about {} "
response_b = "I agree, {} is a great topic for a dirty pun"
response_c = "My father once told me a nice silly pun about {}"
response_d = "My grandfather once told me a nice dark humor pun about {}"
response_e = "Ow, I have plenty dummy puns on {}"
response_f = "Your offensive pun on {} is being generated"

# 'Object' tends to work the best for entity recognition 
blank_spot = "object"

# Path of the filtered idioms
idiom_file = "filtered_idioms.csv"

# The first part of the pun is always the same question that takes in the user-specific entity
# and a random adjective
question = "Question: Why is the {} so {}?"

# Although adjectives have no bearing on the content of the pun, they add some 
# randomness and thus contribute to the comic effect
states = [ 'happy', 'sad', 'tired', 'worried', 'sleepy', 'angry', 'smart', 'excited']

# The second part of the pun is an idiom-specific answer that takes in a modified idiom
# Answers are designed to capture grammatical variation in idioms
answer_no_verb = "Answer: Because it is {}"
answer_verb = "Answer: Because {}"
answer_verb_not = "Answer: Because it is/does {}"
answer_verb_cannot = "Answer: Because it {}"
answer_a = "Answer: Because of {}"
answer_to = "Answer: Because it is {}"

responses = [response_default, response_a, response_b, response_c, response_d, response_e, response_f]

In [25]:
# This section provides functions that allow PunBot to receive messages from the user and generate puns
# The code is designed to work with Telegram
import os

# Initializing PunBot with an API token.
# The token is taken from the TELEGRAM_BOT_TOKEN environment variable so that the
# secret does not have to be written into the notebook; the placeholder is kept
# as a fallback for when the variable is not set.
bot = telebot.TeleBot(os.environ.get("TELEGRAM_BOT_TOKEN", "#######"))

# defining an exit function
def make_exit(user_message):
    """Tell whether the message contains one of the exit commands.

    Each entry of ``exit_commands`` is tested with a plain substring check.

    Args:
        user_message (str): the text received from the user.

    Returns:
        bool: True if any exit command occurs in the message.
    """
    return any(command in user_message for command in exit_commands)

# The idiom finder
def find_idioms(entity):
    """Pick a random idiom that contains ``entity`` and turn it into a template.

    The idioms are read from the third column of the CSV file named by
    ``idiom_file``. Every occurrence of the entity in the chosen idiom is
    replaced by the placeholder '{}' and the result is lower-cased.

    Args:
        entity (str): the text to look for inside the idioms.

    Returns:
        str | bool: the lower-cased idiom template, or False when the entity
        is empty or no idiom contains it.
    """
    if not entity:
        # An empty string is "contained" in every idiom and would scatter
        # placeholders between all characters of the chosen idiom.
        return False

    idioms_with_entity = []

    # The file is written as UTF-8, so read it back with the same encoding
    # instead of the platform default.
    with open(idiom_file, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header, tolerating an empty file
        for row in reader:
            if len(row) < 3:
                # blank or truncated line: no idiom column
                continue
            idiom = row[2].strip()
            if entity in idiom:
                idioms_with_entity.append(idiom)

    if not idioms_with_entity:
        return False

    random_idiom = random.choice(idioms_with_entity)
    return random_idiom.replace(entity, '{}').lower()


    
# The homophone replacement
def replace_with_homophones(entity):
    """Swap the words of ``entity`` for randomly chosen homophones.

    The homophones are looked up with ``Pyphones``. A word is replaced only
    when the lookup result has an entry for it; the word itself is removed
    from the candidates so that a replacement is always a different word.

    Args:
        entity (str): the word (or words) to turn into a pun.

    Returns:
        str | bool: the entity with replaced words (unchanged if no other
        homophone is known), or False when the lookup itself failed.
    """
    py = Pyphones(entity)
    try:
        homophones = py.get_the_homophones()
    except (IndexError, requests.RequestException):
        # IndexError: the result page lacks the expected layout.
        # RequestException: the site could not be reached. Either way the
        # caller falls back to the original entity.
        return False

    words = entity.split()
    for i, original in enumerate(words):
        word = original.lower()
        # NOTE(review): the scraped groups presumably include the searched
        # word itself; picking it would leave the entity unchanged.
        candidates = [
            homophone
            for group in homophones.get(word, [])
            for homophone in group
            if homophone.strip().lower() != word
        ]
        if candidates:
            words[i] = random.choice(candidates)

    return ' '.join(words)

# Finding intent
def find_intent_match(responses, user_message):
    """Return the response that overlaps most with the user's message.

    Both the message and every candidate response are passed through
    ``preprocess`` and counted into bags of words; ``compare_overlap``
    scores each candidate against the message. On a tie the earliest
    candidate wins.

    Args:
        responses (list[str]): candidate response templates.
        user_message (str): the text received from the user.

    Returns:
        str: the best-scoring entry of ``responses``.
    """
    message_bag = Counter(preprocess(user_message))

    overlaps = []
    for candidate in responses:
        candidate_bag = Counter(preprocess(candidate))
        overlaps.append(compare_overlap(candidate_bag, message_bag))

    best_position = overlaps.index(max(overlaps))
    return responses[best_position]

# Finding entities
def find_entities(user_message):
    """Pick the noun of the message that best matches ``blank_spot``.

    The words 'pun', 'puns' and 'please' are removed first so that they are
    not mistaken for the topic. They are removed as whole words only, so
    that words merely containing "pun" (e.g. "spunk") stay intact and the
    plural "puns" does not leave a stray "s" behind.

    Args:
        user_message (str): the text received from the user.

    Returns:
        The first element of the ``compute_similarity`` result with the
        highest value at index 2, or ``blank_spot`` when that result is
        empty.
    """
    # remove 'pun(s)' and 'please' from the message to avoid confusion
    user_message = re.sub(r"\b(?:puns?|please)\b", "", user_message, flags=re.IGNORECASE)

    tagged_user_message = pos_tag(preprocess(user_message))
    message_nouns = extract_nouns(tagged_user_message)
    tokens = word2vec(" ".join(message_nouns))
    category = word2vec(blank_spot)
    word2vec_result = compute_similarity(tokens, category)

    if not word2vec_result:
        return blank_spot

    # ascending by the value at index 2, so the best match ends up last
    word2vec_result.sort(key=lambda x: x[2])
    return word2vec_result[-1][0]

# The respond function 
def respond(user_message):
    """Build PunBot's complete reply to a pun request.

    The reply consists of an intent-matched introduction, a fixed question
    about the detected entity, and an answer made from an idiom in which the
    entity is replaced by a homophone.

    Args:
        user_message (str): the text received from the user.

    Returns:
        str: the introduction, question and answer separated by line breaks,
        or an apology when no idiom contains the entity.
    """
    best_response = find_intent_match(responses, user_message)
    entity = find_entities(user_message)
    response_text = best_response.format(entity)

    random_state = random.choice(states)
    question_text = question.format(entity, random_state)

    # Look for the idiom first: without one there is no pun, and the
    # homophone lookup (a web request) can be skipped entirely.
    idiom = find_idioms(entity)
    if idiom is False:
        return "Sorry.. can't think of any pun on that actually..."

    # A single lookup; calling it twice would double the web requests and
    # throw away the first randomly chosen homophone.
    pun_entity = replace_with_homophones(entity)
    if pun_entity is False:
        pun_entity = entity

    # Plain replacement instead of str.format: an idiom that contains the
    # entity more than once has several placeholders, which format() cannot
    # fill from one argument.
    idiom_punned = idiom.replace('{}', pun_entity)

    answer_text = answer_no_verb.format(idiom_punned)

    # using different answers for different grammar structures;
    # a later match overrides an earlier one
    if contains_verb(idiom):
        answer_text = answer_verb.format(idiom_punned)
    if starts_with_a(idiom):
        answer_text = answer_a.format(idiom_punned)
    if starts_with_cannot_verb(idiom):
        answer_text = answer_verb_cannot.format(idiom_punned)
    if contains_to(idiom):
        answer_text = answer_to.format(idiom_punned)

    return "{}\n\n{}\n{}".format(response_text, question_text, answer_text)
    
        

# Define a Telegram handler for the start command
@bot.message_handler(commands=['start'])
def send_welcome(message):
    """Greet the user who sent the start command and ask for a pun style and topic."""
    greeting = "Welcome to Pun Bot! Would you prefer a silly, dirty or a provocative pun? Also, what should be the topic of the pun?"
    bot.send_message(message.chat.id, greeting)

# Define a Telegram handler for incoming messages
@bot.message_handler(func=lambda message: True)
def handle_message(message):
    """Answer an incoming message with exactly one kind of reply.

    The message is checked, in this order, for an exit command, a greeting
    (only in messages shorter than three words) and a positive command. The
    first check that matches decides the reply; only when none matches is a
    pun generated. Previously the three checks were independent, so one
    message (e.g. "no thanks") could trigger several contradictory replies.

    Args:
        message: the incoming Telegram message; its ``text`` and ``chat.id``
            are read.
    """
    user_message = message.text.lower()
    words = user_message.split()

    # NOTE(review): commands are matched as substrings, so "another" or
    # "know" count as the exit command 'no'. Left unchanged here because any
    # fix changes which inputs count as commands ('thank' currently catches
    # "thanks" only thanks to substring matching).
    def mentions_any(commands):
        return any(command in user_message for command in commands)

    if mentions_any(exit_commands):
        bot.send_message(message.chat.id, "Oki-doki. See you later, alligator!")
    elif len(words) < 3 and mentions_any(hi_commands):
        bot.send_message(message.chat.id, "Hi! Wanna hear a pun?")
    elif mentions_any(positive_commands):
        bot.send_message(message.chat.id, "Thanks! Please specify the topic for another ridiculous pun!")
    else:
        response_text = respond(user_message)
        bot.reply_to(message, response_text)
        bot.send_message(message.chat.id, "How about another one?")

# Polling for messages
# NOTE(review): presumably this call keeps the cell running for as long as the
# bot is active, so it should stay the last statement of the notebook — confirm
# against the telebot documentation.
bot.polling()