In [2]:
import nltk
import random

# !pip install sentence_transformers
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

from elasticsearch import Elasticsearch
import pandas as pd

import time

import torch
import torch.nn.functional as F

from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Sequence-classification model and its matching tokenizer, both loaded from the same
# Hugging Face checkpoint. transcript_response() feeds sentence pairs through them and
# keeps the sentences predicted as class 1.
model_class = AutoModelForSequenceClassification.from_pretrained('morenolq/spotify-podcast-advertising-classification')
tokenizer = AutoTokenizer.from_pretrained('morenolq/spotify-podcast-advertising-classification')

# Sentence-embedding model. NOTE(review): no visible cell references `model` - confirm it is still needed.
model = SentenceTransformer('pinecone/distiluse-podcast-nq')

# Fetch the NLTK resources this notebook relies on. quiet=True suppresses the download log;
# each call evaluates to a bool, which is why the cell shows `True` for the last one.
nltk.download('stopwords',quiet=True)
nltk.download('wordnet',quiet=True)
nltk.download('punkt',quiet=True)
nltk.download('omw-1.4',quiet=True)

  from .autonotebook import tqdm as notebook_tqdm


True

In [3]:
# Create the Elasticsearch client instance 'es' that show_search() and episode_response()
# query; it points at a node listening on localhost:9200.
es = Elasticsearch("http://localhost:9200")

In [4]:
# Greeting vocabulary, the canned replies, and the greeting() matcher.

GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up", "hey",)
GREETING_RESPONSES = ["Hello"]

# Punctuation trimmed from the edges of every word before matching, so that
# "hello!" or "hi," are still recognised. Apostrophes inside a word are kept.
_GREETING_EDGE_PUNCTUATION = ".,!?;:\"'()[]{}"


# Checking for greetings
def greeting(sentence):
    """Return a greeting reply if the user's input contains a greeting.

    Matching is case-insensitive, ignores punctuation attached to a word and
    works on whole words/phrases only, so "history" does not match "hi" while
    the two-word entry "what's up" does match.

    Args:
        sentence: Raw text typed by the user.

    Returns:
        A random element of GREETING_RESPONSES when a greeting is found,
        otherwise None.
    """
    trimmed_words = (
        word.strip(_GREETING_EDGE_PUNCTUATION) for word in sentence.lower().split()
    )
    # Pad with spaces so a plain substring test only succeeds on word boundaries.
    normalized = " " + " ".join(word for word in trimmed_words if word) + " "

    for phrase in GREETING_INPUTS:
        if f" {phrase} " in normalized:
            return random.choice(GREETING_RESPONSES)
    return None

In [5]:
def show_search(selected_show):
    """Search the podcast index for shows matching the user's text.

    Runs a fuzzy multi_match query (fuzziness 1) against the show_name and
    show_description fields of the "spotify_podcast_transcripts" index using
    the module-level `es` client.

    Args:
        selected_show: Free text typed by the user.

    Returns:
        A pandas DataFrame with one row per hit, built from each hit's
        `_source` document (empty when nothing matched).
    """
    # The user's text is passed through unchanged as the multi_match query string.
    multi_match_clause = {
        "query": selected_show,
        "fields": ["show_name", "show_description"],
        "fuzziness": 1,
    }

    response = es.search(
        index="spotify_podcast_transcripts",
        body={"query": {"multi_match": multi_match_clause}},
    )

    # Keep only the stored documents; scoring metadata is discarded.
    matched_documents = [hit["_source"] for hit in response["hits"]["hits"]]
    return pd.DataFrame.from_records(matched_documents)


In [6]:
def chosen_show(show_search_results_df):
    """List the distinct shows found and ask the user to pick and confirm one.

    The user is re-prompted until they enter a number inside the listed range,
    so "0", negative numbers and non-numeric text can no longer select the wrong
    show or abort the function. Typing "exit" at the number prompt is passed
    back to the caller as the confirmation value.

    Args:
        show_search_results_df: DataFrame of search hits with a 'show_name' column.

    Returns:
        A 3-tuple (show_confirmation, show_index, unique_shows):
        show_confirmation is the lower-cased yes/no answer ('exit' if the user
        quit, 'no' if there was nothing to choose from); show_index is the
        zero-based position in unique_shows, or None when no show was picked;
        unique_shows is the array of distinct show names.
    """
    unique_shows = show_search_results_df['show_name'].unique()
    num_shows = len(unique_shows)

    print(f"Great, I found {num_shows} shows for you:")
    for position, show_name in enumerate(unique_shows, start=1):
        print(f"{position}. {show_name}")

    if num_shows == 0:
        # Nothing to choose from; report a declined selection instead of prompting forever.
        return 'no', None, unique_shows

    while True:
        print("\nPlease type the corresponding number, 1, 2 etc., for the show you want to search for.")
        user_selected_show = input().strip()

        if user_selected_show.lower() == 'exit':
            return 'exit', None, unique_shows

        try:
            show_index = int(user_selected_show) - 1
        except ValueError:
            show_index = -1  # forces the range check below to reject the input

        if 0 <= show_index < num_shows:
            break
        print(f"Sorry, please type a number between 1 and {num_shows}, or type exit.")

    print(f"I just want to confirm, you selected {unique_shows[show_index]}, correct?")
    print("Please type yes or no:\n")

    show_confirmation = input().strip().lower()
    return show_confirmation, show_index, unique_shows

In [7]:
def episode_response(show_of_interest):
    """Fetch the episodes stored for one show.

    Runs an exact `term` query on `show_name.keyword` against the
    "spotify_podcast_transcripts" index using the module-level `es` client.

    Args:
        show_of_interest: Exact show name to look up.

    Returns:
        A pandas DataFrame with the columns 'episode_name', 'transcript' and
        'sentence_tokens', one row per hit. The frame is empty, but still has
        those columns, when nothing matched or the search failed, so callers
        can always take len() of it and read 'episode_name'.
    """
    # 'sentence_tokens' is requested because transcript_response() reads that column.
    # Documents that lack the field simply yield a missing value in that column.
    source_fields = ["episode_name", "transcript", "sentence_tokens"]

    show_of_interest_body = {
        "query": {
            "term": {
                "show_name.keyword": show_of_interest
            }
        },
        "_source": source_fields,
    }

    try:
        results = es.search(index="spotify_podcast_transcripts", body=show_of_interest_body)
        records = [hit["_source"] for hit in results["hits"]["hits"]]
    except Exception as error:
        # Best effort: report the problem and fall back to "no episodes" rather than
        # returning None, which made the caller fail with an unrelated TypeError.
        print(f"problem with episode_response: {error}")
        records = []

    return pd.DataFrame(records, columns=source_fields)

In [8]:
def confirm_chosen_episode(episode_hits):
    """Ask the user which listed episode they want and confirm the choice.

    The user is re-prompted until the number falls inside the list, so "0",
    out-of-range numbers and non-numeric text no longer abort the function.
    Typing "exit" at the number prompt is passed back as the confirmation value.

    Args:
        episode_hits: DataFrame of episodes with an 'episode_name' column.

    Returns:
        A 3-tuple (name_chosen_episode, user_episode_confirmation, episode_index):
        the selected episode title, the lower-cased yes/no answer, and the
        zero-based row position. When the user exits, or there are no episodes
        to choose from, the title and index are None and the confirmation is
        'exit' or 'no' respectively.
    """
    if episode_hits is None or 'episode_name' not in episode_hits or len(episode_hits) == 0:
        print("Problem confirm_chosen_episode")
        return None, 'no', None

    episode_names = episode_hits['episode_name']
    num_episodes = len(episode_names)

    while True:
        print("\nPlease type the corresponding number, 1, 2 etc., for the episode you want to search for.")
        user_selected_episode = input().strip()

        if user_selected_episode.lower() == 'exit':
            return None, 'exit', None

        try:
            episode_index = int(user_selected_episode) - 1
        except ValueError:
            episode_index = -1  # forces the range check below to reject the input

        if 0 <= episode_index < num_episodes:
            break
        print(f"Sorry, please type a number between 1 and {num_episodes}, or type exit.")

    # Positional lookup: the number the user typed refers to the printed order.
    name_chosen_episode = episode_names.iloc[episode_index]
    print(f"I just want to confirm, you selected {name_chosen_episode}, correct?")
    print("Please type yes or no:\n")

    user_episode_confirmation = input().strip().lower()

    return name_chosen_episode, user_episode_confirmation, episode_index

In [9]:
def transcript_response(episode_hits, episode_index, max_sentences=200):
    """Classify an episode's sentences and print the ones flagged by the model.

    Each sentence is paired with the sentence before it ("__START__" for the
    first one), tokenized with the module-level `tokenizer` and scored with the
    module-level `model_class`. Sentences predicted as class 1 are collected
    and printed under the "advertisers" heading.

    Args:
        episode_hits: DataFrame of episodes. The sentences come from the
            'sentence_tokens' column when it holds a list; otherwise the
            'transcript' text is split into sentences with NLTK.
        episode_index: Zero-based row position of the episode to analyse.
        max_sentences: Only the first `max_sentences` sentences are classified.

    Returns:
        The list of (sentence, prediction) tuples that were flagged; an empty
        list when nothing was flagged or when processing failed.
    """
    sentence_outputs_tuples = []

    try:
        sent_tokens = None
        if 'sentence_tokens' in episode_hits:
            sent_tokens = episode_hits['sentence_tokens'].iloc[episode_index]

        # The episode query may not return pre-split sentences; fall back to
        # splitting the raw transcript so the classifier still has input.
        if not isinstance(sent_tokens, (list, tuple)):
            sent_tokens = nltk.sent_tokenize(episode_hits['transcript'].iloc[episode_index])

        sent_tokens = sent_tokens[:max_sentences]

        # Inference only: no gradients are needed.
        with torch.no_grad():
            for i, s in enumerate(sent_tokens):
                context = "__START__" if i == 0 else sent_tokens[i - 1]
                out = tokenizer(context,
                                s,
                                padding="max_length",
                                max_length=256,
                                truncation=True,
                                return_attention_mask=True,
                                return_tensors='pt')
                outputs = model_class(**out)

                prediction = torch.argmax(outputs.logits, dim=-1).item()

                # Evaluate every sentence inside the loop, not just the last one.
                if prediction == 1:
                    sentence_outputs_tuples.append((s, prediction))

    except Exception as error:
        print(f"Problem with transcript_response: {error}")
        return []

    if len(sentence_outputs_tuples) == 0:
        print("Sorry, no results found for your query. Please try again")
    else:
        print("Here are the advertisers for this episode:")
        for sentence_output in sentence_outputs_tuples:
            print(sentence_output)

    return sentence_outputs_tuples

In [10]:
def confirming_the_show(unique_shows, show_index):
    """Announce the confirmed show, look up its episodes and list them.

    Args:
        unique_shows: Indexable collection of show names offered to the user.
        show_index: Zero-based position of the confirmed show in unique_shows.

    Returns:
        A 2-tuple (num_episodes_found, episode_hits), where episode_hits is
        whatever episode_response() returned for the show and
        num_episodes_found is its length.
    """
    confirmed_show = unique_shows[show_index]
    print(confirmed_show)
    print(f"Great, thanks for confirming. Let me search for {confirmed_show} episodes.")

    episode_hits = episode_response(confirmed_show)
    num_episodes_found = len(episode_hits)

    print(f"Great, I found {num_episodes_found} episode(s) for you:")
    # Numbered from 1 so the printed list matches what the user is asked to type.
    for position, episode_title in enumerate(episode_hits['episode_name'], start=1):
        print(f"{position}. {episode_title}")

    return num_episodes_found, episode_hits


In [16]:

# Chatbot interaction code
#
# Flow per turn: exit / thanks / greeting are answered directly; anything else is
# treated as a show name -> pick and confirm a show -> pick and confirm an episode
# (skipped when the show has exactly one) -> print the flagged sentences.

# Answers accepted as "yes" at either confirmation prompt.
AFFIRMATIVE_RESPONSES = ('y', "yes", "yea", "sure", "ok", "okay", "k")
GOODBYE_MESSAGE = "Thank you for using the Podcast Information Chatbot. Goodbye."

flag = True
print('''Welcome to the Podcast Information Chatbot. 
I can help you find the advertisers for specific shows or episodes to your favorite podcasts.
For your first question, please type the show name. I will search and confirm your input.
To end the session, please type the word "exit"\n''')

# While the chat is open...
while flag:

    # Receive user input and cast it to lowercase
    user_response = input().lower()

    # Handle end of chat
    if user_response == 'exit':
        flag = False
        print(GOODBYE_MESSAGE)
        continue

    # Respond kindly if user responds kindly
    if user_response in ['thanks', 'thank you']:
        flag = False
        print("Answer: You are welcome!")
        continue

    # Call greeting() once and reuse the reply instead of evaluating it twice.
    greeting_reply = greeting(user_response)
    if greeting_reply is not None:
        print("Answer: " + greeting_reply)
        continue

    # Search in Elasticsearch based on user input
    show_search_results_df = show_search(user_response)
    if show_search_results_df.empty:
        print("Sorry, no results found for your query. Please try again")
        continue

    # A helper that hit a problem may hand back None; check before unpacking.
    show_choice = chosen_show(show_search_results_df)
    if show_choice is None:
        print("Sorry, I could not read that selection. Please type the show name again.")
        continue
    show_confirmation, show_index, unique_shows = show_choice

    if show_confirmation == 'exit':
        flag = False
        print(GOODBYE_MESSAGE)
        continue
    if show_confirmation not in AFFIRMATIVE_RESPONSES or show_index is None:
        print("Okay, please type the show name again.")
        continue

    num_episodes_found, episode_hits = confirming_the_show(unique_shows, show_index)

    # With no episodes there is nothing to choose from or classify.
    if num_episodes_found == 0:
        print("Sorry, no results found for your query. Please try again")
        continue

    # A single episode needs no selection step.
    if num_episodes_found == 1:
        transcript_response(episode_hits, episode_index=0)
        continue

    episode_choice = confirm_chosen_episode(episode_hits)
    if episode_choice is None:
        print("Sorry, I could not read that selection. Please type the show name again.")
        continue
    name_chosen_episode, user_episode_confirmation, episode_index = episode_choice

    if user_episode_confirmation == 'exit':
        flag = False
        print(GOODBYE_MESSAGE)
    elif user_episode_confirmation in AFFIRMATIVE_RESPONSES and episode_index is not None:
        print(f"Okay, let me search for {name_chosen_episode} advertisers:")
        transcript_response(episode_hits, episode_index)
    else:
        print("Okay, please type the show name again.")

Welcome to the Podcast Information Chatbot. 
I can help you find the advertisers for specific shows or episodes to your favorite podcasts.
For your first question, please type the show name. I will search and confirm your input.
To end the session, please type the word "exit"

Great, I found 8 shows for you:
1. Vulgar History
2. History Hall Park Academy 
3. Political Scandals 
4. Four For The Road
5. Morning Cup Of Murder
6. Biographics: History One Life at a Time
7. Jewish History with Rabbi Dr. Dovid Katz 
8. Return To Tradition

Please type the corresponding number, 1, 2 etc., for the show you want to search for.
I just want to confirm, you selected Political Scandals , correct?
Please type yes or no:

Political Scandals 
Great, thanks for confirming. Let me search for Political Scandals  episodes.
Great, I found 1 episode(s) for you:
1. Scandal 36: Carroll Hubbard
["He gave people up to one thousand dollars to make contributions to his wife's campaign as the book of love says if y