# Import Dependencies

In [1]:
import nltk
import random
import numpy as np

# !pip install sentence_transformers
from sentence_transformers import SentenceTransformer

from elasticsearch import Elasticsearch

# Assuming you have already created an Elasticsearch client instance 'es'
es = Elasticsearch("http://localhost:9200")
import pandas as pd

import torch
import torch.nn.functional as F

import time
import nltk

from typing import Tuple


from transformers import AutoTokenizer, AutoModel
pre_trained_model_name = "sebastian-hofstaetter/distilbert-dot-tas_b-b256-msmarco"

tokenizer = AutoTokenizer.from_pretrained(pre_trained_model_name) 
bert_model = AutoModel.from_pretrained(pre_trained_model_name)

# Test sebastian-hofstaetter/distilbert-dot-tas_b-b256-msmarco on Episodes

In [3]:
# test sebastian-hofstaetter/distilbert-dot-tas_b-b256-msmarco on indicator episodes
indicator_episodes = ["spotify:episode:5fG4VlWnWwzAt6mSs0H7lY", "spotify:episode:7JG3lLnRoDdOxuqjf14ZkM",
                            "spotify:episode:3kkhUQJ9DXYs6aSdDmPp2V", "spotify:episode:4fJ6Y6IpljKy8FT8DZHx1L",
                            "spotify:episode:5xBPWxqVCocdBgybmHjr5V", "spotify:episode:0X663c1I6j1cehJvy10WMm", 
                            "spotify:episode:61a1JjZO27lGCvCwBaCkpC", "spotify:episode:0goWRy1gwB23rQVy8ci7Wa", 
                            "spotify:episode:0BSD8QYmd2mQ1V43uIrU4I", "spotify:episode:5xH3cdpkxnJhQjPV22sxKC",
                            "spotify:episode:0YPvJfSEw0jacPB3IeT37d", "spotify:episode:1gnpv26FFvIxpnwVbbRXv1",
                            "spotify:episode:6rh4J52THn252yi7t11Yqf", "spotify:episode:3IfmcM2rcWb82601pkPvCh",
                            "spotify:episode:5LJ33LdXWhqOu1KNad6D5q", "spotify:episode:1AxFBio6NwwG0MAjdCK5gK",
                            "spotify:episode:3U33mRnDJcXywmBm1hahlL", "spotify:episode:57Nzb9H2VRZgHcNFChwbBG",
                            "spotify:episode:6vRLNVEQ4xqtMxnms3RZh3", "spotify:episode:1tN044BhlPjjiluZ7Wo7UL",
                            "spotify:episode:1fs86N6FLUKW2e5NdX1dF1", "spotify:episode:1A4cHtP3wIVQutpCgS7kd7",
                            "spotify:episode:1Mi90UjG91rm73qvHzcG0t", "spotify:episode:3TuC8HZp9VdXtxYMQkJI0m",
                            "spotify:episode:67v8V9SOXxivYQfAHSsc5f", "spotify:episode:3QE8qxHtJg3zMrq03R4GOj",
                            "spotify:episode:3Vr6AUCTQgVWoE137b4IdB", "spotify:episode:4oQZAcd6xnxVA1e2GWaIFb",
                            "spotify:episode:4sbSs1xtuf8dF3xtb7btUi", "spotify:episode:2RoLspr2PQTki51sfMhd20"]

In [91]:
def apply_distilbert_dot_model(sentence:str,error_list:list):-> Tuple:

""" 
    Applies the distilbert dot model to the sentence and returns the sentence and the score for the sentence.

    Args:
        sentence (str): The sentence to be evaluated.
        error_list (list): The list of sentences that were not evaluated.

    Returns:
        Tuple: The sentence and the score for the sentence.
"""

    error_sentences = []
    try:
        # Encode the sentence
        sentence_input = tokenizer(sentence, return_tensors="pt")
    except Exception as error:
        print("Error:", error)
        print("Something wrong with the sentence tokenization", sentence)
        error_list.append(sentence)
        pass
    
    try:
        # Confert ids to tokens 
        tokenizer.convert_ids_to_tokens(sentence_input["input_ids"][0])
    except Exception as error:
        print("Error:", error)
        print("Something wrong with the sentence tokenization, converting ids to tokens", sentence)
        error_list.append(sentence)
        pass

    try:
        # Encode the query
        passage1_encoded = bert_model(**sentence_input)[0][:,0,:].squeeze(0)
    except Exception as error:
        print("Error:", error)
        print("Something wrong with the sentence encoding", sentence)
        error_list.append(sentence)
        pass

    try:
        # Produce dot score for passage
        score_for_p1 = query_encoded.dot(passage1_encoded)
    except Exception as error:
        print("Error:", error)
        print("Something wrong with the sentence encoding", sentence)
        error_list.append(sentence)
        pass
    return sentence, score_for_p1

In [116]:
of_interest = []

# Query input
query_input = tokenizer("who are the advertisers, sponsors, advertisement, or ads and/or businesses, people, teams thanked?",return_tensors="pt")
print("Query Tokenized:",tokenizer.convert_ids_to_tokens(query_input["input_ids"][0]))
query_encoded    = bert_model(**query_input)[0][:,0,:].squeeze(0)

# Loop through all the episodes
for episode in indicator_episodes:
    
    try:
        episode_ = {
            "query": {
                "match_phrase": {
                    "_id": episode,
                }
            }
        }

        # Execute the Elasticsearch search query
        # show_search_results = es.search(index="spotify_podcast_transcripts", body=show_body)
        results = es.search(index="spotify_podcast_transcripts", body=episode_)

        # Extract relevant information from the search results
        hits = results["hits"]["hits"]
    except Exception as error:
        print("Error:", error)
        print("Something wrong with retrieving the episode from Elasticsearch")

    start = time.time()
    print("start epsiode:", episode)
    
    try:
        # Loop through all the sentences in the episode
        sentence_score = []
        error_sentences = []

        # Split the sentences into parts if they are too long
        for sentence in hits[0]['_source']['sentence_tokens']:
            sentence_len = len(sentence)

            # If the sentence is longer than 800 tokens, split it into parts
            if sentence_len > 800:
                split_by = 2
                split = round(sentence_len // split_by,0)
                
                # Split the sentence into parts
                sentence_split = [sentence[i:i+split] for i in range(0, len(sentence), split)]
                print("splitting sentence of length:", sentence_len, "into", split_by, "parts of length:", split)

                # Loop through all the parts
                for sentence in sentence_split:

                    # Apply the distillbert_dot_model
                    sentence, score_for_p1 = apply_distilbert_dot_model(sentence, error_sentences)
                    sentence_score.append((sentence, score_for_p1.item()))
        
            # If the sentence is shorter than 800 tokens, apply the distillbert_dot model without sectioning
            sentence, score_for_p1 = apply_distilbert_dot_model(sentence, error_sentences)
            sentence_score.append((sentence, score_for_p1.item()))

            end = time.time()
    except Exception as error:
        print("Error:", error)
        print("Something wrong with the sentence scoring", sentence)
    
    # Sort the sentences by their score
    sentence_score.sort(key=lambda tup: tup[1], reverse=True)

    # take the top 30 responses and scores
    top_responses = [sentence[0] for sentence in sentence_score[:30]]
    top25_scores = [sentence[1] for sentence in sentence_score[:30]]
    
    # print("top 25 responses:", top_responses)
    # print("top 25 scores:", top25_scores)

    total_time = round((end-start)/60, 2)
    print(f"Total time for {episode}: {total_time} minutes")

    episode_info = pd.DataFrame({
        "episode_uri": episode,
        "top_responses": top_responses,
        "top_scores": top25_scores,
        "total_time": total_time
        })
    print(episode_info.shape)
    of_interest.append(episode_info)

print(len(of_interest))

Query Tokenized: ['[CLS]', 'who', 'are', 'the', 'ad', '##vert', '##iser', '##s', ',', 'sponsors', ',', 'advertisement', ',', 'or', 'ads', 'and', '/', 'or', 'businesses', ',', 'people', ',', 'teams', 'thanked', '?', '[SEP]']
start epsiode: spotify:episode:5fG4VlWnWwzAt6mSs0H7lY
splitting sentence of length: 1194 into 2 parts of length: 597
splitting sentence of length: 1144 into 2 parts of length: 572
splitting sentence of length: 3463 into 2 parts of length: 1731
splitting sentence of length: 1037 into 2 parts of length: 518
splitting sentence of length: 1290 into 2 parts of length: 645
splitting sentence of length: 1183 into 2 parts of length: 591
splitting sentence of length: 4175 into 2 parts of length: 2087
splitting sentence of length: 1502 into 2 parts of length: 751
Total time for spotify:episode:5fG4VlWnWwzAt6mSs0H7lY: 0.89 minutes
(30, 4)
start epsiode: spotify:episode:7JG3lLnRoDdOxuqjf14ZkM
Total time for spotify:episode:7JG3lLnRoDdOxuqjf14ZkM: 0.56 minutes
(30, 4)
start epsi

In [111]:
error_sentences

[]

In [117]:
of_interest[0]

Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,"Oh, yeah other Shooters we're super fortunate ...",89.197632,0.89
1,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,I can't I can't but but like I'm just that's w...,89.057426,0.89
2,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,And I don't know there's a lot of people inter...,88.724937,0.89
3,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,It's always a great experience in like you're ...,88.405342,0.89
4,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,It's just so funny shown up because there's so...,88.26886,0.89
5,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,I think like in teams remaining that's or people.,88.256111,0.89
6,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,An organized team and get fucking thrash and t...,88.139984,0.89
7,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,"So we're all kind of like, I'm not helping any...",88.095894,0.89
8,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,So everyone's like playing as an individual as...,87.89695,0.89
9,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,"And you're all the right spots, but you just g...",87.706169,0.89


In [115]:
# of_interest[0]

Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,"Oh, yeah other Shooters we're super fortunate ...",89.197632,0.93
1,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,I can't I can't but but like I'm just that's w...,89.057426,0.93
2,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,And I don't know there's a lot of people inter...,88.724937,0.93
3,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,It's always a great experience in like you're ...,88.405342,0.93
4,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,It's just so funny shown up because there's so...,88.26886,0.93
5,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,I think like in teams remaining that's or people.,88.256111,0.93
6,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,An organized team and get fucking thrash and t...,88.139984,0.93
7,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,"So we're all kind of like, I'm not helping any...",88.095894,0.93
8,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,So everyone's like playing as an individual as...,87.89695,0.93
9,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,"And you're all the right spots, but you just g...",87.706169,0.93


In [118]:
for df in of_interest:
    display(df.head())

Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,"Oh, yeah other Shooters we're super fortunate ...",89.197632,0.89
1,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,I can't I can't but but like I'm just that's w...,89.057426,0.89
2,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,And I don't know there's a lot of people inter...,88.724937,0.89
3,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,It's always a great experience in like you're ...,88.405342,0.89
4,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,It's just so funny shown up because there's so...,88.26886,0.89


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:7JG3lLnRoDdOxuqjf14ZkM,"I mean, those are you know, there's some signi...",88.281265,0.56
1,spotify:episode:7JG3lLnRoDdOxuqjf14ZkM,I thought that's this was they had something g...,87.972641,0.56
2,spotify:episode:7JG3lLnRoDdOxuqjf14ZkM,"So well, I mean like we're going on informatio...",87.631943,0.56
3,spotify:episode:7JG3lLnRoDdOxuqjf14ZkM,But I also know that fans like Kyra Williams b...,86.817825,0.56
4,spotify:episode:7JG3lLnRoDdOxuqjf14ZkM,We think we know who it is.,86.626434,0.56


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:3kkhUQJ9DXYs6aSdDmPp2V,"ed about, you know, like what it is to be like...",89.186562,0.63
1,spotify:episode:3kkhUQJ9DXYs6aSdDmPp2V,and sometimes they're halfway across the count...,89.177628,0.63
2,spotify:episode:3kkhUQJ9DXYs6aSdDmPp2V,And a lot of the women's groups on Facebook an...,88.689362,0.63
3,spotify:episode:3kkhUQJ9DXYs6aSdDmPp2V,"Ronnie when wherever we're up that way, but ye...",88.287003,0.63
4,spotify:episode:3kkhUQJ9DXYs6aSdDmPp2V,"You know, I run a website you've got And peop...",87.610291,0.63


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:4fJ6Y6IpljKy8FT8DZHx1L,"So, you know, that's that's one reason why we ...",92.369278,0.57
1,spotify:episode:4fJ6Y6IpljKy8FT8DZHx1L,We're doing giveaways announcements things lik...,89.14006,0.57
2,spotify:episode:4fJ6Y6IpljKy8FT8DZHx1L,"You know Heroes on the water, you know, it def...",88.581161,0.57
3,spotify:episode:4fJ6Y6IpljKy8FT8DZHx1L,We're joined columns and right.,88.402306,0.57
4,spotify:episode:4fJ6Y6IpljKy8FT8DZHx1L,Thank you so much.,88.307861,0.57


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:5xBPWxqVCocdBgybmHjr5V,So you got any anybody like to thank sponsor s...,91.726151,0.78
1,spotify:episode:5xBPWxqVCocdBgybmHjr5V,I had some customers.,89.408089,0.78
2,spotify:episode:5xBPWxqVCocdBgybmHjr5V,"Well, thanks a lot of guys like that.",89.261078,0.78
3,spotify:episode:5xBPWxqVCocdBgybmHjr5V,All right you guys to thanks a lot again.,89.106979,0.78
4,spotify:episode:5xBPWxqVCocdBgybmHjr5V,Thanks.,89.094635,0.78


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:0X663c1I6j1cehJvy10WMm,We are all in this together everything from bu...,89.035439,0.29
1,spotify:episode:0X663c1I6j1cehJvy10WMm,So I'm happy to say that I have enough faith i...,88.109879,0.29
2,spotify:episode:0X663c1I6j1cehJvy10WMm,Just went all over the faces of the people sit...,87.690369,0.29
3,spotify:episode:0X663c1I6j1cehJvy10WMm,Thank you so much for listening.,87.602249,0.29
4,spotify:episode:0X663c1I6j1cehJvy10WMm,I want to talk about the impacts that are happ...,87.313004,0.29


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:61a1JjZO27lGCvCwBaCkpC,Thank you.,89.589943,0.65
1,spotify:episode:61a1JjZO27lGCvCwBaCkpC,Thanks.,89.094635,0.65
2,spotify:episode:61a1JjZO27lGCvCwBaCkpC,We're just so blessed and happy that you came ...,89.013054,0.65
3,spotify:episode:61a1JjZO27lGCvCwBaCkpC,We're so excited and you guys know where to fi...,88.213264,0.65
4,spotify:episode:61a1JjZO27lGCvCwBaCkpC,Behavior when presented with the same prompt t...,87.90477,0.65


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:0goWRy1gwB23rQVy8ci7Wa,"Good job, but also we gave I put up a post on ...",87.776657,0.61
1,spotify:episode:0goWRy1gwB23rQVy8ci7Wa,"Questions to tackle, but before we start specu...",87.741821,0.61
2,spotify:episode:0goWRy1gwB23rQVy8ci7Wa,So thank you very much to you.,87.721283,0.61
3,spotify:episode:0goWRy1gwB23rQVy8ci7Wa,It's a great I think message for anyone right?,87.452133,0.61
4,spotify:episode:0goWRy1gwB23rQVy8ci7Wa,We have they've got they've given us officiall...,86.986946,0.61


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:0BSD8QYmd2mQ1V43uIrU4I,Ruminating on what mistakes the Christian demo...,86.35186,0.3
1,spotify:episode:0BSD8QYmd2mQ1V43uIrU4I,It was the red brigades each caller conveyed t...,86.3013,0.3
2,spotify:episode:0BSD8QYmd2mQ1V43uIrU4I,Accusation to make but that was it to everyone...,86.255318,0.3
3,spotify:episode:0BSD8QYmd2mQ1V43uIrU4I,Aldo was candid about the failings of the Chri...,86.128883,0.3
4,spotify:episode:0BSD8QYmd2mQ1V43uIrU4I,While these futile efforts continued a debate ...,86.128006,0.3


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:5xH3cdpkxnJhQjPV22sxKC,There are too many different motives only thre...,85.812935,0.15
1,spotify:episode:5xH3cdpkxnJhQjPV22sxKC,Our next assassin had a more selfish reasoning...,85.683319,0.15
2,spotify:episode:5xH3cdpkxnJhQjPV22sxKC,He said only four percent of victims receive d...,85.526329,0.15
3,spotify:episode:5xH3cdpkxnJhQjPV22sxKC,It was a question fool and family echoed the i...,85.372192,0.15
4,spotify:episode:5xH3cdpkxnJhQjPV22sxKC,Officially Booth wanted revenge on the man who...,85.04982,0.15


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:0YPvJfSEw0jacPB3IeT37d,"So I highlighted a large section here, but I r...",87.531731,0.4
1,spotify:episode:0YPvJfSEw0jacPB3IeT37d,But if you're a person of color and have a lot...,87.215431,0.4
2,spotify:episode:0YPvJfSEw0jacPB3IeT37d,"But first of all, we'll go through and do our ...",87.074615,0.4
3,spotify:episode:0YPvJfSEw0jacPB3IeT37d,"You know, the one where they've got the main e...",86.967964,0.4
4,spotify:episode:0YPvJfSEw0jacPB3IeT37d,Thank you so much for joining us Anna you guys...,86.40255,0.4


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:1gnpv26FFvIxpnwVbbRXv1,If you have a message if you have some come co...,85.782341,0.08
1,spotify:episode:1gnpv26FFvIxpnwVbbRXv1,I had my name balls did my practices I have re...,85.033485,0.08
2,spotify:episode:1gnpv26FFvIxpnwVbbRXv1,Now my Skyrim everyone welcome back to my shoe...,84.972702,0.08
3,spotify:episode:1gnpv26FFvIxpnwVbbRXv1,It's lot and I hope this possibilities availab...,84.891136,0.08
4,spotify:episode:1gnpv26FFvIxpnwVbbRXv1,"I want myself back sir, but accessible to you ...",84.821716,0.08


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:6rh4J52THn252yi7t11Yqf,I really apologize you and I thank you.,88.978973,0.42
1,spotify:episode:6rh4J52THn252yi7t11Yqf,I applaud you.,88.309372,0.42
2,spotify:episode:6rh4J52THn252yi7t11Yqf,"Just please listen, he's helping me and it mak...",87.807602,0.42
3,spotify:episode:6rh4J52THn252yi7t11Yqf,I never dared me and I wish I did but I just n...,87.645622,0.42
4,spotify:episode:6rh4J52THn252yi7t11Yqf,"Oh my God, I'm really a good friend and David ...",86.723396,0.42


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:3IfmcM2rcWb82601pkPvCh,Thank you listeners for supporting these incre...,89.696877,0.1
1,spotify:episode:3IfmcM2rcWb82601pkPvCh,Today's episode is sponsored by glory and shin...,87.014542,0.1
2,spotify:episode:3IfmcM2rcWb82601pkPvCh,Why didn't we see that showing up in love and ...,86.254257,0.1
3,spotify:episode:3IfmcM2rcWb82601pkPvCh,"She is Beloved the Driver, yes beloved the ca...",86.108673,0.1
4,spotify:episode:3IfmcM2rcWb82601pkPvCh,You can find her on Instagram at Krug The Thin...,86.108315,0.1


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:5LJ33LdXWhqOu1KNad6D5q,Thank you listeners for supporting these incre...,89.696877,0.09
1,spotify:episode:5LJ33LdXWhqOu1KNad6D5q,Today's episode is sponsored by glory and shin...,87.014542,0.09
2,spotify:episode:5LJ33LdXWhqOu1KNad6D5q,Today's episode is all Also sponsored by pray...,85.987732,0.09
3,spotify:episode:5LJ33LdXWhqOu1KNad6D5q,She is passionate about her faith and has cont...,85.194092,0.09
4,spotify:episode:5LJ33LdXWhqOu1KNad6D5q,Please visit pray more Retreat dot-org.,84.700455,0.09


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:1AxFBio6NwwG0MAjdCK5gK,Thank you.,89.589943,0.18
1,spotify:episode:1AxFBio6NwwG0MAjdCK5gK,Etc.,86.95089,0.18
2,spotify:episode:1AxFBio6NwwG0MAjdCK5gK,I've got a question first well means please go...,85.642418,0.18
3,spotify:episode:1AxFBio6NwwG0MAjdCK5gK,Even those trusted crew members can sometimes ...,85.614006,0.18
4,spotify:episode:1AxFBio6NwwG0MAjdCK5gK,Our pilgrimage pod and Facebook at the pilgrim...,85.602974,0.18


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:3U33mRnDJcXywmBm1hahlL,Thank you.,89.589943,0.74
1,spotify:episode:3U33mRnDJcXywmBm1hahlL,Thank you.,89.589943,0.74
2,spotify:episode:3U33mRnDJcXywmBm1hahlL,We have some announcements and reminders is an...,88.014763,0.74
3,spotify:episode:3U33mRnDJcXywmBm1hahlL,So Tina's like who's been Bianca that making m...,87.64444,0.74
4,spotify:episode:3U33mRnDJcXywmBm1hahlL,"So, please feel free to tweet along with us.",87.111893,0.74


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:57Nzb9H2VRZgHcNFChwbBG,"Also, if you want to be if you want to sponsor...",89.189102,0.13
1,spotify:episode:57Nzb9H2VRZgHcNFChwbBG,Thank God.,88.359428,0.13
2,spotify:episode:57Nzb9H2VRZgHcNFChwbBG,Thank you so much for listening.,87.602249,0.13
3,spotify:episode:57Nzb9H2VRZgHcNFChwbBG,"Hey, maybe one of those things of this hopeful...",86.95015,0.13
4,spotify:episode:57Nzb9H2VRZgHcNFChwbBG,I'm on a Mobile hairdressing page and I asked...,86.920303,0.13


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:6vRLNVEQ4xqtMxnms3RZh3,They love it and you know investors are drivin...,88.753769,0.29
1,spotify:episode:6vRLNVEQ4xqtMxnms3RZh3,"We talked about earlier, but they also offer o...",88.405243,0.29
2,spotify:episode:6vRLNVEQ4xqtMxnms3RZh3,"You know, I'm somebody who's been doing this.",87.956009,0.29
3,spotify:episode:6vRLNVEQ4xqtMxnms3RZh3,There's something called eBay Bucks A lot of p...,87.790627,0.29
4,spotify:episode:6vRLNVEQ4xqtMxnms3RZh3,If you want like links to those just DME and I...,87.660431,0.29


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:1tN044BhlPjjiluZ7Wo7UL,Thanks so much and we'll see you all tomorrow.,87.432022,0.14
1,spotify:episode:1tN044BhlPjjiluZ7Wo7UL,So now that we've gone over the major points a...,85.319595,0.14
2,spotify:episode:1tN044BhlPjjiluZ7Wo7UL,And most within one hour they did not comment ...,84.946251,0.14
3,spotify:episode:1tN044BhlPjjiluZ7Wo7UL,Keep in mind that this podcast is designed to ...,84.841286,0.14
4,spotify:episode:1tN044BhlPjjiluZ7Wo7UL,This is the ortho bullets podcast a daily audi...,84.583199,0.14


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:1fs86N6FLUKW2e5NdX1dF1,I would say like entirely So I would like to ...,89.339706,0.43
1,spotify:episode:1fs86N6FLUKW2e5NdX1dF1,Their appreciation support for the franchise a...,88.945366,0.43
2,spotify:episode:1fs86N6FLUKW2e5NdX1dF1,Thank you all for your support.,88.551247,0.43
3,spotify:episode:1fs86N6FLUKW2e5NdX1dF1,It would be very welcome.,87.48336,0.43
4,spotify:episode:1fs86N6FLUKW2e5NdX1dF1,They're going to be paying attention to number...,87.419342,0.43


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:1A4cHtP3wIVQutpCgS7kd7,And that is it guys few announcements.,89.736458,0.76
1,spotify:episode:1A4cHtP3wIVQutpCgS7kd7,Thank you for joining us.,89.038361,0.76
2,spotify:episode:1A4cHtP3wIVQutpCgS7kd7,Don't forget to like comment and subscribe to ...,87.686356,0.76
3,spotify:episode:1A4cHtP3wIVQutpCgS7kd7,Like like you you hype up these people the ent...,87.495583,0.76
4,spotify:episode:1A4cHtP3wIVQutpCgS7kd7,Thanks for listening guys.,87.459137,0.76


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:1Mi90UjG91rm73qvHzcG0t,Hi everyone.,86.922188,0.02
1,spotify:episode:1Mi90UjG91rm73qvHzcG0t,In the near term Asian markets may be in for c...,83.84449,0.02
2,spotify:episode:1Mi90UjG91rm73qvHzcG0t,This has been a podcast from ocbc Bank follow ...,83.572586,0.02
3,spotify:episode:1Mi90UjG91rm73qvHzcG0t,I'm Selena Ling Chief Economist for CPC bang t...,83.389191,0.02
4,spotify:episode:1Mi90UjG91rm73qvHzcG0t,If you reference the G7 meeting of Finance Chi...,83.141045,0.02


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:3TuC8HZp9VdXtxYMQkJI0m,Thank you.,89.589943,0.1
1,spotify:episode:3TuC8HZp9VdXtxYMQkJI0m,I hope you do to thank you so much.,88.546982,0.1
2,spotify:episode:3TuC8HZp9VdXtxYMQkJI0m,He's with mr. D. Hello and welcome to Friday f...,86.276268,0.1
3,spotify:episode:3TuC8HZp9VdXtxYMQkJI0m,Thanks for having happy Friday everybody.,85.741425,0.1
4,spotify:episode:3TuC8HZp9VdXtxYMQkJI0m,I know they do have a parent or advisory warni...,85.498055,0.1


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:67v8V9SOXxivYQfAHSsc5f,Thank you so much for chatting.,88.334587,0.16
1,spotify:episode:67v8V9SOXxivYQfAHSsc5f,Thank you so much for joining me on for the lo...,87.355186,0.16
2,spotify:episode:67v8V9SOXxivYQfAHSsc5f,Thank you so much Rodman.,86.162308,0.16
3,spotify:episode:67v8V9SOXxivYQfAHSsc5f,I'm so excited to be here.,85.876465,0.16
4,spotify:episode:67v8V9SOXxivYQfAHSsc5f,Do you have any tips for teachers I guess to p...,85.684448,0.16


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:3QE8qxHtJg3zMrq03R4GOj,We just want to say thanks to our sponsor.,95.07843,0.57
1,spotify:episode:3QE8qxHtJg3zMrq03R4GOj,And today Spotify is one of our sponsors on Sp...,92.813988,0.57
2,spotify:episode:3QE8qxHtJg3zMrq03R4GOj,Thank you.,89.589943,0.57
3,spotify:episode:3QE8qxHtJg3zMrq03R4GOj,Thank you.,89.589943,0.57
4,spotify:episode:3QE8qxHtJg3zMrq03R4GOj,Thank you.,89.589943,0.57


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:3Vr6AUCTQgVWoE137b4IdB,"So that's and if I'm proud of it, but it's par...",87.354828,0.49
1,spotify:episode:3Vr6AUCTQgVWoE137b4IdB,We are.,87.203232,0.49
2,spotify:episode:3Vr6AUCTQgVWoE137b4IdB,But my guess is that people who work with you ...,87.027206,0.49
3,spotify:episode:3Vr6AUCTQgVWoE137b4IdB,"I think that you know, I just sort of help peo...",86.817032,0.49
4,spotify:episode:3Vr6AUCTQgVWoE137b4IdB,for example I can give you one example that c...,86.807785,0.49


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:4oQZAcd6xnxVA1e2GWaIFb,Thank you.,89.589943,0.2
1,spotify:episode:4oQZAcd6xnxVA1e2GWaIFb,But thank God.,88.522011,0.2
2,spotify:episode:4oQZAcd6xnxVA1e2GWaIFb,And thanks again for joining us today.,88.509727,0.2
3,spotify:episode:4oQZAcd6xnxVA1e2GWaIFb,"Thank you again, man.",88.089447,0.2
4,spotify:episode:4oQZAcd6xnxVA1e2GWaIFb,"I've used other platforms, and I'm so happy I ...",87.564529,0.2


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:4sbSs1xtuf8dF3xtb7btUi,They will even help you find other companies t...,90.060837,0.41
1,spotify:episode:4sbSs1xtuf8dF3xtb7btUi,We're your hosts.,89.264397,0.41
2,spotify:episode:4sbSs1xtuf8dF3xtb7btUi,So they're waving and everything and because o...,87.569412,0.41
3,spotify:episode:4sbSs1xtuf8dF3xtb7btUi,We want to take some time to introduce you to ...,87.446594,0.41
4,spotify:episode:4sbSs1xtuf8dF3xtb7btUi,"So again, thanks for listening and we'll see y...",86.970871,0.41


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:2RoLspr2PQTki51sfMhd20,We are grateful for you our listeners you allo...,90.327499,0.29
1,spotify:episode:2RoLspr2PQTki51sfMhd20,In other words Congressional staffers are Ed ...,86.872383,0.29
2,spotify:episode:2RoLspr2PQTki51sfMhd20,Thanks for listening.,86.267189,0.29
3,spotify:episode:2RoLspr2PQTki51sfMhd20,Is or his wife's election campaigns any favors...,86.216476,0.29
4,spotify:episode:2RoLspr2PQTki51sfMhd20,He'd already talked about wanting to work with...,85.902397,0.29


In [114]:
# for df in of_interest:
#     display(df.head())

Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,"Oh, yeah other Shooters we're super fortunate ...",89.197632,0.93
1,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,I can't I can't but but like I'm just that's w...,89.057426,0.93
2,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,And I don't know there's a lot of people inter...,88.724937,0.93
3,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,It's always a great experience in like you're ...,88.405342,0.93
4,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,It's just so funny shown up because there's so...,88.26886,0.93


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:7JG3lLnRoDdOxuqjf14ZkM,"I mean, those are you know, there's some signi...",88.281265,0.63
1,spotify:episode:7JG3lLnRoDdOxuqjf14ZkM,I thought that's this was they had something g...,87.972641,0.63
2,spotify:episode:7JG3lLnRoDdOxuqjf14ZkM,"So well, I mean like we're going on informatio...",87.631943,0.63
3,spotify:episode:7JG3lLnRoDdOxuqjf14ZkM,But I also know that fans like Kyra Williams b...,86.817825,0.63
4,spotify:episode:7JG3lLnRoDdOxuqjf14ZkM,We think we know who it is.,86.626434,0.63


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:3kkhUQJ9DXYs6aSdDmPp2V,"now, so it's trying to get the sponsors as wel...",92.697548,0.69
1,spotify:episode:3kkhUQJ9DXYs6aSdDmPp2V,about that on KP in the other night to know w...,89.100616,0.69
2,spotify:episode:3kkhUQJ9DXYs6aSdDmPp2V,And a lot of the women's groups on Facebook an...,88.689362,0.69
3,spotify:episode:3kkhUQJ9DXYs6aSdDmPp2V,"Ronnie when wherever we're up that way, but ye...",88.287003,0.69
4,spotify:episode:3kkhUQJ9DXYs6aSdDmPp2V,"You know, I run a website you've got And peop...",87.610291,0.69


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:4fJ6Y6IpljKy8FT8DZHx1L,"So, you know, that's that's one reason why we ...",92.369278,0.64
1,spotify:episode:4fJ6Y6IpljKy8FT8DZHx1L,We're doing giveaways announcements things lik...,89.14006,0.64
2,spotify:episode:4fJ6Y6IpljKy8FT8DZHx1L,"You know Heroes on the water, you know, it def...",88.581161,0.64
3,spotify:episode:4fJ6Y6IpljKy8FT8DZHx1L,We're joined columns and right.,88.402306,0.64
4,spotify:episode:4fJ6Y6IpljKy8FT8DZHx1L,Thank you so much.,88.307861,0.64


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:5xBPWxqVCocdBgybmHjr5V,So you got any anybody like to thank sponsor s...,91.726151,0.87
1,spotify:episode:5xBPWxqVCocdBgybmHjr5V,I had some customers.,89.408089,0.87
2,spotify:episode:5xBPWxqVCocdBgybmHjr5V,"Well, thanks a lot of guys like that.",89.261078,0.87
3,spotify:episode:5xBPWxqVCocdBgybmHjr5V,All right you guys to thanks a lot again.,89.106979,0.87
4,spotify:episode:5xBPWxqVCocdBgybmHjr5V,Thanks.,89.094635,0.87


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:0X663c1I6j1cehJvy10WMm,We are all in this together everything from bu...,89.035439,0.32
1,spotify:episode:0X663c1I6j1cehJvy10WMm,So I'm happy to say that I have enough faith i...,88.109879,0.32
2,spotify:episode:0X663c1I6j1cehJvy10WMm,Just went all over the faces of the people sit...,87.690369,0.32
3,spotify:episode:0X663c1I6j1cehJvy10WMm,Thank you so much for listening.,87.602249,0.32
4,spotify:episode:0X663c1I6j1cehJvy10WMm,I want to talk about the impacts that are happ...,87.313004,0.32


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:61a1JjZO27lGCvCwBaCkpC,Thank you.,89.589943,0.7
1,spotify:episode:61a1JjZO27lGCvCwBaCkpC,Thanks.,89.094635,0.7
2,spotify:episode:61a1JjZO27lGCvCwBaCkpC,We're just so blessed and happy that you came ...,89.013054,0.7
3,spotify:episode:61a1JjZO27lGCvCwBaCkpC,We're so excited and you guys know where to fi...,88.213264,0.7
4,spotify:episode:61a1JjZO27lGCvCwBaCkpC,Behavior when presented with the same prompt t...,87.90477,0.7


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:0goWRy1gwB23rQVy8ci7Wa,"Good job, but also we gave I put up a post on ...",87.776657,0.7
1,spotify:episode:0goWRy1gwB23rQVy8ci7Wa,"Questions to tackle, but before we start specu...",87.741821,0.7
2,spotify:episode:0goWRy1gwB23rQVy8ci7Wa,So thank you very much to you.,87.721283,0.7
3,spotify:episode:0goWRy1gwB23rQVy8ci7Wa,It's a great I think message for anyone right?,87.452133,0.7
4,spotify:episode:0goWRy1gwB23rQVy8ci7Wa,We have they've got they've given us officiall...,86.986946,0.7


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:0BSD8QYmd2mQ1V43uIrU4I,Ruminating on what mistakes the Christian demo...,86.35186,0.34
1,spotify:episode:0BSD8QYmd2mQ1V43uIrU4I,It was the red brigades each caller conveyed t...,86.3013,0.34
2,spotify:episode:0BSD8QYmd2mQ1V43uIrU4I,Accusation to make but that was it to everyone...,86.255318,0.34
3,spotify:episode:0BSD8QYmd2mQ1V43uIrU4I,Aldo was candid about the failings of the Chri...,86.128883,0.34
4,spotify:episode:0BSD8QYmd2mQ1V43uIrU4I,While these futile efforts continued a debate ...,86.128006,0.34


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:5xH3cdpkxnJhQjPV22sxKC,There are too many different motives only thre...,85.812935,0.18
1,spotify:episode:5xH3cdpkxnJhQjPV22sxKC,Our next assassin had a more selfish reasoning...,85.683319,0.18
2,spotify:episode:5xH3cdpkxnJhQjPV22sxKC,He said only four percent of victims receive d...,85.526329,0.18
3,spotify:episode:5xH3cdpkxnJhQjPV22sxKC,It was a question fool and family echoed the i...,85.372192,0.18
4,spotify:episode:5xH3cdpkxnJhQjPV22sxKC,Officially Booth wanted revenge on the man who...,85.04982,0.18


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:0YPvJfSEw0jacPB3IeT37d,"So I highlighted a large section here, but I r...",87.531731,0.44
1,spotify:episode:0YPvJfSEw0jacPB3IeT37d,But if you're a person of color and have a lot...,87.215431,0.44
2,spotify:episode:0YPvJfSEw0jacPB3IeT37d,"But first of all, we'll go through and do our ...",87.074615,0.44
3,spotify:episode:0YPvJfSEw0jacPB3IeT37d,"You know, the one where they've got the main e...",86.967964,0.44
4,spotify:episode:0YPvJfSEw0jacPB3IeT37d,Thank you so much for joining us Anna you guys...,86.40255,0.44


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:1gnpv26FFvIxpnwVbbRXv1,If you have a message if you have some come co...,85.782341,0.09
1,spotify:episode:1gnpv26FFvIxpnwVbbRXv1,I had my name balls did my practices I have re...,85.033485,0.09
2,spotify:episode:1gnpv26FFvIxpnwVbbRXv1,Now my Skyrim everyone welcome back to my shoe...,84.972702,0.09
3,spotify:episode:1gnpv26FFvIxpnwVbbRXv1,It's lot and I hope this possibilities availab...,84.891136,0.09
4,spotify:episode:1gnpv26FFvIxpnwVbbRXv1,"I want myself back sir, but accessible to you ...",84.821716,0.09


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:6rh4J52THn252yi7t11Yqf,I really apologize you and I thank you.,88.978973,0.47
1,spotify:episode:6rh4J52THn252yi7t11Yqf,I applaud you.,88.309372,0.47
2,spotify:episode:6rh4J52THn252yi7t11Yqf,"Just please listen, he's helping me and it mak...",87.807602,0.47
3,spotify:episode:6rh4J52THn252yi7t11Yqf,I never dared me and I wish I did but I just n...,87.645622,0.47
4,spotify:episode:6rh4J52THn252yi7t11Yqf,"Oh my God, I'm really a good friend and David ...",86.723396,0.47


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:3IfmcM2rcWb82601pkPvCh,Thank you listeners for supporting these incre...,89.696877,0.11
1,spotify:episode:3IfmcM2rcWb82601pkPvCh,Today's episode is sponsored by glory and shin...,87.014542,0.11
2,spotify:episode:3IfmcM2rcWb82601pkPvCh,Why didn't we see that showing up in love and ...,86.254257,0.11
3,spotify:episode:3IfmcM2rcWb82601pkPvCh,"She is Beloved the Driver, yes beloved the ca...",86.108673,0.11
4,spotify:episode:3IfmcM2rcWb82601pkPvCh,You can find her on Instagram at Krug The Thin...,86.108315,0.11


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:5LJ33LdXWhqOu1KNad6D5q,Thank you listeners for supporting these incre...,89.696877,0.1
1,spotify:episode:5LJ33LdXWhqOu1KNad6D5q,Today's episode is sponsored by glory and shin...,87.014542,0.1
2,spotify:episode:5LJ33LdXWhqOu1KNad6D5q,Today's episode is all Also sponsored by pray...,85.987732,0.1
3,spotify:episode:5LJ33LdXWhqOu1KNad6D5q,She is passionate about her faith and has cont...,85.194092,0.1
4,spotify:episode:5LJ33LdXWhqOu1KNad6D5q,Please visit pray more Retreat dot-org.,84.700455,0.1


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:1AxFBio6NwwG0MAjdCK5gK,Thank you.,89.589943,0.2
1,spotify:episode:1AxFBio6NwwG0MAjdCK5gK,Etc.,86.95089,0.2
2,spotify:episode:1AxFBio6NwwG0MAjdCK5gK,I've got a question first well means please go...,85.642418,0.2
3,spotify:episode:1AxFBio6NwwG0MAjdCK5gK,Even those trusted crew members can sometimes ...,85.614006,0.2
4,spotify:episode:1AxFBio6NwwG0MAjdCK5gK,Our pilgrimage pod and Facebook at the pilgrim...,85.602974,0.2


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:3U33mRnDJcXywmBm1hahlL,Make sure to follow along with us know who you...,89.969749,0.89
1,spotify:episode:3U33mRnDJcXywmBm1hahlL,Thank you.,89.589943,0.89
2,spotify:episode:3U33mRnDJcXywmBm1hahlL,Thank you.,89.589943,0.89
3,spotify:episode:3U33mRnDJcXywmBm1hahlL,We have some announcements and reminders is an...,88.014763,0.89
4,spotify:episode:3U33mRnDJcXywmBm1hahlL,So Tina's like who's been Bianca that making m...,87.64444,0.89


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:57Nzb9H2VRZgHcNFChwbBG,"Also, if you want to be if you want to sponsor...",89.189102,0.15
1,spotify:episode:57Nzb9H2VRZgHcNFChwbBG,Thank God.,88.359428,0.15
2,spotify:episode:57Nzb9H2VRZgHcNFChwbBG,Thank you so much for listening.,87.602249,0.15
3,spotify:episode:57Nzb9H2VRZgHcNFChwbBG,"Hey, maybe one of those things of this hopeful...",86.95015,0.15
4,spotify:episode:57Nzb9H2VRZgHcNFChwbBG,I'm on a Mobile hairdressing page and I asked...,86.920303,0.15


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:6vRLNVEQ4xqtMxnms3RZh3,They love it and you know investors are drivin...,88.753769,0.34
1,spotify:episode:6vRLNVEQ4xqtMxnms3RZh3,"We talked about earlier, but they also offer o...",88.405243,0.34
2,spotify:episode:6vRLNVEQ4xqtMxnms3RZh3,"You know, I'm somebody who's been doing this.",87.956009,0.34
3,spotify:episode:6vRLNVEQ4xqtMxnms3RZh3,There's something called eBay Bucks A lot of p...,87.790627,0.34
4,spotify:episode:6vRLNVEQ4xqtMxnms3RZh3,If you want like links to those just DME and I...,87.660431,0.34


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:1tN044BhlPjjiluZ7Wo7UL,Thanks so much and we'll see you all tomorrow.,87.432022,0.16
1,spotify:episode:1tN044BhlPjjiluZ7Wo7UL,So now that we've gone over the major points a...,85.319595,0.16
2,spotify:episode:1tN044BhlPjjiluZ7Wo7UL,And most within one hour they did not comment ...,84.946251,0.16
3,spotify:episode:1tN044BhlPjjiluZ7Wo7UL,Keep in mind that this podcast is designed to ...,84.841286,0.16
4,spotify:episode:1tN044BhlPjjiluZ7Wo7UL,This is the ortho bullets podcast a daily audi...,84.583199,0.16


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:1fs86N6FLUKW2e5NdX1dF1,I would say like entirely So I would like to ...,89.339706,0.44
1,spotify:episode:1fs86N6FLUKW2e5NdX1dF1,Their appreciation support for the franchise a...,88.945366,0.44
2,spotify:episode:1fs86N6FLUKW2e5NdX1dF1,Thank you all for your support.,88.551247,0.44
3,spotify:episode:1fs86N6FLUKW2e5NdX1dF1,It would be very welcome.,87.48336,0.44
4,spotify:episode:1fs86N6FLUKW2e5NdX1dF1,They're going to be paying attention to number...,87.419342,0.44


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:1A4cHtP3wIVQutpCgS7kd7,And that is it guys few announcements.,89.736458,0.86
1,spotify:episode:1A4cHtP3wIVQutpCgS7kd7,Thank you for joining us.,89.038361,0.86
2,spotify:episode:1A4cHtP3wIVQutpCgS7kd7,Don't forget to like comment and subscribe to ...,87.686356,0.86
3,spotify:episode:1A4cHtP3wIVQutpCgS7kd7,Like like you you hype up these people the ent...,87.495583,0.86
4,spotify:episode:1A4cHtP3wIVQutpCgS7kd7,Thanks for listening guys.,87.459137,0.86


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:1Mi90UjG91rm73qvHzcG0t,Hi everyone.,86.922188,0.02
1,spotify:episode:1Mi90UjG91rm73qvHzcG0t,In the near term Asian markets may be in for c...,83.84449,0.02
2,spotify:episode:1Mi90UjG91rm73qvHzcG0t,This has been a podcast from ocbc Bank follow ...,83.572586,0.02
3,spotify:episode:1Mi90UjG91rm73qvHzcG0t,I'm Selena Ling Chief Economist for CPC bang t...,83.389191,0.02
4,spotify:episode:1Mi90UjG91rm73qvHzcG0t,If you reference the G7 meeting of Finance Chi...,83.141045,0.02


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:3TuC8HZp9VdXtxYMQkJI0m,Thank you.,89.589943,0.11
1,spotify:episode:3TuC8HZp9VdXtxYMQkJI0m,I hope you do to thank you so much.,88.546982,0.11
2,spotify:episode:3TuC8HZp9VdXtxYMQkJI0m,He's with mr. D. Hello and welcome to Friday f...,86.276268,0.11
3,spotify:episode:3TuC8HZp9VdXtxYMQkJI0m,Thanks for having happy Friday everybody.,85.741425,0.11
4,spotify:episode:3TuC8HZp9VdXtxYMQkJI0m,I know they do have a parent or advisory warni...,85.498055,0.11


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:67v8V9SOXxivYQfAHSsc5f,Thank you so much for chatting.,88.334587,0.18
1,spotify:episode:67v8V9SOXxivYQfAHSsc5f,Thank you so much for joining me on for the lo...,87.355186,0.18
2,spotify:episode:67v8V9SOXxivYQfAHSsc5f,Thank you so much Rodman.,86.162308,0.18
3,spotify:episode:67v8V9SOXxivYQfAHSsc5f,I'm so excited to be here.,85.876465,0.18
4,spotify:episode:67v8V9SOXxivYQfAHSsc5f,Do you have any tips for teachers I guess to p...,85.684448,0.18


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:3QE8qxHtJg3zMrq03R4GOj,We just want to say thanks to our sponsor.,95.07843,0.63
1,spotify:episode:3QE8qxHtJg3zMrq03R4GOj,And today Spotify is one of our sponsors on Sp...,92.813988,0.63
2,spotify:episode:3QE8qxHtJg3zMrq03R4GOj,Thank you.,89.589943,0.63
3,spotify:episode:3QE8qxHtJg3zMrq03R4GOj,Thank you.,89.589943,0.63
4,spotify:episode:3QE8qxHtJg3zMrq03R4GOj,Thank you.,89.589943,0.63


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:3Vr6AUCTQgVWoE137b4IdB,"So that's and if I'm proud of it, but it's par...",87.354828,0.56
1,spotify:episode:3Vr6AUCTQgVWoE137b4IdB,We are.,87.203232,0.56
2,spotify:episode:3Vr6AUCTQgVWoE137b4IdB,But my guess is that people who work with you ...,87.027206,0.56
3,spotify:episode:3Vr6AUCTQgVWoE137b4IdB,"I think that you know, I just sort of help peo...",86.817032,0.56
4,spotify:episode:3Vr6AUCTQgVWoE137b4IdB,for example I can give you one example that c...,86.807785,0.56


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:4oQZAcd6xnxVA1e2GWaIFb,Thank you.,89.589943,0.23
1,spotify:episode:4oQZAcd6xnxVA1e2GWaIFb,But thank God.,88.522011,0.23
2,spotify:episode:4oQZAcd6xnxVA1e2GWaIFb,And thanks again for joining us today.,88.509727,0.23
3,spotify:episode:4oQZAcd6xnxVA1e2GWaIFb,"Thank you again, man.",88.089447,0.23
4,spotify:episode:4oQZAcd6xnxVA1e2GWaIFb,"I've used other platforms, and I'm so happy I ...",87.564529,0.23


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:4sbSs1xtuf8dF3xtb7btUi,They will even help you find other companies t...,90.060837,0.46
1,spotify:episode:4sbSs1xtuf8dF3xtb7btUi,We're your hosts.,89.264397,0.46
2,spotify:episode:4sbSs1xtuf8dF3xtb7btUi,So they're waving and everything and because o...,87.569412,0.46
3,spotify:episode:4sbSs1xtuf8dF3xtb7btUi,We want to take some time to introduce you to ...,87.446594,0.46
4,spotify:episode:4sbSs1xtuf8dF3xtb7btUi,"So again, thanks for listening and we'll see y...",86.970871,0.46


Unnamed: 0,episode_uri,top_responses,top_scores,total_time
0,spotify:episode:2RoLspr2PQTki51sfMhd20,We are grateful for you our listeners you allo...,90.327499,0.32
1,spotify:episode:2RoLspr2PQTki51sfMhd20,In other words Congressional staffers are Ed ...,86.872383,0.32
2,spotify:episode:2RoLspr2PQTki51sfMhd20,Thanks for listening.,86.267189,0.32
3,spotify:episode:2RoLspr2PQTki51sfMhd20,Is or his wife's election campaigns any favors...,86.216476,0.32
4,spotify:episode:2RoLspr2PQTki51sfMhd20,He'd already talked about wanting to work with...,85.902397,0.32


In [61]:
# indicator_episodes_df.to_excel('output/distilbert_dot_tasb_b256_msmarco_indicator_episodes.xlsx', index=False)

In [119]:
for df in of_interest:
    episode_uri = df.iloc[0,0].replace(":", "_")
    df.to_excel(f'output/distilbert_dot_tasb_{episode_uri}.xlsx', index=False)

# Examine results

In [8]:
import pandas as pd
path = 'output/Indicator_episode_results/recovery_distilbert_dot_tasb_true_score/'

episode_dfs = []
for num in range(3,33):
    indicator_episodes_df = pd.read_excel(f'{path}distil_dot_tasb_recovery_Sheet{num}.xlsx')
    episode_dfs.append(indicator_episodes_df)

In [11]:
episode_dfs[0].head()

Unnamed: 0,episode_uri,human_identified,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,y_true,top_scores,top_responses
0,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,You don't even need a premium account Spotify...,"Oh, yeah other Shooters we're super fortunate ...",0,89.197632,"Oh, yeah other Shooters we're super fortunate ..."
1,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,Out on Spotify,", I can't I can't but but like I'm just that's...",0,89.057426,I can't I can't but but like I'm just that's w...
2,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,You can follow your favorite podcast,", And I don't know there's a lot of people int...",0,88.724937,And I don't know there's a lot of people inter...
3,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,And if you're a premium user you can download...,", It's always a great experience in like you'r...",0,88.405342,It's always a great experience in like you're ...
4,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,So if you haven't done so already be sure to ...,", It's just so funny shown up because there's ...",0,88.26886,It's just so funny shown up because there's so...


In [12]:
import pandas as pd
path = 'output/recovery/distil_dot_tasb_recovery_'

scored_dfs = []

for scored_df in episode_dfs:

    # episode_uri = scored_df.iloc[0,0]
    # print(episode, episode_uri)

    # display(scored_df.head(2))
    scored_df['y_true'].fillna(0, inplace=True)
    
    scored_df.sort_values(by=['top_scores'], ascending=False, inplace=True)
    scored_dfs.append(scored_df)

In [13]:
from sklearn.metrics import ndcg_score
import numpy as np

# ordered greatest to least
ndcg_scores = []

for df in scored_dfs:
    episode_uri = df.iloc[0,0]
    true_relevance = np.asarray([df['y_true']])
    try:
        predicted_scores = np.asarray([df['top_scores']])
        ndcg = ndcg_score(true_relevance, predicted_scores)
    except ValueError as error:
        print(error)
        print("Episode URI: ", episode_uri)
        continue

    
    ndcg_scores.append(ndcg)
    print(episode_uri,"NDCG score: ", ndcg)

avg_ndcg_score = np.mean(ndcg_scores)
print('Average NDCG score: ', avg_ndcg_score, "for model: ", pre_trained_model_name)

spotify:episode:5fG4VlWnWwzAt6mSs0H7lY NDCG score:  0.0
spotify:episode:7JG3lLnRoDdOxuqjf14ZkM NDCG score:  0.25
spotify:episode:3kkhUQJ9DXYs6aSdDmPp2V NDCG score:  0.0
spotify:episode:4fJ6Y6IpljKy8FT8DZHx1L NDCG score:  0.961999147059583
spotify:episode:5xBPWxqVCocdBgybmHjr5V NDCG score:  1.0
spotify:episode:0X663c1I6j1cehJvy10WMm NDCG score:  0.0
spotify:episode:61a1JjZO27lGCvCwBaCkpC NDCG score:  0.0
spotify:episode:0goWRy1gwB23rQVy8ci7Wa NDCG score:  0.40315091002613573
spotify:episode:0BSD8QYmd2mQ1V43uIrU4I NDCG score:  0.0
spotify:episode:5xH3cdpkxnJhQjPV22sxKC NDCG score:  0.21274605355336274
spotify:episode:0YPvJfSEw0jacPB3IeT37d NDCG score:  0.0
spotify:episode:1gnpv26FFvIxpnwVbbRXv1 NDCG score:  0.5387008702316818
spotify:episode:6rh4J52THn252yi7t11Yqf NDCG score:  0.0
spotify:episode:3IfmcM2rcWb82601pkPvCh NDCG score:  0.6013954160211185
spotify:episode:5LJ33LdXWhqOu1KNad6D5q NDCG score:  0.7040965536719686
spotify:episode:1AxFBio6NwwG0MAjdCK5gK NDCG score:  0.23137821315975

# Initial Test

## Full Transcript - CSV

In [62]:
results = pd.read_csv("output/test_transcripts.csv")
test_1_transcript = results.loc[1, 'transcript']
results

Unnamed: 0.1,Unnamed: 0,episode_uri,transcript
0,0,spotify:episode:5fG4VlWnWwzAt6mSs0H7lY,"What's up, guys? This episode of the podcast i..."
1,1,spotify:episode:2WQ1GcC6J0k7qsO8Vvf2be,"What's up, everybody? Welcome to the in the do..."
2,2,spotify:episode:5hvOWPoB0j6HMrSVAMtJLV,You are listening to Irish illustrate Insider....


In [63]:

# our relevant example
passage1_input = tokenizer(test_1_transcript[:], return_tensors="pt")

# the user query -> which should give us a better score for the first passage
query_input = tokenizer("who are the advertisers, sponsors, advertisement, or ads and/or businesses, people, teams thanked?",return_tensors="pt")
print("Passage 1 Tokenized:",tokenizer.convert_ids_to_tokens(passage1_input["input_ids"][0]))
print("Query Tokenized:",tokenizer.convert_ids_to_tokens(query_input["input_ids"][0]))
# note how we call the bert model independently between passages and query :)
# [0][:,0,:] pools (or selects) the CLS vector from the full output
passage1_encoded = bert_model(**passage1_input)[0][:,0,:].squeeze(0)
query_encoded    = bert_model(**query_input)[0][:,0,:].squeeze(0)

print("---")
print("Passage Encoded Shape:",passage1_encoded.shape)
print("Query Encoded Shape:",query_encoded.shape)
# Now that we have our encoded vectors, we can generate the score with a simple dot product!

# (This can be offloaded to a vector indexing library like Faiss)
score_for_p1 = query_encoded.dot(passage1_encoded)
print("Score passage 1 <-> query: ",float(score_for_p1))

Token indices sequence length is longer than the specified maximum sequence length for this model (11768 > 512). Running this sequence through the model will result in indexing errors


## By Sentence

In [35]:
episode_ = {
    "query": {
        "match_phrase": {
            "_id": 'spotify:episode:5fG4VlWnWwzAt6mSs0H7lY',
        }
    }
}

# Execute the Elasticsearch search query
# show_search_results = es.search(index="spotify_podcast_transcripts", body=show_body)
results = es.search(index="spotify_podcast_transcripts", body=episode_)

# Extract relevant information from the search results
hits = results["hits"]["hits"]

In [49]:
for sentence in hits[0]['_source']['sentence_tokens']:
    sentence_len = len(sentence)
    if sentence_len > 800:
        split_by = 2
        split = round(sentence_len // split_by,0)

        sentence_split = [sentence[i:i+split] for i in range(0, len(sentence), split)]
        print(len(sentence_split))

2
2
3
3
2
3
3
2


In [37]:
query_input = tokenizer("who are the advertisers, sponsors, advertisement, or ads and/or businesses, people, teams thanked?",return_tensors="pt")
print("Query Tokenized:",tokenizer.convert_ids_to_tokens(query_input["input_ids"][0]))
query_encoded = bert_model(**query_input)[0][:,0,:].squeeze(0)

sentence_score = []

for sentence in hits[0]['_source']['sentence_tokens']:
    sentence_input = tokenizer(sentence, return_tensors="pt")
    tokenizer.convert_ids_to_tokens(sentence_input["input_ids"][0])
    passage1_encoded = bert_model(**sentence_input)[0][:,0,:].squeeze(0)
    score_for_p1 = query_encoded.dot(passage1_encoded)

    sentence_score.append((sentence, score_for_p1.item()))

Query Tokenized: ['[CLS]', 'who', 'are', 'the', 'ad', '##vert', '##iser', '##s', ',', 'sponsors', ',', 'advertisement', ',', 'or', 'ads', 'and', '/', 'or', 'businesses', ',', 'people', ',', 'teams', 'thanked', '?', '[SEP]']


IndexError: index out of range in self

In [27]:

sentence_score.sort(key=lambda tup: tup[1], reverse=True)
sentence_score[:25]

[('Thank you listeners for supporting these incredible Catholic businesses.',
  89.6968765258789),
 ("Today's episode is sponsored by glory and shine offering lotions soap bars lip balms beard care and much much more all of their products are natural wholesome and prayerfully designed and crafted to inspire the Catholic faithful during everyday acts of life and to promote prayer meditation on scripture and Marian devotion for ten percent off your entire order use promotional code Rua at checkout.",
  87.01454162597656),
 ("Why didn't we see that showing up in love and support would have been an act of worship true worship of our god of compassion and tenderness and mercy, I had missed my opportunity to love.",
  86.25425720214844),
 ('She is Beloved the  Driver, yes beloved the cashier checking me out at the grocery store.',
  86.10867309570312),
 ('You can find her on Instagram at Krug The Thinker listeners the ask that you sit with us pray with us.',
  86.10831451416016),
 ("Today's 

## Split and sentences

In [107]:
# num_sentences = hits[0]['_source']['num_sentences']

# for split_by in range(2,2):

#     if num_sentences % split_by == 0:
#         split = num_sentences / split_by
#         hits[0]['_source']['sentence_tokens'][:split]

num_sentences = hits[0]['_source']['num_sentences']

# create an interval to know where to split the sentences and create multiple intervals
intervals = np.array(list(range(0, num_sentences, 2)))

segments = [segment_[x:x + segment_length] for x in range(0, len(segment), segment_length)]

NameError: name 'segment' is not defined

In [90]:
# identify points to split the sentence_tokens, find last sentence in the split and then split the transcript

num_sentences = hits[0]['_source']['num_sentences']
split_by = 2

if num_sentences % split_by == 0:
    split = num_sentences // split_by

split_sentence = hits[0]['_source']['sentence_tokens'][split-1]  # Removed unnecessary slicing [split-1:split][0]
print("Split Sentence:", split_sentence)

# Find the index of split_sentence in the transcript
transcript = hits[0]['_source']['transcript']
split_index = transcript.find(split_sentence)

if split_index != -1:
    # Split the transcript based on the found index
    transcript_before_split = transcript[:split_index].strip()
    transcript_after_split = transcript[split_index + len(split_sentence):].strip()
else:
    print("Split sentence not found in the transcript.")


split_transcript = [transcript_before_split, transcript_after_split]

for num, transcript_part in enumerate(split_transcript):
    sentence_input = tokenizer(sentence, return_tensors="pt")
    tokenizer.convert_ids_to_tokens(sentence_input["input_ids"][0])
    passage1_encoded = bert_model(**sentence_input)[0][:,0,:].squeeze(0)
    score_for_p1 = query_encoded.dot(passage1_encoded)
    print("Score passage ", num, "<-> query: ",float(score_for_p1))



Split Sentence: Alright what I think we'll start to see is those two will start to play better as well because they would have been victims of the team on the wrong side of turning the tide.
Score passage  0 <-> query:  83.94951629638672
Score passage  1 <-> query:  83.94951629638672


In [111]:
# identify points to split the sentence_tokens, find last sentence in the split and then split the transcript

num_sentences = hits[0]['_source']['num_sentences']
split_by = 2

if num_sentences % split_by == 0:
    split = num_sentences // split_by

split_sentence = hits[0]['_source']['sentence_tokens'][split-1]  # Removed unnecessary slicing [split-1:split][0]
print("Split Sentence:", split_sentence)

# Find the index of split_sentence in the transcript
transcript = hits[0]['_source']['transcript']
split_index = transcript.find(split_sentence)

if split_index != -1:
    # Split the transcript based on the found index
    transcript_before_split = transcript[:split_index].strip()
    transcript_after_split = transcript[split_index + len(split_sentence):].strip()
else:
    print("Split sentence not found in the transcript.")

transcript_split = len(transcript_before_split)

transcript_split


Split Sentence: Alright what I think we'll start to see is those two will start to play better as well because they would have been victims of the team on the wrong side of turning the tide.


23898

In [112]:
test_1_transcript[:transcript_split]

"What's up, everybody? Welcome to the in the dome podcast podcast body are you doing hey, how you doing? I'm pretty good myself. All right. What do you want to talk about today? We got a breakdown. I know there's not much stock boat. We still haven't broken down the Columbus game, but cardiac / comeback kids come back. You know what that game room. Okay, I believe off the where we thought we were talking about a couple of episodes ago were talking about house like a toxic relationship and I was starting to get sucked back in. Yep.  And then I think you were to it's like when fully back on board is fully back with the Calgary Flames right now to start that podcast. I was like, I'm not I'm not falling for it. I've been hurt too many times by these guys, but by the end of that podcast we switched but I was still whatever. Yeah, I'm fine. I'm told I'm fully back. You're fully back. I'm proud. I'm not fully back. They've sucked me in that Columbus game. Got me back to being like, I think I'

In [113]:

# our relevant example
passage1_input = tokenizer(test_1_transcript[:transcript_split], return_tensors="pt")
passage2_input = tokenizer(test_1_transcript[transcript_split:], return_tensors="pt")

# the user query -> which should give us a better score for the first passage
query_input = tokenizer("who are the advertisers, sponsors, advertisement, or ads and/or businesses, people, teams thanked?",return_tensors="pt")


Token indices sequence length is longer than the specified maximum sequence length for this model (5793 > 512). Running this sequence through the model will result in indexing errors


In [None]:


# note how we call the bert model independently between passages and query :)
# [0][:,0,:] pools (or selects) the CLS vector from the full output
passage1_encoded = bert_model(**passage1_input)[0][:,0,:].squeeze(0)
passage2_encoded = bert_model(**passage2_input)[0][:,0,:].squeeze(0)
query_encoded    = bert_model(**query_input)[0][:,0,:].squeeze(0)

# (This can be offloaded to a vector indexing library like Faiss)
score_for_p1 = query_encoded.dot(passage1_encoded)
print("Score passage 1 <-> query: ",float(score_for_p1))

score_for_p2 = query_encoded.dot(passage2_encoded)
print("Score passage 1 <-> query: ",float(score_for_p2))

## Use elasticsearch to get segments

In [13]:
episode_sentences = {
    "query": {
        "match": {
            "id_": 'spotify:episode:2WQ1GcC6J0k7qsO8Vvf2be',
        }
    }
}

# Execute the Elasticsearch search query
# show_search_results = es.search(index="spotify_podcast_transcripts", body=show_body)
results = es.search(index="transcripts_sentences", body=episode_sentences)

# Extract relevant information from the search results
hits = results["hits"]["hits"]

In [14]:
len(hits)

0