In [2]:
from tqdm.autonotebook import tqdm
from getpass import getpass

import os

# Prefer the PINECONE_API_KEY environment variable so the notebook can run
# unattended (Restart Kernel -> Run All). Fall back to an interactive,
# non-echoing prompt when the variable is unset or empty, so the key is
# never hard-coded in the notebook.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY") or getpass('Enter PINECONE_API_KEY')

In [3]:
import pinecone

# Configure the Pinecone client with the target environment and the API key
# obtained in the previous cell.
pinecone.init(environment="us-west4-gcp", api_key=PINECONE_API_KEY)

## Pinecone Index Creation

In [4]:
index_name = "shubhams-index"

# Create the index only when it is not already listed, so re-running this
# cell does not attempt a second creation.
index_already_exists = index_name in pinecone.list_indexes()
if not index_already_exists:
    pinecone.create_index(
        name=index_name,
        metric="cosine",
        dimension=384,  # must equal the length of the query embeddings
        pod_type="s1.x1",
        pods=1,
        # Only `episode_title` is indexed for metadata filtering.
        metadata_config={"indexed": ["episode_title"]},
    )

In [8]:
# Open a handle to the index named by `index_name`.
index = pinecone.Index(index_name)
# Last expression of the cell: the returned statistics are shown as the
# cell's output (dimension, fullness, per-namespace vector counts).
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 30987}},
 'total_vector_count': 30987}

## Querying

In [6]:
from sentence_transformers import SentenceTransformer

# Name of the sentence-transformers checkpoint used to embed queries.
MODEL_NAME = "multi-qa-MiniLM-L6-cos-v1"

# Loaded once at notebook level; `query_index` reads this global for every query.
sentence_transformer_model = SentenceTransformer(model_name_or_path=MODEL_NAME)

In [22]:
def query_index(index_name, query, episode_title=None, num_results=5):
    """Embed a text query and fetch its nearest matches from a Pinecone index.

    Args:
        index_name: Name of the Pinecone index to search.
        query: Query text. It is echoed to stdout and encoded with the
            notebook-level ``sentence_transformer_model``.
        episode_title: Optional exact-match value for the ``episode_title``
            metadata field. Any falsy value (``None`` or ``""``) disables
            filtering.
        num_results: Number of matches to request (sent as ``top_k``).

    Returns:
        Whatever ``index.query`` returns, requested with metadata included.
    """
    index = pinecone.Index(index_name)

    # Embed the query; `.tolist()` turns the encoder output into a plain list.
    print(f"Query: {query}")
    query_embedding = sentence_transformer_model.encode(query, show_progress_bar=False).tolist()

    # Restrict matches to a single episode only when a title was supplied.
    metadata_filter = {"episode_title": {"$eq": episode_title}} if episode_title else None

    # Pass the embedding by keyword so the call does not depend on the
    # positional order of `Index.query` parameters.
    # NOTE(review): later client releases reportedly reject positional
    # arguments here - confirm against the installed pinecone version.
    results = index.query(
        vector=query_embedding,
        top_k=num_results,
        include_metadata=True,
        filter=metadata_filter,
    )
    return results

In [23]:
# Unfiltered search across all episodes, using the default number of results.
query_index(
    index_name=index_name,
    query="How many episodes of The Office did Mindy Kaling write?",
)

Query: How many episodes of The Office did Mindy Kaling write?


{'matches': [{'id': 'EPISODE_6-12-18',
              'metadata': {'end_time': datetime.datetime(2023, 3, 28, 0, 1, 47),
                           'episode_number': 'EPISODE 6',
                           'episode_title': 'HOT GIRL',
                           'start_time': datetime.datetime(2023, 3, 28, 0, 1, 20),
                           'text': 'I know you want to. You know my fast fact '
                                   'number one, this is the first episode '
                                   'written by Mindy Kaling. Yes. Mindy '
                                   'Kaling. So smart, so funny. And she was '
                                   'one of our most prolific writers on The '
                                   'Office. She wrote a total of twenty two '
                                   "episodes. Wow. I know. That's like, she "
                                   'wrote an entire season of The Office of '
                                   "our nine because the season

In [24]:
# Unfiltered search across all episodes, using the default number of results.
query_index(
    index_name=index_name,
    query="What was the most expensive scene to film?",
)

Query: What was the most expensive scene to film?


{'matches': [{'id': 'Episode_151-459-465',
              'metadata': {'end_time': datetime.datetime(2023, 3, 28, 1, 12, 6),
                           'episode_number': 'Episode 151',
                           'episode_title': 'Threat Level Midnight with B.J. '
                                            'Novak',
                           'start_time': datetime.datetime(2023, 3, 28, 1, 11, 13),
                           'text': 'I know. They had to build a whole entire '
                                   "gas station in a freeway. Yes, you're "
                                   'right, Angela. The Jim Pam proposal scene. '
                                   'Randi told us it cost $177,000 for 40 '
                                   'seconds of screen time, 40 seconds. He '
                                   'said the second most expensive would '
                                   'probably go to filming on the boat in '
                                   'Niagara Falls, especiall