In [1]:
import os

from langchain_openai import OpenAIEmbeddings,ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

import chromadb
from chromadb.utils import embedding_functions
from chromadb import Documents, EmbeddingFunction, Embeddings

import numpy as np

from IPython.display import Markdown, display
import IPython

import re

import dotenv
dotenv.load_dotenv()


True

## Datasets Prep

In [2]:
%%time
transcripts_path = "./datasets/transcripts"
db_path = "./db"
raw_documents = []
files = os.listdir(transcripts_path)
for fn in files:
    doc = TextLoader(transcripts_path+"/"+fn, encoding="latin-1").load()
    raw_documents.append(doc[0])


CPU times: total: 15.6 ms
Wall time: 122 ms


In [3]:
%%time
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=350)
documents = text_splitter.split_documents(raw_documents)

CPU times: total: 62.5 ms
Wall time: 130 ms


In [4]:
%%time
vectordb = Chroma(persist_directory=db_path, embedding_function=OpenAIEmbeddings())

CPU times: total: 469 ms
Wall time: 763 ms


### Embedding Transcripts
> **WARNING:**
> Only run if dataset isnt prepared prepared

In [5]:
print("preparing datasets and storing in chroma")
# uncomment everything below and run


# array_split = lambda x, n: [x[i:i+n] for i in range(0,len(x),n)]
# batch_size = 200
# batches = array_split(documents, batch_size)
# blen = len(batches)
# mlen = len(documents)
# print(blen,mlen)

# for i,chunk in enumerate(batches):
#     print(f"{(i*batch_size)}/{mlen} = {100*((i+1)/blen)}%:")
#     vectordb.add_documents(chunk)

preparing datasets and storing in chroma


In [6]:
# chromaClient = chromadb.PersistentClient(path=db_path)
# langchaincol = chromaClient.get_collection("langchain")
# chromaClient.delete_collection("langchain") #reset
# langchaincol.count()
# chromaClient.list_collections()


In [11]:
query = "ted mosby, the architect"

QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. 
    Your task is to rephrase the user input such that, it includes maximum of five possible ways it can could be said, to search relevant documents from a vector database. 
    The databse has many documents which are transcripts from a show named 'How I met your mother'
    donot generate prompt or command, just generate query words rephrased properly 
    split response by comma
    i repeat, make sure you split responses by comma 
    Generate a rephrased response to search in the transcripts to generate best results by matching best documents for user.
    User Input Query: {question}""",
)
llm = ChatOpenAI(temperature=0)
llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT)

enhanched_query = llm_chain.run(question=query)
print(enhanched_query)
answers = vectordb.similarity_search(enhanched_query)

Ted Mosby, the architect
Architect Ted Mosby
Ted Mosby profession architect
Character Ted Mosby as an architect
Ted Mosby job as an architect


In [12]:
# Making Output Human READABLEEEEE
for answer in answers:
    pgc = answer.page_content
    clean_cont = re.sub(r'.+:', lambda x: "<b>"+x.group()+"</b>", pgc).replace("\n\n","<br/>")
    s,e = answer.metadata['source'].split('/')[3].split('.')[0].split('x')
    d = "<br/>"+f"\n----------'<b>SEASON{str(int(s))} X EPISODE {str(int(e))}</b>'----------\n\n"+"</b>"
    display(Markdown(d))
    display(Markdown(""+clean_cont+""))
    display(Markdown("<b>"+str('-'*(len(d)//2))+"</b>"))

<br/>
----------'<b>SEASON2 X EPISODE 4</b>'----------

</b>

<b>Barney:</b> That's 'cause you're always like... (Dully) "Ted Mosby, architect". If it were me, I'd be like... (Confidently) "Ted Mosby, architect". Anything sounds impressive when it's said with the right attitude.<br/><b>Marshall (suavely):</b> Marshall Ericksen. Recently dumped and heading to a lame party. Whoa. Whoa, ladies, please take it easy. There's enough of me for everyone. Oh. Hi. Hello. All right. We're gonna take off..<br/><b>Barney:</b> Wait up, I'll leave with you guys. "Ted Mosby, architect". Trust me.<br/>(Barney and marshall leave. Ted starts leaving and suddenly walk to a girl)<br/><b>Ted:</b> Hey, just out of curiosity. If a guy told you he was an architect, what would you think of that?<br/><b>Girl:</b> Are you kidding? Architects are hot. How do you think Mr. Brady scored a babe like Carol?<br/><b>Ted:</b> Solid point. She did have hair of gold.<br/><b>Girl:</b> Yes, she did.<br/><b>Ted:</b> Mm-hmm.<br/>Later, Robin and Lily arrive at MacLaren's.<br/><b>Robin:</b> Hey, Carl, is Ted still here?<br/><b>Carl:</b> No. Hey, Lily. You still single?<br/><b>Lily:</b> Yes.

<b>------------------------------</b>

<br/>
----------'<b>SEASON4 X EPISODE 8</b>'----------

</b>

<b>Barney:</b> Yes. I know a great American architect who would be perfect. Ted Mosby.<br/>End flashback<br/><b>Barney:</b> All you have to do is come and offer your drawing, I take care of the Board, the project is yours.<br/><b>Ted:</b> You realize what a hero I'd be if I brought this project to my box?<br/><b>Barney:</b> It's not the best. We three, we'll work together.<br/><b>Marshall:</b> Ted will come to call.<br/><b>Barney:</b> Yes, totally.<br/><b>Ted:</b> What is it?<br/><b>Barney: It works like this:</b> one of us goes into the office the other and said...<br/>Flashback<br/>Marshall is in his office with two men. Barney comes to see.<br/><b>Barney:</b> Marshall, the teleconference begins soon.<br/><b>Marshall:</b> Sorry, I gotta go.<br/><b>Barney:</b> And then you go up on the roof... and drink beer.<br/>Barney and Marshall are on the roof.<br/><b>Marshall:</b> It's great. Drinking at work.<br/><b>Barney:</b> Basically, it is of "Mad Men."<br/><b>Marshall:</b> Yes! There is too much "Mad Men"!<br/><b>Barney:</b> I'll spank a secretary.<br/><b>Marshall:</b> That's exactly what they would do in this series.<br/><b>Barney:</b> What series?<br/>End flashback

<b>------------------------------</b>

<br/>
----------'<b>SEASON2 X EPISODE 4</b>'----------

</b>

<b>Bouncer:</b> Thanks, but seriously, it was crazy tonight. You really do need to refresh my memory.<br/><b>Lily:</b> Then can I have the...? That's cool. You keep that.<br/><b>Robin:</b> So these guys... One of them's an architect.<br/><b>Bouncer:</b> Ted Mosby, the architect?<br/><b>Robin:</b> That's the one.<br/><b>Bouncer:</b> Oh, yeah, he was here.<br/>[FLASHBACK]<br/><b>Ted:</b> And then I was all, "No, Frank Gehry, you're on your own this time." And that, my dear, was the end of that hack's career.<br/><b>Girl:</b> I live across the street.<br/><b>Ted:</b> That is one hell of a non sequitur.<br/><b>Girl:</b> You want to come over?<br/><b>Ted:</b> Well, I don't know. But I suppose I should at least give you a ride home. Hop on. (The girl jumps on Ted's back)<br/><b>Girl (to the bouncer):</b> I'll see you at book club, Vinny.<br/><b>Bouncer:</b> Yeah. I'll be bringing the crudités.<br/><b>Girl:</b> Nice. Giddy up!<br/><b>Marshall:</b> Dude, dude, what are you doing?<br/><b>Ted:</b> Galloping.<br/><b>Marshall:</b> You proved it, all right? The whole "Ted Mosby, architect" things works.<br/><b>Girl:</b> Architects rule! Whoo-hee!

<b>------------------------------</b>

<br/>
----------'<b>SEASON2 X EPISODE 4</b>'----------

</b>

<b>Barney:</b> Well, love to join you at that one. But I got tickets to Foxy Boxing.<br/><b>Ted:</b> I wish I could join you guys, but I got to get back to my apparently boring job.<br/><b>Marshall:</b> Your job's not boring.<br/><b>Ted:</b> Robin thinks so.<br/><b>Barney:</b> Dude, lots of chicks think architects are hot. Think about it, you create something out of nothing. You're like God. There's nobody hotter than God.<br/><b>Ted:</b> I love it when you quote Scripture.<br/><b>Barney:</b> I'm telling you, you should use the architect angle with the ladies.<br/><b>Ted:</b> Okay, first of all, I have a girlfriend. Second, the architect angle doesn't even work on her. And, third, I can't imagine that working on anyone ever.<br/><b>Barney:</b> That's 'cause you're always like... (Dully) "Ted Mosby, architect". If it were me, I'd be like... (Confidently) "Ted Mosby, architect". Anything sounds impressive when it's said with the right attitude.

<b>------------------------------</b>