In [1]:
import os
from getpass import getpass

from dotenv import load_dotenv



In [2]:
import pstuts_rag

In [3]:
%load_ext autoreload
%autoreload 2


In [12]:
from dataclasses import dataclass
@dataclass
class ApplicationParameters:
    filename = "data/test.json"
    embedding_model = "text-embedding-3-small"
    n_context_docs = 2

params = ApplicationParameters()

In [4]:

load_dotenv()

def set_api_key_if_not_present(key_name, prompt_message=""):
    if len(prompt_message) == 0:
        prompt_message=key_name
    if key_name not in os.environ or not os.environ[key_name]:
        os.environ[key_name] = getpass.getpass(prompt_message)

set_api_key_if_not_present("OPENAI_API_KEY")

# Data Preparation

First, we will read in the transcripts of the videos and convert them to Documents
with appropriate metadata.

In [5]:
import json
filename = "../data/test.json"

data = json.load(open(filename, "rb"))


In [6]:
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai.embeddings import OpenAIEmbeddings
from pstuts_rag.datastore import transcripts_load

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
docs_chunks_semantic = transcripts_load(data,embeddings)

## R - retrieval

Let's hit it with a semantic chunker.

In [15]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

qdrantclient = QdrantClient(":memory:")

vectorstore = pstuts_rag.datastore.initialize_vectorstore(
    client=qdrantclient,
    collection_name=f"{params.filename}_qdrant",
    embeddings=embeddings,
)

_ = vectorstore.add_documents(documents=docs_chunks_semantic)
retriever =vectorstore.as_retriever(
    search_kwargs={"k": params.n_context_docs}
)

In [17]:
def retrieve(state):
    retrieved_docs = retriever.invoke(state["question"])
    return {"context":retrieved_docs}


In [21]:
from pprint import pp

In [22]:
a = retrieve({"question":"What is a layer?"})
[ pp(d.page_content) for d in a["context"] ]

("Layers are the building blocks of any image in Photoshop CC. So, it's "
 "important to understand, what layers are and why to use them - which we'll "
 "cover in this video. If you're following along, open this layered image from "
 'the downloadable practice files for this tutorial. You might think of layers '
 'like separate flat pints of glass, stacked one on top of the other. Each '
 'layer contains separate pieces of content. To get a sense of how layers are '
 "constructed, let's take a look at this Layers panel. I've closed my other "
 'panels, so that we can focus on the Layers panel. But you can skip that. By '
 "the way: If your Layers panel isn't showing, go up to the Window menu and "
 'choose Layers from there. The Layers panel is where you go to select and '
 'work with layers. In this image there are 4 layers, each with separate '
 'content. If you click the Eye icon to the left of a layer, you can toggle '
 "the visibility of that layer off and on. So, I'm going to tu

[None, None]

## A - Augmentation

We need to populate a prompt for LLM.


In [85]:
from langchain.prompts import ChatPromptTemplate

prompt_template = ChatPromptTemplate.from_messages([
    ("system", """\
You are a helpful an expert on Photoshop and your goal is to help users
gain knowledge from a database of training videos. 
You answer questions based on provided context. 
Your answers use emojis for emphasis.

IMPORTANT: You must only use the provided context, and cannot use your own knowledge.
If there is no context that corresponds to the query, respond by saying
"I don't know. This is not available in our training library."

Most of the users questions will be in the form:
"How can I do ..."
or
"What is ..."

When appropriate, provide your answers in a step-by-step form.
NEVER invent the explanation. ALWAYS use ONLY the context information.
"""),
    ("user","""\

### Question
{question}

NEVER invent the explanation. ALWAYS use ONLY the context information.

### Context
{context}

""")])


In [86]:
def compile_references(context):
    references = [ 
                    {k: doc.metadata[k] for k in ("title","source","start","stop")} 
                    for doc in context
    ] 
    print(type(references))
    return json.dumps(references,indent=2)


## Generation

We will use a 4.1-nano to generate answers.

In [87]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4.1-nano",temperature=0)

In [96]:
from operator import itemgetter
from langchain.schema.output_parser import StrOutputParser
from langchain_core.runnables import RunnableLambda

form_context = {
    "context": itemgetter("question") | retriever, 
    "question": itemgetter("question") 
    } | RunnablePassthrough()

answer_chain = prompt_template | llm | StrOutputParser()

get_videos =   form_context | \
    {"input":RunnablePassthrough(),"answer": answer_chain} |\
    RunnableLambda( lambda d: 
        {**d["input"], "answer": d["answer"] + 
         "\nReferences:\n" +
         compile_references(d["input"]["context"]) 
        } )
    


val = get_videos.invoke({"question":"What are layers"})

<class 'list'>


In [94]:
val.keys()

dict_keys(['context', 'question', 'answer'])

In [42]:
value

AIMessage(content='Layers are the building blocks of any image in Photoshop CC. 🖼️ They can be thought of as separate flat pints of glass, stacked one on top of the other. Each layer contains separate pieces of content, and some layers may have transparent areas that let you see through to the layers below. The Layers panel is where you select and work with layers, and you can toggle their visibility by clicking the Eye icon. The main benefit of layers is that they allow you to edit parts of an image independently without affecting the rest. \n\n📺 Watch the full explanation in the video titled "Understand layers" here: [https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4](https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4)'

In [52]:
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict,Annotated
from langchain_core.documents import Document
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langchain_openai.chat_models import ChatOpenAI
import operator

class State(TypedDict):
    question: str
    context: List[Document]
    response: str
        
graph_builder = StateGraph(State).add_sequence([retrieve, generate ])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [53]:
from langchain.schema.output_parser import StrOutputParser
response = graph.invoke({"question" : "What is the layer in Photoshop"})

In [None]:
response.keys()

In [None]:
type(response)

In [None]:
pp(response)

In [None]:
response.keys()