In [121]:
import os
from getpass import getpass

from dotenv import load_dotenv



In [122]:
import pstuts_rag

In [123]:
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [124]:
from dataclasses import dataclass
@dataclass
class ApplicationParameters:
    filename = "data/test.json"
    embedding_model = "text-embedding-3-small"
    n_context_docs = 2

params = ApplicationParameters()

In [125]:

load_dotenv()

def set_api_key_if_not_present(key_name, prompt_message=""):
    if len(prompt_message) == 0:
        prompt_message=key_name
    if key_name not in os.environ or not os.environ[key_name]:
        os.environ[key_name] = getpass.getpass(prompt_message)

set_api_key_if_not_present("OPENAI_API_KEY")

# Data Preparation

First, we will read in the transcripts of the videos and convert them to Documents
with appropriate metadata.

In [126]:
import json
filename = "../data/test.json"

data = json.load(open(filename, "rb"))


In [127]:
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai.embeddings import OpenAIEmbeddings
from pstuts_rag.datastore import transcripts_load

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
docs_chunks_semantic = transcripts_load(data,embeddings)

## R - retrieval

Let's hit it with a semantic chunker.

In [128]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

qdrantclient = QdrantClient(":memory:")

vectorstore = pstuts_rag.datastore.initialize_vectorstore(
    client=qdrantclient,
    collection_name=f"{params.filename}_qdrant",
    embeddings=embeddings,
)

_ = vectorstore.add_documents(documents=docs_chunks_semantic)
retriever =vectorstore.as_retriever(
    search_kwargs={"k": params.n_context_docs}
)

## A - Augmentation

We need to populate a prompt for LLM.


In [139]:
from langchain.prompts import ChatPromptTemplate

prompt_template = ChatPromptTemplate.from_messages([
    ("system", """\
You are a helpful and friendly Photoshop expert.

Your job is to answer user questions based **only** on transcript excerpts from training videos. These transcripts include **timestamps** that indicate when in the video the information was spoken.

The transcript is from **spoken audio**, so it may include informal phrasing, filler words, or fragmented sentences. You may interpret meaning **only to the extent it is clearly implied**, but you must not add new information or invent details.

✅ Your Responsibilities

1. Use **only** the transcript to answer.
2. If a clear answer is **not** present in the transcript, respond exactly:  
   "I don't know. This isn’t covered in the training videos."
3. When appropriate, include the **timestamp** of relevant information in your answer to help the user locate it in the original video.
4. Do **not** make assumptions or draw on outside knowledge.

💡 Style & Formatting Tips

- Use a step-by-step format when explaining procedures 📋.
- Add relevant emojis for clarity and friendliness 🎨🖱️🔧.
- Keep your answers short, clear, and conversational.
- The input timestamps will be in seconds. When reporting timestamps, convert them into minute:seconds format.

⛔ Never Do This

- ❌ Don't guess or summarize from general knowledge.
- ❌ Don’t fabricate steps, names, or features not in the transcript.
- ❌ Don’t omit the fallback response when required.
"""),
    ("user","""\

### Question
{question}

NEVER invent the explanation. ALWAYS use ONLY the context information.

### Context
{context}

""")])


In [140]:
def compile_references(context):
    references = [ 
                    {k: doc.metadata[k] for k in ("title","source","start","stop")} 
                    for doc in context
    ] 
    print(type(references))
    return json.dumps(references,indent=2)


## Generation

We will use a 4.1-nano to generate answers.

In [141]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4.1-mini",temperature=0)

In [142]:
from operator import itemgetter
from langchain.schema.output_parser import StrOutputParser
from langchain_core.runnables import RunnableLambda

form_context = RunnableLambda(itemgetter("question")) | {
    "context":  retriever, 
    "question": RunnablePassthrough() 
    } 

answer_chain = prompt_template | llm | StrOutputParser()

get_videos =   form_context | \
    {"input":RunnablePassthrough(),"answer": answer_chain} |\
    RunnableLambda( lambda d: 
        {**d["input"], "answer": d["answer"] + 
         "\nReferences:\n" +
         compile_references(d["input"]["context"]) 
        } )
    


val = get_videos.invoke({"question":"What are layers"})
pp(val["answer"])

<class 'list'>
('Layers are the building blocks of any image in Photoshop CC. You can think '
 'of layers like separate flat panes of glass stacked on top of each other. '
 'Each layer contains separate pieces of content. Some parts of a layer can be '
 'transparent, allowing you to see through to the layers below. This setup '
 'lets you edit parts of an image independently without affecting the rest of '
 'the image. You manage and work with layers in the Layers panel, where you '
 'can toggle their visibility on and off using the Eye icon. (See explanation '
 'around 0:28 to 1:00 and 1:25 to 2:32) 🎨🖼️\n'
 'References:\n'
 '[\n'
 '  {\n'
 '    "title": "Understand layers",\n'
 '    "source": '
 '"https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4",\n'
 '    "start": 0.47,\n'
 '    "stop": 62.14\n'
 '  },\n'
 '  {\n'
 '    "title": "Understand layers",\

In [143]:
pp(val["answer"])

('Layers are the building blocks of any image in Photoshop CC. You can think '
 'of layers like separate flat panes of glass stacked on top of each other. '
 'Each layer contains separate pieces of content. Some parts of a layer can be '
 'transparent, allowing you to see through to the layers below. This setup '
 'lets you edit parts of an image independently without affecting the rest of '
 'the image. You manage and work with layers in the Layers panel, where you '
 'can toggle their visibility on and off using the Eye icon. (See explanation '
 'around 0:28 to 1:00 and 1:25 to 2:32) 🎨🖼️\n'
 'References:\n'
 '[\n'
 '  {\n'
 '    "title": "Understand layers",\n'
 '    "source": '
 '"https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4",\n'
 '    "start": 0.47,\n'
 '    "stop": 62.14\n'
 '  },\n'
 '  {\n'
 '    "title": "Understand layers",\n'
 '    "sourc

In [144]:
pp(value.content)

('Layers are the building blocks of any image in Photoshop CC. 🖼️ They can be '
 'thought of as separate flat pints of glass, stacked one on top of the other. '
 'Each layer contains separate pieces of content, and some layers may have '
 'transparent areas that let you see through to the layers below. The Layers '
 'panel is where you select and work with layers, and you can toggle their '
 'visibility by clicking the Eye icon. The main benefit of layers is that they '
 'allow you to edit parts of an image independently without affecting the '
 'rest. \n'
 '\n'
 '📺 Watch the full explanation in the video titled "Understand layers" here: '
 '[https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4](https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.12

In [145]:
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict,Annotated
from langchain_core.documents import Document
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langchain_openai.chat_models import ChatOpenAI
import operator

class State(TypedDict):
    question: str
    context: List[Document]
    response: str
        
graph_builder = StateGraph(State).add_sequence([retrieve, generate ])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

NameError: name 'generate' is not defined

In [53]:
from langchain.schema.output_parser import StrOutputParser
response = graph.invoke({"question" : "What is the layer in Photoshop"})

In [None]:
response.keys()

In [None]:
type(response)

In [None]:
pp(response)

In [None]:
response.keys()