In [1]:
from langchain import PromptTemplate
from pinecone import Pinecone
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.document_loaders import PyPDFLoader , DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings



In [2]:
#Reading the pdf file data
def load_pdf(data):
    """Load every PDF found under a directory tree.

    Args:
        data: Directory path handed to ``DirectoryLoader``; it is searched
            with the ``**/*.pdf`` glob and each match is read with
            ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for the matched files.
    """
    pdf_loader = DirectoryLoader(data, glob="**/*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [3]:
extracted_data = load_pdf("data/")

# Fail fast: a directory with no matching PDFs yields an empty result, which
# would otherwise only surface later as an empty index and empty answers.
if not extracted_data:
    raise ValueError("No PDF pages were loaded from 'data/'; add PDF files before continuing.")

In [4]:
#checking the data
#extracted_data

In [5]:
#Split the documents into smaller chunks
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller, overlapping chunks.

    Args:
        extracted_data: Documents to split (e.g. the list returned by
            ``load_pdf``).
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
            Defaults to 500, the value this notebook has always used.
        chunk_overlap: ``chunk_overlap`` passed to the splitter. Defaults to 20.

    Returns:
        The chunks produced by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(extracted_data)


In [6]:
# Split the loaded PDF documents into chunks and report how many were produced.
text_chunks = text_split(extracted_data)
print("Length of text chunks: ", len(text_chunks))

Length of text chunks:  192


In [7]:
#Download the embeddings model
def download_hugging_face_embeddings():
    """Create the embedding model used when indexing and searching chunks.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured for
        ``sentence-transformers/all-MiniLM-L6-v2``.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )


In [8]:
# Instantiate the embedding model once; later cells pass this object to the Pinecone vector store.
embeddings = download_hugging_face_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [9]:
# Bare expression: the notebook shows the repr of the embeddings object as this cell's output.
embeddings


HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [25]:
import os

# Service credentials come from the environment, never from the notebook itself.
# Each name is None when its variable is not set.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

In [11]:
# NOTE(review): this rebinds the name `Pinecone` (already imported from `pinecone`
# in the first cell) to the gRPC client class PineconeGRPC.
from pinecone.grpc import PineconeGRPC as Pinecone
# NOTE(review): ServerlessSpec is not used in any of the visible cells.
from pinecone import ServerlessSpec

# Pinecone client created with the API key read from the environment.
pc = Pinecone(api_key=PINECONE_API_KEY)

# Name of the Pinecone index used by the vector-store cells below.
index_name = "sbot"


# Last expression of the cell: displays the index handle, which is not assigned to a variable.
# NOTE(review): presumably the "sbot" index must already exist — nothing here creates it; confirm.
pc.Index(index_name)

<pinecone.grpc.index_grpc.GRPCIndex at 0x17456f741a0>

In [12]:
#embed each chunk and upsert the embeddings into pinecone index
from langchain_pinecone import PineconeVectorStore

import hashlib


def _chunk_id(chunk):
    """Return a stable vector ID derived from a chunk's source, page and text."""
    meta = chunk.metadata
    fingerprint = f"{meta.get('source')}|{meta.get('page')}|{chunk.page_content}"
    return hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()


# Deterministic IDs make this cell idempotent: re-running it overwrites the same
# vectors instead of adding another randomly-identified copy of every chunk.
# Copies already stored under random IDs by earlier runs are not removed here.
docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    embedding=embeddings,
    index_name=index_name,
    ids=[_chunk_id(chunk) for chunk in text_chunks],
)


In [13]:
# Load Existing index 

from langchain_pinecone import PineconeVectorStore

# Build the vector store on top of the existing Pinecone index named by
# `index_name`, using the same embedding model for queries.
docsearch = PineconeVectorStore.from_existing_index(embedding=embeddings, index_name=index_name)

In [14]:
#Semantic search using the embeddings
def semantic_search(query, k=3):
    """Return the chunks in the vector store most similar to ``query``.

    Args:
        query: Natural-language search text.
        k: Number of results requested from ``docsearch.similarity_search``.
            Defaults to 3, the value this notebook has always used.

    Returns:
        The result of ``docsearch.similarity_search(query, k=k)``.
    """
    return docsearch.similarity_search(query, k=k)

# Try the retrieval path with a sample query; the cell output shows the returned documents.
semantic_search("What is Fashion?") 

[Document(id='bc1f1c00-8968-415c-b122-3383ea94452a', metadata={'author': 'wajahat alam', 'creationdate': '2016-08-09T14:16:47+00:00', 'creator': 'Nitro Pro 7  (7. 0. 1. 5)', 'moddate': '2016-08-09T14:17:02+00:00', 'page': 3.0, 'page_label': '4', 'producer': 'Nitro Pro 7  (7. 0. 1. 5)', 'source': 'data\\Fashion Course.pdf', 'title': 'Diploma in Fashion Design', 'total_pages': 46.0}, page_content='Fashion Design (Short Course)\nP a g e 4 | 46\nIntroduction to Fashion Design\nClothing has long been used as more than just a way to coverpeople’s bodies. Different types of\nclothing clearly reveal status, especially in the past but even today, and also the wearers’ personalities.\nThis need to conceal and also to reveal relies on a range of different clothing types and will often be\nchosen by wearers to do both. One of the most puzzling aspects of clothing is fashion.'),
 Document(id='8726e33c-7e75-4415-8f7d-08f880d2cdaf', metadata={'author': 'wajahat alam', 'creationdate': '2016-08-09T14:1

In [43]:
#creating a prompt template for the LLM
# Strict QA prompt: answer from the supplied {context} for the given {question},
# and say so when the answer is unknown.
# NOTE(review): the PromptTemplate built later in this notebook uses
# prompt_template2, not this string.
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [47]:
# Student-oriented prompt variant with the same {context} and {question} placeholders.
# This is the template the notebook's PromptTemplate is built from.
# NOTE(review): the text contains typos ("an helpful", "iin") that are sent to the
# model verbatim, and unlike prompt_template it does not tell the model to admit
# when it does not know the answer — confirm both are intended.
prompt_template2="""
you are an helpful assistant. You will be provided with a context and a question.
The users are mostly students who are trying to learn about fashion, so always try to answer the question iin a way that is easy to understand.
Context: {context}
Question: {question}

Helpful answer:
"""

In [48]:
# Wrap the student-oriented template and declare its two placeholders,
# {context} and {question}.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template2,
)

# Keyword arguments handed to the chain constructor so that it uses PROMPT.
chain_type_kwargs = dict(prompt=PROMPT)

In [32]:
import os

#creating the LLM instance using the gemini model
from langchain_google_genai import ChatGoogleGenerativeAI

# SECURITY: never hard-code the API key in the notebook. A literal key was
# previously committed in this cell; treat it as leaked and revoke/rotate it.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it in the environment before running this cell."
    )

llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    google_api_key=GOOGLE_API_KEY,
    temperature=0.7,
)



In [40]:
# Expose the vector store as a retriever using similarity search with k=3 results per query.
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":3})


In [50]:
from langchain.chains import RetrievalQA

# Retrieval QA chain: the Pinecone-backed retriever supplies the context and the
# Gemini LLM answers through the custom prompt carried in chain_type_kwargs.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",  # retrieved chunks are placed together into one prompt
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,  # keep the retrieved chunks alongside the answer
)


In [51]:
# Example usage
# The chain receives the question under the "query" key; the generated answer is read from "result".
# NOTE(review): the chain was built with return_source_documents=True, so presumably
# response["source_documents"] holds the retrieved chunks — confirm.
query = "What are the body types?"
response = qa.invoke({"query": query})
print(response["result"])

Okay, I can help with that! Based on the text you provided, here's a breakdown of the body types mentioned:

*   **Triangle:** This body type is characterized by a fitted bodice (the part of a garment that covers the torso) and a bell-shaped skirt that widens gently. Think of a Spanish-style dress as an example.

*   **Inverted Triangle:** This body type features wider shoulders and a narrower skirt. Dolman or raglan sleeves often accompany this shape. The text suggests this style is particularly flattering for women with larger busts.

*   **Oval:** This body type involves clothing that is draped over the figure and softly shaped, creating a feminine and decorative look.

I hope this helps you understand the different body types discussed in the text! Let me know if you have any other questions.
