In [None]:
import os
import tempfile
from zipfile import ZipFile

import requests
from dotenv import load_dotenv, find_dotenv
from langchain.prompts.chat import (
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Locate the nearest .env file and load its variables into the process
# environment before anything reads credentials.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)

# Indexing (rather than .get) raises KeyError right here when the key is
# missing from the environment.
google_api_key = os.environ["GOOGLE_API_KEY"]

In [None]:
def download_folder_from_link(url, extract_path, timeout=30):
    """Download a ZIP archive from ``url`` and extract it into ``extract_path``.

    The archive is streamed to a scratch file inside a temporary directory,
    so nothing is written to the current working directory and the scratch
    file is removed whether the download/extraction succeeds or fails.

    Args:
        url: HTTP(S) address of the ZIP archive.
        extract_path: Directory to extract into; created if it does not exist.
        timeout: Timeout in seconds handed to ``requests.get`` so a stalled
            server cannot hang the notebook indefinitely.

    Returns:
        None. Success and failure are both reported with ``print``; any
        exception is caught and printed rather than re-raised.
    """
    try:
        with tempfile.TemporaryDirectory() as scratch_dir:
            archive_path = os.path.join(scratch_dir, "archive.zip")

            # The response is used as a context manager so the streamed
            # connection is released even if writing the file fails.
            with requests.get(url, stream=True, timeout=timeout) as response:
                response.raise_for_status()  # Raise for non-2xx status codes
                with open(archive_path, "wb") as archive_file:
                    for chunk in response.iter_content(chunk_size=8192):
                        archive_file.write(chunk)

            # Only create the target directory once the download succeeded.
            os.makedirs(extract_path, exist_ok=True)

            with ZipFile(archive_path, "r") as zip_ref:
                zip_ref.extractall(extract_path)

        print(f"Folder downloaded and extracted to {extract_path}")

    except Exception as e:
        print(f"An unexpected error occurred: {e}")


# Example usage (replace with your actual URL and desired extract path).
# The current working directory is passed as-is: a hard-coded trailing "\\"
# is only a path separator on Windows and would become part of the directory
# name on other platforms.
download_folder_from_link(
    "https://docs.python.org/3/archives/python-3.13-docs-text.zip",
    os.getcwd(),
)


In [None]:
# directory = "python-3.13-docs-text/c-api"

# import os
# import multiprocessing


# def load_text_file(file_path):
#     """Function to load a single text file."""
#     return TextLoader(file_path).load()


# def load_all_text_files(directory):
#     """Parallelized function to load all text files from a directory."""
#     file_paths = []

#     for root, _, files in os.walk(directory):
#         for file in files:
#             if file.endswith(".txt"):
#                 file_paths.append(os.path.join(root, file))
#     print(len(file_paths))
#     with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
#         all_docs = pool.map(load_text_file, file_paths)

#     return [doc for docs in all_docs for doc in docs]  # Flatten the list

# docs = load_all_text_files(directory)
# print(f"Loaded {len(docs)} documents.")

In [None]:
# Chat model that produces the final answer from the assembled prompt.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# Decode explicitly as UTF-8 instead of relying on the platform default
# encoding, which differs between operating systems.
# NOTE(review): assumes python_3_13_changes.txt is UTF-8 encoded — confirm.
docs = TextLoader("python_3_13_changes.txt", encoding="utf-8").load()

# Split into overlapping chunks so each embedded piece stays small while
# neighbouring chunks share some text.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# NOTE(review): no persist_directory or collection_name is given here;
# confirm whether re-running this cell in the same kernel adds the same
# chunks to the store a second time.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001"),
)
retriever = vectorstore.as_retriever()

# System instructions plus the retrieved context. Built from adjacent string
# literals so the prompt text contains no stray quote characters; the user's
# question is supplied once, by the human message below.
template = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "Context:\n{context}"
)

system_message_prompt = SystemMessagePromptTemplate.from_template(template)
# Input variables are inferred from the template text, so only the template
# itself is passed.
human_message_prompt = HumanMessagePromptTemplate.from_template("{question}")
chat_prompt_template = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)


def format_docs(documents):
    """Join the ``page_content`` of each retrieved document into one string.

    Documents are separated by a blank line; an empty input yields ``""``.
    """
    return "\n\n".join(document.page_content for document in documents)


def generate_response(retriever, query):
    """Answer ``query`` using context fetched by ``retriever``.

    Args:
        retriever: Runnable retriever that maps the query string to a list of
            documents exposing ``page_content``.
        query: The user's question as a string.

    Returns:
        The model's reply as a plain string (the chain ends in
        ``StrOutputParser``).
    """
    chain = (
        {
            # Feed the prompt the documents' text rather than the repr of a
            # list of Document objects (which would include metadata noise).
            "context": retriever | format_docs,
            "question": RunnablePassthrough(),
        }
        | chat_prompt_template
        | llm
        | StrOutputParser()
    )
    return chain.invoke(query)

In [None]:
# Ask a question the indexed document should be able to answer.
summary_answer = generate_response(retriever, "What is this document about?")
print(summary_answer)

In [None]:
# Ask something unrelated to the indexed document; the system prompt tells
# the model to say it doesn't know when the context has no answer.
off_topic_answer = generate_response(
    retriever, "When is next indian parliment election?"
)
print(off_topic_answer)