In [1]:
# Step 1: Install dependencies (run in terminal first)
# pip install groq langchain pypdf python-dotenv
import os
from groq import Groq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, TextLoader

In [2]:
import os
from dotenv import load_dotenv

# Step 2: Load credentials and create the Groq client
# Load environment variables from .env
load_dotenv()

# Get the API key
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail here with a clear message instead of much later inside an API call.
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )

# Shared client used by summarize_with_groq(); it must exist before that
# function is called, otherwise the call fails with a NameError.
client = Groq(api_key=GROQ_API_KEY)

In [3]:
# Step 3: Set up the source folder and the list that collects loaded documents
docs_folder, docs = "sample_documents/", []

In [4]:
# Start from an empty list so that re-running this cell does not append the
# same files a second time.
docs = []

# os.listdir() returns names in arbitrary order; sorting keeps the document
# order (and therefore the order of the summaries) stable between runs.
for filename in sorted(os.listdir(docs_folder)):
    filepath = os.path.join(docs_folder, filename)
    lowered_name = filename.lower()  # extension check is case-insensitive
    if lowered_name.endswith(".pdf"):
        loader = PyPDFLoader(filepath)
    elif lowered_name.endswith(".txt"):
        loader = TextLoader(filepath)
    else:
        print(f"Skipping unsupported file type: {filename}")
        continue
    # load() returns a list, so one file may contribute several entries.
    docs.extend(loader.load())

# The count is the number of loaded entries, which may exceed the file count.
print(f"✅ Loaded {len(docs)} document(s)")

✅ Loaded 5 document(s)


In [6]:
# Step 4: Break the loaded documents into overlapping chunks.
# chunk_size caps the length of each chunk and chunk_overlap is how much
# consecutive chunks share; both are in the splitter's length units
# (presumably characters with the default settings - confirm in the docs).
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=2000)
docs_chunks = text_splitter.split_documents(docs)
print(f"📄 Total chunks created: {len(docs_chunks)}")

📄 Total chunks created: 6


In [15]:
# Step 5: Function to summarize text using Groq LLaMA
def summarize_with_groq(text, model="llama-3.3-70b-versatile"):
    """Summarize ``text`` in three paragraphs with a Groq-hosted chat model.

    The function uses the notebook-level ``client`` object, which must be
    defined before the first call.

    Args:
        text: The text to summarize.
        model: Name of the chat model to request. Defaults to the model the
            notebook has always used, so existing calls are unaffected.

    Returns:
        The summary text from the first choice of the response, or an empty
        string when ``text`` is blank or the response carries no content.
    """
    # Nothing to summarize: skip the request instead of sending an empty prompt.
    if not text or not text.strip():
        return ""

    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": f"Please summarize the following text in 3 paragraphs:\n\n{text}"
            }
        ]
    )
    # Always return a string so callers can safely join the results.
    return response.choices[0].message.content or ""

In [16]:
# Step 6: Ask the model for a summary of every chunk, in order
summary_list = []
chunk_total = len(docs_chunks)
for position, piece in enumerate(docs_chunks, start=1):
    print(f"⏳ Summarizing chunk {position}/{chunk_total}...")
    summary_list.append(summarize_with_groq(piece.page_content))

# Step 7: Stitch the per-chunk summaries together, separated by blank lines
final_summary = "\n\n".join(summary_list)

⏳ Summarizing chunk 1/6...
⏳ Summarizing chunk 2/6...
⏳ Summarizing chunk 3/6...
⏳ Summarizing chunk 4/6...
⏳ Summarizing chunk 5/6...
⏳ Summarizing chunk 6/6...


In [12]:
# Step 8: Output final summary
output_file = "final_summary.txt"
with open(output_file, "w", encoding="utf-8") as f:
    f.write(final_summary)

print(f"\n📌 Final Summary saved to {output_file}")
print("\n📌 Final Summary Preview:\n")

# Preview the first 1000 characters; only show the ellipsis when the summary
# was actually cut off, so a short summary is not presented as truncated.
preview_limit = 1000
preview = final_summary[:preview_limit]
if len(final_summary) > preview_limit:
    print(preview, "...")
else:
    print(preview)


📌 Final Summary saved to final_summary.txt

📌 Final Summary Preview:

Zoca is currently active and looking to fill a role for a Product Operations Associate. The company is seeking candidates with strong knowledge of Excel and SQL, good communication skills, and strong analytical and problem-solving skills. The role is based in Bangalore and offers a salary range of 4 to 7 LPA. There are two vacancies available, and the company is looking for candidates with 0-2 years of experience and any educational background.

The interview process for the role consists of one screening round and two technical rounds. Candidates can find more information about the job description on LinkedIn. The role of a Product Operations Associate is crucial in supporting the operational aspects of a product's lifecycle within an organization. This includes ensuring smooth coordination between product management, engineering, and other departments such as marketing and customer support. The key responsibilitie