In [None]:
import os
import pdfplumber
from sentence_transformers import SentenceTransformer
import numpy as np
import requests

def extract_texts_from_pdfs(pdf_folder):
    """Extract and chunk the text of every PDF file directly inside a folder.

    Args:
        pdf_folder: Path of the directory to scan (sub-folders are not
            descended into).

    Returns:
        A flat list of text chunks from all PDFs, each produced by
        ``chunk_text`` with its default chunk size. Files are processed in
        sorted filename order so the chunk order is reproducible across runs.
    """
    all_chunks = []
    # os.listdir() returns entries in arbitrary order; sort for stable output.
    for entry in sorted(os.listdir(pdf_folder)):
        pdf_path = os.path.join(pdf_folder, entry)
        # Match the extension case-insensitively (".PDF" is common) and skip
        # directories that merely happen to be named like a PDF.
        if not entry.lower().endswith('.pdf') or not os.path.isfile(pdf_path):
            continue
        with pdfplumber.open(pdf_path) as pdf:
            page_texts = [page.extract_text() for page in pdf.pages]
        # Skip pages for which extract_text() returned nothing.
        text = "".join(page_text + "\n" for page_text in page_texts if page_text)
        # Chunk each paper
        all_chunks.extend(chunk_text(text))
    return all_chunks

def chunk_text(text, chunk_size=500):
    """Split text into consecutive chunks of at most ``chunk_size`` words.

    Words are whatever ``str.split()`` yields (runs of non-whitespace), and
    each chunk is those words re-joined with single spaces, so the original
    whitespace and line breaks are not preserved.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of chunk strings; empty when ``text`` contains no words.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    # A negative step would make range() empty and silently drop all text,
    # so reject it the same way range() already rejects a step of zero.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    words = text.split()
    return [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]

#Retrieval and Ollama
def search(query, chunks, embeddings, model, top_k=3):
    """Return the ``top_k`` chunks whose embeddings score highest against a query.

    Scores are the dot product of each row of ``embeddings`` with the query
    embedding. NOTE(review): that equals cosine similarity only if the
    embeddings are unit-normalised -- confirm for the model in use.

    Args:
        query: The query string to embed.
        chunks: Sequence of text chunks, aligned index-for-index with
            ``embeddings``.
        embeddings: Array-like of chunk embeddings, one row per chunk.
        model: Object with an ``encode(list_of_str)`` method returning one
            embedding per input string.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunks, best match first. Empty when ``top_k`` is not
        positive or there are no chunks.
    """
    # Guard: with top_k == 0 the slice [-0:] would select *every* index, and
    # an empty corpus would fail inside np.dot with a shape error.
    if top_k <= 0 or len(chunks) == 0:
        return []
    query_emb = model.encode([query])[0]
    scores = np.dot(embeddings, query_emb)
    # argsort is ascending: take the last top_k indices, then reverse them.
    top_indices = np.argsort(scores)[-top_k:][::-1]
    return [chunks[i] for i in top_indices]

def ask_ollama(context, question, model="llama3", timeout=300):
    """Send a context + question prompt to a local Ollama server.

    Posts a non-streaming request to ``http://localhost:11434/api/generate``.

    Args:
        context: Text placed under the ``Context:`` heading of the prompt.
        question: Text placed after ``Question:`` in the prompt.
        model: Name of the Ollama model to use.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely.

    Returns:
        The value of the ``"response"`` field of the JSON reply, or ``None``
        if the request failed, the reply was not valid JSON, or the reply had
        no ``"response"`` field. A diagnostic is printed in each failure case.
    """
    prompt = f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"
    try:
        response = requests.post(
            "http://localhost:11434/api/generate",
            json={
                "model": model,
                "prompt": prompt,
                "stream": False
            },
            timeout=timeout,
        )
    except requests.exceptions.RequestException as e:
        # Covers connection refused (server not running) and timeouts.
        print("Error contacting Ollama:", e)
        return None

    try:
        data = response.json()
    except ValueError as e:
        # requests raises a ValueError subclass when the body is not JSON.
        print("Error parsing Ollama response:", e)
        print("Raw response text:", response.text)
        return None

    # Don't hand a payload without a "response" field back to the caller as
    # if it were the answer; report it and signal failure instead.
    if not isinstance(data, dict) or "response" not in data:
        print("Unexpected Ollama response:", data)
        return None
    return data["response"]

def extract_text_from_pdf(pdf_path):
    """Return the text of a single PDF as one newline-separated string.

    Args:
        pdf_path: Path of the PDF file to open with pdfplumber.

    Returns:
        The text of every page that yielded any, in page order, with a
        newline appended after each page. Pages for which ``extract_text()``
        returns nothing are skipped; the result is ``""`` if no page has text.
    """
    with pdfplumber.open(pdf_path) as document:
        # Extract each page exactly once, while the file is still open.
        extracted = [page.extract_text() for page in document.pages]
    return "".join(f"{piece}\n" for piece in extracted if piece)

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Embed all chunks
# Folder holding the reference PDFs. Set the RAG_PDF_FOLDER environment
# variable to point at your own folder without editing the notebook.
pdf_folder = os.environ.get("RAG_PDF_FOLDER", "/Users/burto/RAG academic")
chunks = extract_texts_from_pdfs(pdf_folder)
print(f"Total chunks from all papers: {len(chunks)}")
# Fail here with a clear message: with nothing to embed, the retrieval cells
# below would fail later with a less obvious error.
if not chunks:
    raise ValueError(f"No extractable text found in PDFs under {pdf_folder!r}")

# Embed every chunk once; `model` and `embeddings` are reused by the cells below.
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(chunks)

Total chunks from all papers: 174


In [None]:
# Example query: retrieve the best-matching chunks, then ask the LLM.
query = "Summarize what makes a strong paper."
results = search(query=query, chunks=chunks, embeddings=embeddings, model=model)
# The retrieved chunks, one per line, become the prompt context.
context = "\n".join(results)
answer = ask_ollama(context=context, question=query)
print(f"\nLLM Answer:\n {answer}")


LLM Answer:
 A strong paper is characterized by:

* A clear and concise thesis statement
* Effective organization and structure, including a detailed outline
* Well-supported arguments with relevant background information, facts, examples, expert opinions, and other supporting details
* Clear and logical connections between ideas
* No wordiness or repetition, with sentences that are precise and easy to understand
* A smooth flow of ideas from one paragraph to the next
* A clear and concise writing style, free from unnecessary jargon or technical terms
* Accurate citation and referencing of sources
* A well-organized and logical structure, including a brief introduction, background information, literature review, methodology, results, and conclusion.


In [None]:
# Example: Use your own PDF as the prompt
user_pdf = "/Users/burto/Downloads/test essay.pdf"  # Path to your PDF
user_text = extract_text_from_pdf(user_pdf)
# extract_text_from_pdf returns "" when no page yields text. Stop here rather
# than asking the LLM to give feedback on an empty essay.
if not user_text.strip():
    raise ValueError(f"No extractable text found in {user_pdf!r}")

# Retrieve reference chunks relevant to the feedback instruction.
query = "Give detailed feedback and suggestions for improvement for the following essay."
results = search(query, chunks, embeddings, model)
context = "\n".join(results)

# Combine context and your PDF text
full_context = f"{context}\n\nEssay:\n{user_text}"

answer = ask_ollama(full_context, query)
print("\nLLM Feedback:\n", answer)


LLM Feedback:
 Feedback and Suggestions:

**Strengths:**

* The essay explores a fascinating topic that has captured human imagination for centuries.
* It provides a clear overview of the Martian environment, highlighting its similarities to Earth and potential for supporting life.
* The writer effectively incorporates various scientific findings and arguments to support the possibility of life on Mars.

**Weaknesses:**

1. **Organization:** The essay lacks a clear structure and transitions between paragraphs are abrupt. Consider dividing the text into introduction, body paragraphs, and conclusion.
2. **Depth and analysis:** While the essay presents some interesting points, it does not delve deeply enough into the scientific implications of life on Mars or explore potential criticisms of the argument.
3. **Clarity and concision:** Some sentences are wordy or unclear. Use simpler language to convey complex ideas, and aim for a consistent tone throughout the essay.

**Suggestions:**

1.