In [1]:
import os
import pdfplumber
from sentence_transformers import SentenceTransformer
import numpy as np
import requests

def extract_texts_from_pdfs(pdf_folder):
    all_chunks = []
    pdf_files = [f for f in os.listdir(pdf_folder) if f.endswith('.pdf')]
    for pdf_file in pdf_files:
        pdf_path = os.path.join(pdf_folder, pdf_file)
        with pdfplumber.open(pdf_path) as pdf:
            text = ""
            for page in pdf.pages:
                page_text = page.extract_text()
                if page_text:
                    text += page_text + "\n"
            # Chunk each paper
            chunks = chunk_text(text)
            all_chunks.extend(chunks)
    return all_chunks

def chunk_text(text, chunk_size=500):
    words = text.split()
    return [' '.join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]

#Retrieval and Ollama
def search(query, chunks, embeddings, model, top_k=3):
    query_emb = model.encode([query])[0]
    scores = np.dot(embeddings, query_emb)
    top_indices = np.argsort(scores)[-top_k:][::-1]
    return [chunks[i] for i in top_indices]

def ask_ollama(context, question, model="llama3"):
    prompt = f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"
    response = requests.post(
        "http://localhost:11434/api/generate",
        json={
            "model": model,
            "prompt": prompt,
            "stream": False
        }
    )
    try:
        data = response.json()
        return data.get("response", data)
    except Exception as e:
        print("Error parsing Ollama response:", e)
        print("Raw response text:", response.text)
        return None

def extract_text_from_pdf(pdf_path):
    with pdfplumber.open(pdf_path) as pdf:
        text = ""
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text + "\n"
    return text

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#Embed all chunks
relative_path = os.getcwd()
filename = "/RAG-academic"
absolute_path = relative_path + filename
pdf_folder = absolute_path  # Change to your folder
chunks = extract_texts_from_pdfs(pdf_folder)
print(f"Total chunks from all papers: {len(chunks)}")

model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(chunks)

Total chunks from all papers: 174


In [3]:
#Example query
query = "Summarize what makes a strong paper."
results = search(query, chunks, embeddings, model)
context = "\n".join(results)
answer = ask_ollama(context, query)
print("\nLLM Answer:\n", answer)


LLM Answer:
 According to the context, a strong paper is characterized by the following features:

1. **Clear thesis statement**: A well-defined and arguable thesis that sets the tone for the entire paper.
2. **Well-organized structure**: A logical flow of ideas, with clear transitions between paragraphs and sections.
3. **Strong arguments and evidence**: Well-supported claims backed by credible sources, examples, and expert opinions.
4. **Good notes and outlining**: Thorough note-taking and outlining help to ensure that the paper is well-planned and coherent.
5. **Effective writing style**: Clear, concise, and engaging prose that holds the reader's attention.
6. **Revised and edited content**: A paper that has been thoroughly revised and edited to eliminate errors, clarify ideas, and improve overall quality.

By following these guidelines, students can produce a strong paper that effectively communicates their ideas and arguments.


In [9]:
# Example: Use your own PDF as the prompt
filename = "/test-essay.pdf"
user_pdf = relative_path + filename
user_text = extract_text_from_pdf(user_pdf)

query = "Give detailed feedback and suggestions for improvement for the following essay."
results = search(query, chunks, embeddings, model)
context = "\n".join(results)

system_instruction = (
    "System: You are an expert writing coach. "
    "Always give detailed, constructive feedback and suggestions for improvement."
)

# Combine context and your PDF text
full_context = f"{system_instruction}\n\n{context}\n\nEssay:\n{user_text}"
print(f"Full context length: {full_context} characters")

Full context length: System: You are an expert writing coach. Always give detailed, constructive feedback and suggestions for improvement.

the students’ responses by the present author. 28
Therefore, it can be concluded that... n The implication of this is... (Adapted from Sheehy, 2016, p. 11) 60 ACADEMIC WRITING HANDBOOK FOR LEARNERS IN THE FURTHER EDUCATION AND TRAINING (FET) SECTOR Writing recommendations You may or may not be required to include recommendations and the assessment brief will indicate if these are required. Having written your conclusion(s) it should be relatively straightforward to identify some recommendations. These can include suggestions for improvements in relation to the key points in your argument and/or suggestions for future research or development. Recommendations should be realistic and brief. Here are some phrases that may be useful in writing the recommendations of your written assessment work: n The results of this study highlight the need for further

In [8]:

answer = ask_ollama(full_context, query)
print("\nLLM Feedback:\n", answer)


LLM Feedback:
 **Feedback and Suggestions**

The Potential of Life on Mars is an engaging and well-structured essay that effectively explores the possibility of life existing on Mars. The author presents a clear argument, supported by relevant scientific evidence, and demonstrates a good understanding of the subject matter.

**Strengths:**

1. **Clear structure**: The essay follows a logical structure, with each paragraph building upon the previous one to create a cohesive narrative.
2. **Effective use of evidence**: The author skillfully incorporates scientific findings, such as seasonal methane emissions and extremophiles on Earth, to support their argument.
3. **Good transitions**: The essay flows smoothly between paragraphs, thanks to effective transition phrases.

**Suggestions for Improvement:**

1. **Conclude more strongly**: While the conclusion is clear, it could be strengthened by summarizing the main points and reiterating the significance of finding life on Mars.
2. **Use 