In [None]:
!pip install langchain
!pip install openai
!pip install PyPDF2
!pip install faiss-cpu
!pip install tiktoken
!pip install pypdf
!pip install pdfplumber
!pip install pytesseract



In [None]:
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
import os
from google.colab import userdata
import pdfplumber
from pytesseract import pytesseract
from pdfminer.high_level import extract_text

# Get your API keys from openai, you will need to create an account.
# Here is the link to get the keys: https://platform.openai.com/account/billing/overview
# The key is read from the Colab secret named 'OpenAI' instead of being
# hard-coded in the notebook.
os.environ["OPENAI_API_KEY"] = userdata.get('OpenAI')

# Specify the file paths for the two PDF files
student_profile_path = '/content/Student_profile_2.pdf'
class_notes_path = '/content/Cyber-Attacks-Life-Cycle.pdf'


def _read_pdf_text(pdf_path, label):
    """Return the extractable text of every page in ``pdf_path``.

    Args:
        pdf_path: Path of the PDF to open with pdfplumber.
        label: Human-readable name used in the progress message.

    Returns:
        The page texts joined with a newline. Pages for which
        ``extract_text()`` returns nothing are skipped.
    """
    with pdfplumber.open(pdf_path) as pdf:
        print(f"Number of pages in {label}: {len(pdf.pages)}")
        # Extraction must happen while the PDF is still open.
        page_texts = [page.extract_text() for page in pdf.pages]
    # Join with "\n" so the last line of one page is not fused with the first
    # line of the next; the text is later split on "\n".
    return "\n".join(text for text in page_texts if text)


# Process the student profile PDF
student_raw_text = _read_pdf_text(student_profile_path, "student profile")

# Process the class notes PDF
notes_raw_text = _read_pdf_text(class_notes_path, "class notes")

# Both documents are chunked with the same settings: break on newlines and
# build ~500-character chunks that overlap by 100 characters.
chunking_options = {
    "separator": "\n",
    "chunk_size": 500,
    "chunk_overlap": 100,
    "length_function": len,
}

# Chunk the student profile text
student_text_splitter = CharacterTextSplitter(**chunking_options)
student_texts = student_text_splitter.split_text(student_raw_text)
print(f"Number of text chunks in student profile: {len(student_texts)}")

# Chunk the class notes text
notes_text_splitter = CharacterTextSplitter(**chunking_options)
notes_texts = notes_text_splitter.split_text(notes_raw_text)
print(f"Number of text chunks in class notes: {len(notes_texts)}")

# Set up the prompt template for generating the student profile.
# The template has a single placeholder, {context}, which receives one chunk
# of the student's PDF text. The section headings listed in the template are
# what the model is asked to emit; the later weakness-extraction step looks
# for the "Weaknesses:" heading in the reply, so keep that label stable.
student_profile_prompt_template = """
You are an education counselor responsible for creating a student profile based on the information provided about the student.

The context about the student is as follows:

{context}

Based on this information, please generate a comprehensive student profile including the following:

Student Name:
Student Age:
Strengths:
Weaknesses:
Preferred Learning Modes:
Academic Progress:
Recommended Interventions (if any):

Provide a detailed profile covering all the relevant aspects of the student's academic and personal background.
"""

# Wrap the raw template so it can be filled in with .format(context=...).
student_profile_prompt = PromptTemplate(template=student_profile_prompt_template, input_variables=["context"])

# A PDF with no extractable text (for example a scanned, image-only file)
# produces no chunks, and building a vector store from an empty list fails
# with an unhelpful error. Stop early with a clear message instead.
if not student_texts or not notes_texts:
    raise ValueError(
        "No extractable text was found in the student profile and/or class "
        "notes PDF; cannot build the vector stores."
    )

# Download embeddings from OpenAI
embeddings = OpenAIEmbeddings()

# Create a FAISS vector store for the student profile
student_docsearch = FAISS.from_texts(student_texts, embeddings)

# Create a FAISS vector store for the class notes
notes_docsearch = FAISS.from_texts(notes_texts, embeddings)

# Set up the LLM and the chain for the student profile.
# chain_type="stuff" places all retrieved documents into a single prompt.
llm = OpenAI(temperature=0.7)
student_profile_chain = load_qa_chain(llm, chain_type="stuff")

# Section headings requested by the student-profile prompt (lower-cased).
# They mark where the "Weaknesses" section of a generated profile ends.
_PROFILE_SECTION_HEADERS = (
    "student name:",
    "student age:",
    "strengths:",
    "weaknesses:",
    "preferred learning modes:",
    "academic progress:",
    "recommended interventions",
)


def _extract_weaknesses(profile_text):
    """Return the body of the "Weaknesses:" section of a generated profile.

    The model may put the content on the heading line, on the following
    lines, or as a bullet list, so everything from the heading up to the next
    known section heading is collected.

    Args:
        profile_text: One profile as returned by the LLM chain.

    Returns:
        The section's non-empty lines joined with newlines, or ``""`` when
        the profile has no "Weaknesses:" heading or the section is empty.
    """
    lines = profile_text.splitlines()
    for index, line in enumerate(lines):
        # Tolerate indentation, bullets and markdown emphasis on the heading.
        if not line.strip().lstrip("-*# ").lower().startswith("weaknesses:"):
            continue
        collected = []
        # Keep everything after the heading's colon, including later colons.
        inline = line.partition(":")[2].strip(" *\t")
        if inline:
            collected.append(inline)
        for follower in lines[index + 1:]:
            if follower.strip().lstrip("-*# ").lower().startswith(_PROFILE_SECTION_HEADERS):
                break
            if follower.strip():
                collected.append(follower.strip())
        if collected:
            return "\n".join(collected)
    return ""


# Generate the student profile (one LLM call per chunk). The results are kept
# so the weaknesses are read from the same text that is printed, rather than
# from a second, differently-sampled generation.
student_profiles = []
for student_text in student_texts:
    student_query = student_profile_prompt.format(context=student_text)
    student_docs = student_docsearch.similarity_search(student_query)
    student_profile = student_profile_chain.run(input_documents=student_docs, question=student_query)
    student_profiles.append(student_profile)
    print(student_profile)
    print("\n---\n")

# Create a prompt for generating questions based on the student's weaknesses
weakness_prompt_template = """
Based on the student profile, the student's weaknesses are:

{weaknesses}

Generate 3 questions that could be asked in class to help address these weaknesses.
"""

weakness_prompt = PromptTemplate(template=weakness_prompt_template, input_variables=["weaknesses"])

# Find the student's weaknesses in the first generated profile that has a
# non-empty "Weaknesses" section; fall back to an empty string.
weaknesses = next(
    (found for found in map(_extract_weaknesses, student_profiles) if found),
    "",
)
if not weaknesses:
    print("Warning: no weaknesses were found in the generated profile(s); the questions will not be targeted.")

# Generate questions based on the student's weaknesses
questions_query = weakness_prompt.format(weaknesses=weaknesses)
notes_docs = notes_docsearch.similarity_search(questions_query)
questions = student_profile_chain.run(input_documents=notes_docs, question=questions_query)
print(questions)

Number of pages in student profile: 2
Number of pages in class notes: 3
Number of text chunks in student profile: 3
Number of text chunks in class notes: 20


  warn_deprecated(
  warn_deprecated(
  warn_deprecated(



Student Name: Jessica Lee
Student Age: 15
Strengths:
- Strong analytical skills
- Excellent verbal communication
- Solid understanding of algebra and geometry
- Enthusiastic about biology and enjoys hands-on experiments
- Good comprehension skills
- Active participant in group activities
- Empathetic towards peers
Academic Progress:
- Mathematics:
Midterm Exam: 85
Final Exam: 88
- Science:
Lab Reports: 80
Quizzes: 75
Final Exam: 82
- English:
Literature Analysis: 72
Essay Writing: 68
Recommended Interventions (if any):
Based on Jessica's academic progress, it is recommended that she seeks extra help in essay writing and literature analysis to improve her grades in English.
Preferred Learning Modes:
Jessica enjoys collaborative learning settings with interactive activities. She responds well to visual aids, hands-on experiments, and group discussions.
Weaknesses:
Jessica needs to work on her essay writing and literature analysis skills. She also struggles with understanding the Reconna

In [None]:
!pip install gradio

Collecting gradio
  Downloading gradio-4.25.0-py3-none-any.whl (17.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m29.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.110.1-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.9/91.9 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==0.15.0 (from gradio)
  Downloading gradio_client-0.15.0-py3-none-any.whl (313 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m313.4/313.4 kB[0m [31m21.3 MB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Collecting python-multipart>=0.0.9 (from 

In [None]:
import os
import gradio as gr
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
import pdfplumber

# Section headings requested by the student-profile prompt (lower-cased).
# They mark where the "Weaknesses" section of a generated profile ends.
_PROFILE_SECTION_HEADERS = (
    "student name:",
    "student age:",
    "strengths:",
    "weaknesses:",
    "preferred learning modes:",
    "academic progress:",
    "recommended interventions",
)


def _read_pdf_text(pdf_path, label):
    """Return the extractable text of every page of a PDF, pages joined by newlines."""
    with pdfplumber.open(pdf_path) as pdf:
        print(f"Number of pages in {label}: {len(pdf.pages)}")
        # Extraction must happen while the PDF is still open.
        page_texts = [page.extract_text() for page in pdf.pages]
    # Join with "\n" so lines at a page boundary are not fused together; the
    # text is later split on "\n".
    return "\n".join(text for text in page_texts if text)


def _split_into_chunks(raw_text, label):
    """Split text on newlines into ~500-character chunks overlapping by 100."""
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=500,
        chunk_overlap=100,
        length_function=len,
    )
    chunks = splitter.split_text(raw_text)
    print(f"Number of text chunks in {label}: {len(chunks)}")
    return chunks


def _extract_weaknesses(profile_text):
    """Return the body of a profile's "Weaknesses:" section, or "" if absent/empty."""
    lines = profile_text.splitlines()
    for index, line in enumerate(lines):
        # Tolerate indentation, bullets and markdown emphasis on the heading.
        if not line.strip().lstrip("-*# ").lower().startswith("weaknesses:"):
            continue
        collected = []
        # Keep everything after the heading's colon, including later colons.
        inline = line.partition(":")[2].strip(" *\t")
        if inline:
            collected.append(inline)
        # The content often sits on the lines after the heading; gather them
        # until the next known section heading.
        for follower in lines[index + 1:]:
            if follower.strip().lstrip("-*# ").lower().startswith(_PROFILE_SECTION_HEADERS):
                break
            if follower.strip():
                collected.append(follower.strip())
        if collected:
            return "\n".join(collected)
    return ""


def process_pdfs(student_data_path, class_notes_path):
    """Generate a student profile and targeted class questions from two PDFs.

    The student PDF is chunked and a profile is generated per chunk with the
    LLM. The first non-empty "Weaknesses" section found in those profiles is
    then used, together with the most similar class-notes chunks, to generate
    three questions.

    Args:
        student_data_path: Path (or other input accepted by
            ``pdfplumber.open``) of the student data PDF.
        class_notes_path: Path (or other input accepted by
            ``pdfplumber.open``) of the class notes PDF.

    Returns:
        A ``(student_profile, questions)`` tuple of strings.
        ``student_profile`` is the per-chunk profiles, each followed by a
        ``---`` separator line.

    Raises:
        ValueError: If either input is ``None`` (for example, no file was
            uploaded) or either PDF contains no extractable text.
    """
    if student_data_path is None or class_notes_path is None:
        raise ValueError("Both a student data PDF and a class notes PDF are required.")

    # Process the student data PDF, then the class notes PDF
    student_raw_text = _read_pdf_text(student_data_path, "student data")
    notes_raw_text = _read_pdf_text(class_notes_path, "class notes")

    # Split both texts into smaller chunks
    student_texts = _split_into_chunks(student_raw_text, "student data")
    notes_texts = _split_into_chunks(notes_raw_text, "class notes")

    # An empty chunk list (e.g. a scanned, image-only PDF) cannot be turned
    # into a vector store; fail with a clear message instead.
    if not student_texts or not notes_texts:
        raise ValueError(
            "No extractable text was found in the student data and/or class notes PDF."
        )

    # Set up the prompt template for generating the student profile
    student_profile_prompt_template = """
    You are an education counselor responsible for creating a student profile based on the information provided about the student.

    The context about the student is as follows:

    {context}

    Based on this information, please generate a comprehensive student profile including the following:

    Student Name:
    Student Age:
    Strengths:
    Weaknesses:
    Preferred Learning Modes:
    Academic Progress:
    Recommended Interventions (if any):

    Provide a detailed profile covering all the relevant aspects of the student's academic and personal background.
    """

    student_profile_prompt = PromptTemplate(template=student_profile_prompt_template, input_variables=["context"])

    # Download embeddings from OpenAI
    embeddings = OpenAIEmbeddings()

    # Create FAISS vector stores for the student data and the class notes
    student_docsearch = FAISS.from_texts(student_texts, embeddings)
    notes_docsearch = FAISS.from_texts(notes_texts, embeddings)

    # Set up the LLM and the chain for the student profile
    llm = OpenAI(temperature=0.7)
    student_profile_chain = load_qa_chain(llm, chain_type="stuff")

    # Generate one profile per chunk. The results are kept so the weaknesses
    # are read from the same text that is returned to the caller, instead of
    # from a second, differently-sampled (temperature 0.7) generation.
    profiles = []
    for student_text in student_texts:
        student_query = student_profile_prompt.format(context=student_text)
        student_docs = student_docsearch.similarity_search(student_query)
        profiles.append(
            student_profile_chain.run(input_documents=student_docs, question=student_query)
        )
    student_profile = "".join(f"{profile}\n---\n" for profile in profiles)

    # Create a prompt for generating questions based on the student's weaknesses
    weakness_prompt_template = """
    Based on the student profile, the student's weaknesses are:

    {weaknesses}

    Generate 3 questions that could be asked in class to help address these weaknesses.
    """

    weakness_prompt = PromptTemplate(template=weakness_prompt_template, input_variables=["weaknesses"])

    # Use the first generated profile that has a non-empty "Weaknesses" section
    weaknesses = next(
        (found for found in map(_extract_weaknesses, profiles) if found),
        "",
    )
    if not weaknesses:
        print("Warning: no weaknesses were found in the generated profile(s); the questions will not be targeted.")

    # Generate questions based on the student's weaknesses
    questions_query = weakness_prompt.format(weaknesses=weaknesses)
    notes_docs = notes_docsearch.similarity_search(questions_query)
    questions = student_profile_chain.run(input_documents=notes_docs, question=questions_query)

    return student_profile, questions

def main():
    """Build the Gradio page for uploading the two PDFs and launch it.

    The page has two file inputs side by side, a button, and two text boxes.
    Clicking the button calls ``process_pdfs`` with the two uploads and shows
    its two return values in the text boxes.
    """
    with gr.Blocks() as app:
        gr.Markdown("## PDF Processing Interface")

        # Two upload widgets, one per column of a single row.
        with gr.Row():
            with gr.Column():
                student_upload = gr.File(label="Student Data PDF")
            with gr.Column():
                notes_upload = gr.File(label="Class Notes PDF")

        run_button = gr.Button("Process PDFs")
        profile_box = gr.Textbox(label="Student Profile", lines=10)
        questions_box = gr.Textbox(label="Generated Questions", lines=3)

        # Inputs and outputs are matched positionally to process_pdfs'
        # parameters and return tuple.
        upload_inputs = [student_upload, notes_upload]
        result_outputs = [profile_box, questions_box]
        run_button.click(process_pdfs, inputs=upload_inputs, outputs=result_outputs)

    app.launch()

# Build and launch the Gradio interface only when this code is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://013905ad225e9e5ed1.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
