# Importing Model and APIs

In [1]:
from utilities import fetch_data
from dotenv import dotenv_values
from groq import Groq
from tqdm import tqdm
from sklearn.cluster import MeanShift
import numpy
from markdown_pdf import MarkdownPdf,Section

import ollama
import time
# Settings read from the local ".env" file; the Groq API key is looked up here.
variables = dotenv_values(".env")
# Model name passed as `model=` to the Groq chat-completion calls.
gen_model = "llama3-8b-8192"
# Model name passed as `model=` to ollama.embeddings.
embedding_model = "nomic-embed-text"

In [2]:

# Source material to process and the kind of source it is; both are handed
# to fetch_data.fetch_input further down.
content = "https://wow.groq.com/retrieval-augmented-generation-with-groq-api/"
content_type = "url"

# Groq API client shared by every chat-completion call in this notebook.
# Raises KeyError if GROQ_API_KEY is not present in the loaded settings.
client = Groq(api_key=variables["GROQ_API_KEY"])

# Prompts

In [3]:
# Prompt templates. The {placeholders} are filled in with str.replace by the
# functions that use them (not str.format), so literal braces in the inserted
# text are harmless.

# Asks for the core topics of a group of excerpts, each with an importance score.
topic_prompt = '''
Given the following excerpts compiled from textbooks and lecture transcripts on a subject.

{content}

Identify core topics discussed and provide them an importance score.
'''

# Asks for cleaned-up lecture notes built only from the supplied excerpts.
content_prompt = '''
Given the following excerpts compiled from textbooks and lecture transcripts on a subject.

{content}

Clean the contents and make a comprehensive lecture notes on the topics being covered. Stick to the contents
'''

# Asks for exam questions, weighted by topic importance. Both delimited
# sections are explicitly closed so the model can tell where each one ends.
question_prompt = '''
Given the following lecture notes.

<lecture_notes>
{lecture_notes}
</lecture_notes>

Topic importance of each topic discussed in the lecture is given below.

<topic importance>
{topic_imp}
</topic importance>
You are a Teacher tasked with setting up a large number of questions for an upcoming examination. The number of questions per topic should depend upon the topic importance.
The questions should include conceptual, reasoning and application level questions. Do not generate answers. Generate questions and not a question distribution
'''

In [4]:
def get_embedding(text):
    """Return the embedding for ``text`` from the configured ollama model.

    The request uses the module-level ``embedding_model`` and the value
    stored under the ``'embedding'`` key of the response is returned.
    """
    response = ollama.embeddings(model=embedding_model, prompt=text)
    embedding = response['embedding']
    return embedding


In [5]:
def _ask_professor(user_prompt):
    """Send one user prompt to the chat model and return the reply text."""
    response = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "You are a helpful professor tasked with teaching and testing knowledge of students."
            },
            {
                "role": "user",
                "content": user_prompt,
            },
        ],
        model=gen_model,
    )
    return response.choices[0].message.content


def syllabus(content_clusters, delay=7):
    """Build topic lists and lecture notes for every cluster of text.

    Args:
        content_clusters: Mapping of text chunk -> cluster label.
        delay: Seconds to pause after each cluster is processed, to space
            out the API requests. Defaults to 7.

    Returns:
        A list with one ``(topic_response, content_response)`` tuple per
        cluster, ordered by the first appearance of each label in
        ``content_clusters``.
    """
    # Group the chunks by label in a single pass instead of rescanning the
    # whole mapping for every label. Dict insertion order also makes the
    # cluster order deterministic, unlike iterating a set of labels.
    grouped = {}
    for text, label in content_clusters.items():
        grouped.setdefault(label, []).append(text)

    syllabus_list = []
    for texts in tqdm(grouped.values()):
        content = "\n".join(texts)

        # Two independent requests over the same cluster text: one for the
        # scored topic list, one for the cleaned lecture notes.
        topic_response = _ask_professor(topic_prompt.replace("{content}", content))
        content_response = _ask_professor(content_prompt.replace("{content}", content))

        syllabus_list.append((topic_response, content_response))
        time.sleep(delay)

    return syllabus_list

In [6]:
def generate_questions(content_tuple):
    """Generate a question paper for one (topic importance, lecture note) pair.

    ``content_tuple[0]`` is the topic-importance text and ``content_tuple[1]``
    is the lecture-note text. Returns a dict with the keys "Lecture Note",
    "Topic Importance" and "Question Paper". The function pauses for 30
    seconds after the request before returning.
    """
    result = {
        "Lecture Note": content_tuple[1],
        "Topic Importance": content_tuple[0],
    }

    # Fill the template one placeholder at a time: notes first, then topics.
    user_prompt = question_prompt.replace("{lecture_notes}", content_tuple[1])
    user_prompt = user_prompt.replace("{topic_imp}", content_tuple[0])

    system_message = {
        "role": "system",
        "content": "You are a helpful professor tasked with teaching and testing knowledge of students."
    }
    user_message = {"role": "user", "content": user_prompt}

    completion = client.chat.completions.create(
        messages=[system_message, user_message],
        model=gen_model,
    )
    result["Question Paper"] = completion.choices[0].message.content

    time.sleep(30)
    return result

In [7]:
# End-to-end run: fetch the text, embed it, cluster it, then generate notes
# and questions per cluster.

# NOTE(review): fetch_input presumably returns an iterable of text chunks;
# confirm against utilities.fetch_data.
content_text = fetch_data.fetch_input(content,content_type)
# Drop chunks that contain nothing but spaces and newlines.
content_text = [ct for ct in tqdm(content_text) if ct.replace("\n","").replace(" ","") != ""]
# One embedding per remaining chunk, stacked into a single array for clustering.
content_embedding = [get_embedding(ct) for ct in tqdm(content_text)]
content_embedding = numpy.array(content_embedding)
# MeanShift is fitted with its default parameters; labels_ holds one cluster
# label per chunk, in the same order as content_text.
clusters = MeanShift().fit(content_embedding)
# Map chunk text -> cluster label. Identical chunks collapse into one key.
content_clusters = dict(zip(content_text,clusters.labels_))
# One (topics, notes) tuple per cluster.
content_model = syllabus(content_clusters)
# Despite the name, content_dict is a list holding one dict per cluster.
content_dict = [generate_questions(ct) for ct in content_model]

100%|██████████| 1/1 [00:00<00:00, 7145.32it/s]
100%|██████████| 1/1 [00:01<00:00,  1.15s/it]
100%|██████████| 1/1 [00:09<00:00,  9.24s/it]


In [None]:
def write_chapters(content_dict: list, out_path: str) -> None:
    """Render the generated notes and question papers into a PDF file.

    The document title is taken from the last path component of the
    module-level ``content`` value, with anything after its first "."
    removed.

    Args:
        content_dict: List of dicts, one per section, each with the keys
            "Topic Importance", "Lecture Note" and "Question Paper".
        out_path: Path the PDF is saved to.
    """
    out_pdf = MarkdownPdf()

    # Strip trailing slashes first: a source such as "https://host/page/"
    # would otherwise yield an empty last component and a blank title.
    title = content.rstrip("/").split("/")[-1].split(".")[0]
    out_pdf.add_section(Section("# " + title + "\n"))

    parts = []
    # Number the sections from 1 so each one gets its own heading instead of
    # every section being labelled "Section 01".
    for number, cd in enumerate(content_dict, start=1):
        parts += [
            "## Section {:02d}\n".format(number),
            "### Topics Discussed\n",
            cd["Topic Importance"] + "\n",
            "### Notes\n",
            cd["Lecture Note"] + "\n",
            "### Sample Questions\n",
            cd["Question Paper"] + "\n\n",
        ]

    out_pdf.add_section(Section("".join(parts)))
    out_pdf.save(out_path)


In [None]:
# Write every generated section to "notes.pdf" in the working directory.
write_chapters(content_dict,"notes.pdf")