# Importing Model and APIs

In [44]:
from utilities import fetch_data
from dotenv import dotenv_values
import openai 
from tqdm import tqdm
from utilities import fetch_data
from sklearn.cluster import MeanShift
import numpy
from markdown_pdf import MarkdownPdf,Section
# Read the key/value pairs stored in the local ".env" file.
variables = dotenv_values(".env")
# Model identifiers passed to the Mistral client: one for chat completions,
# one for text embeddings.
gen_model = "mistral-large-latest"
embedding_model = "mistral-embed"

In [3]:
import mistralai.client

# Hand the OpenAI key from .env to the openai module.
openai.api_key = variables["OPENAI_API_KEY"] #For video support
# Input location and input kind; both are later passed to
# fetch_data.fetch_input, and `content` is also split on "/" and "." to
# build the PDF title.
content = variables["CONTENT"]
content_type = variables["CONTENT_TYPE"]

# Single Mistral client instance used for every embedding and chat request.
client = mistralai.client.MistralClient(api_key=variables["MISTRAL_API_KEY"])

# Prompts

In [24]:
# Prompt asking the model to list the core topics of one cluster of text and
# score their importance. "{content}" is substituted with str.replace.
topic_prompt = '''
Given the following excerpts compiled from textbooks and lecture transcripts on a subject.

{content}

Identify core topics discussed and provide them an importance score.
'''

# Prompt asking the model to turn the same cluster of text into cleaned-up
# lecture notes. "{content}" is substituted with str.replace.
content_prompt = '''
Given the following excerpts compiled from textbooks and lecture transcripts on a subject.

{content}

Clean the contents and make a comprehensive lecture notes on the topics being covered. Stick to the contents
'''

# Prompt asking the model to write exam questions from lecture notes, weighted
# by topic importance. "{lecture_notes}" and "{topic_imp}" are substituted
# with str.replace. Each delimited section has a matching closing tag so the
# model can tell where the topic-importance text ends and the task begins.
question_prompt = ''' 
Given the following lecture notes.

<lecture_notes>
{lecture_notes}
</lecture_notes>

Topic importance of each topic discussed in the lecture is given below.

<topic importance>
{topic_imp}
</topic importance>
You are a Teacher tasked with setting up a large number of questions for an upcoming examination. The number of questions per topic should depend upon the topic importance.
The questions should include conceptual, reasoning and application level questions. Do not generate answers. Generate questions and not a question distribution
'''

In [18]:
def get_embedding(text):
    """Return the embedding vector for one piece of text.

    ``text`` is sent as a one-element batch to the module-level Mistral
    ``client`` using ``embedding_model``; the ``embedding`` attribute of the
    first item in the response's ``data`` is returned.
    """
    response = client.embeddings(model=embedding_model, input=[text])
    first_item = response.data[0]
    return first_item.embedding


In [34]:
def _ask_model(prompt):
    """Send ``prompt`` as a single user message to ``gen_model`` and return the reply text."""
    message = mistralai.models.chat_completion.ChatMessage(role="user", content=prompt)
    reply = client.chat(model=gen_model, messages=[message])
    return reply.choices[0].message.content


def syllabus(content_clusters):
    """Build a topic summary and lecture notes for every cluster.

    Args:
        content_clusters: Mapping of text chunk -> cluster label.

    Returns:
        A list with one ``(topic_response, content_response)`` tuple per
        distinct label: the model's reply to ``topic_prompt`` and to
        ``content_prompt`` for the newline-joined chunks of that cluster.
    """
    # Group the chunks by label in a single pass instead of rescanning the
    # whole mapping once per label. Chunks keep their mapping order.
    texts_by_label = {}
    for text, label in content_clusters.items():
        texts_by_label.setdefault(label, []).append(text)

    syllabus_list = []
    # Labels are still iterated through a set so the order of the returned
    # tuples matches what callers got before.
    for label in tqdm(set(content_clusters.values())):
        # Named cluster_text (not "content") to avoid shadowing the
        # module-level input path of the same name.
        cluster_text = "\n".join(texts_by_label[label])
        topic_response = _ask_model(topic_prompt.replace("{content}", cluster_text))
        content_response = _ask_model(content_prompt.replace("{content}", cluster_text))
        syllabus_list.append((topic_response, content_response))
    return syllabus_list

In [38]:
def generate_questions(content_tuple):
    """Ask the model for exam questions covering one cluster.

    Args:
        content_tuple: Sequence whose item 0 is the topic-importance text and
            item 1 is the lecture-note text.

    Returns:
        A dict with the keys "Lecture Note", "Topic Importance" and
        "Question Paper" (the model's reply to ``question_prompt``).
    """
    lecture_note = content_tuple[1]
    topic_importance = content_tuple[0]

    # Fill the placeholders in the same order as before: notes first, then
    # the topic importance.
    filled_prompt = question_prompt.replace("{lecture_notes}", lecture_note)
    filled_prompt = filled_prompt.replace("{topic_imp}", topic_importance)

    user_message = mistralai.models.chat_completion.ChatMessage(
        role="user", content=filled_prompt
    )
    completion = client.chat(model=gen_model, messages=[user_message])

    return {
        "Lecture Note": lecture_note,
        "Topic Importance": topic_importance,
        "Question Paper": completion.choices[0].message.content,
    }

In [39]:
# Load the input described by the .env settings; the result is iterated below
# as a sequence of text chunks.
content_text = fetch_data.fetch_input(content,content_type)
# Drop chunks that are empty once newlines and spaces are removed.
content_text = [ct for ct in tqdm(content_text) if ct.replace("\n","").replace(" ","") != ""]
# One embedding request per remaining chunk.
content_embedding = [get_embedding(ct) for ct in tqdm(content_text)]
content_embedding = numpy.array(content_embedding)
# Cluster the embedding vectors with MeanShift using its default settings.
clusters = MeanShift().fit(content_embedding)
# Map each chunk to its cluster label. Chunks with identical text share one
# dict key, so a repeated chunk keeps only the label of its last occurrence.
content_clusters = dict(zip(content_text,clusters.labels_))
# One (topic importance, lecture notes) tuple per cluster.
content_model = syllabus(content_clusters)
# One dict per cluster holding the notes, topic importance and questions.
content_dict = [generate_questions(ct) for ct in content_model]

In [62]:
def write_chapters(content_dict: list, out_path: str) -> str:
    """Render the generated sections into a PDF and save it.

    Args:
        content_dict: Sequence of dicts, one per section, each with the keys
            "Topic Importance", "Lecture Note" and "Question Paper".
        out_path: Destination path handed to ``MarkdownPdf.save``.

    Returns:
        ``out_path``, so the declared ``str`` return type holds.
    """
    out_pdf = MarkdownPdf()
    # Title section: file name of the module-level ``content`` path, cut at
    # the first "." of that name.
    out_pdf.add_section(Section("# " + content.split("/")[-1].split(".")[0] + "\n"))

    chapters = []
    # Number the sections 01, 02, ... instead of labelling every one "01".
    for number, cd in enumerate(content_dict, start=1):
        chapters.append("".join([
            f"## Section {number:02d}\n",
            "### Topics Discussed\n",
            cd["Topic Importance"], "\n",
            "### Notes\n",
            cd["Lecture Note"], "\n",
            "### Sample Questions\n",
            cd["Question Paper"], "\n\n",
        ]))

    # All chapters go into a single PDF section, as before.
    out_pdf.add_section(Section("".join(chapters)))
    out_pdf.save(out_path)
    return out_path
