In [25]:
from pydantic import BaseModel, Field
from typing import List

# Structured notes for a single chapter of the course.
# Every field is required (`...` as the default) and carries a description that
# ends up in the JSON schema produced from these models.
# NOTE(review): documentation is kept in `#` comments on purpose — pydantic is
# understood to surface a class docstring as the schema-level "description",
# which would change the schema text; confirm before converting to a docstring.
class ChapterSummary(BaseModel):
    # Heading used to identify the chapter.
    chapter_title: str = Field(..., description="Title of the chapter")
    # Three summaries of decreasing length; the word limits live only in the
    # description text and are not enforced by any validator here.
    long_summary: str = Field(..., description="A detailed summary of the chapter content (Max 150 words)")
    short_summary: str = Field(..., description="A concise summary of the chapter content (Max 100 words)")
    very_short_summary: str = Field(..., description="A brief summary of the chapter content (Max 50 words)")
    # Free-form bullet lists; each entry is a plain string.
    key_points: List[str] = Field(..., description="List of practical takeaways or key points from the chapter")
    jargon_list: List[str] = Field(..., description="List of key marketing terms or jargon used in the chapter")

# Top-level container: one course name plus an ordered list of chapter summaries.
# Both fields are required. Kept as `#` comments rather than a docstring so the
# generated JSON schema text is not affected (presumably a docstring would be
# emitted as the schema "description" — verify against pydantic before changing).
class MarketingCourseSummary(BaseModel):
    course_name: str = Field(..., description="Name of the marketing course")
    # Each element must satisfy the ChapterSummary model.
    chapters: List[ChapterSummary] = Field(..., description="List of chapter summaries for the course")

In [26]:
import os

In [27]:
def fetch_chapters_from_folder(folder_path) -> str:
    """Concatenate every ``.txt`` file in a folder into one markdown-style string.

    Files are processed in sorted filename order. Each file contributes a
    section of the form ``## <Chapter Name>\\n<file content>\\n``, where the
    chapter name is the filename without its trailing ``.txt``, with
    underscores turned into spaces and title-cased. Sections are joined with
    a single newline.

    Args:
        folder_path: Directory to scan (non-recursive).

    Returns:
        The combined text, or an empty string when the folder holds no
        matching files.

    Raises:
        OSError: If the folder cannot be listed or a file cannot be read.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    suffix = ".txt"
    chapter_texts = []
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)
        # Ignore other extensions, and also directories that merely happen to
        # be named "*.txt" (opening those would raise).
        if not file_name.endswith(suffix) or not os.path.isfile(file_path):
            continue
        # Strip only the trailing extension; str.replace would also delete a
        # ".txt" that appears in the middle of the name.
        stem = file_name[: -len(suffix)]
        chapter_name = stem.replace("_", " ").title()
        with open(file_path, "r", encoding="utf-8") as file:
            content = file.read()
        chapter_texts.append(f"## {chapter_name}\n{content}\n")
    return "\n".join(chapter_texts)

In [28]:
# Folder holding the per-chapter transcript .txt files.
# NOTE: this is a machine-specific absolute path; adjust it when running elsewhere.
CHAPTERS_DIR = "/home/ravi0531rp/Desktop/CODES/p-projects/coursera_bot/data/"
chapter_text = fetch_chapters_from_folder(CHAPTERS_DIR)

In [29]:
# Print the complete concatenated transcript text as a visual sanity check of what was loaded.
print(chapter_text)

## 001 Building Strong Bands 1
[MUSIC] Hello, I'm Barbara Kahn, and I'm a professor of marketing at the Wharton
School. And, I'm here to talk to you about
marketing. So this, this segment is Marketing 101,
the basics, the principles of marketing. And my focus is going to be on building
strong brands because of course the essence of marketing is to have
a very strong brand. So, let's start off with the first question, a very basic question but maybe
not as obvious as you might think.
Which is what is marketing? And I'm going to argue that marketing is
the studies of a market. So what's a market? A market is an exchange between two
partners, frequently a buyer and a seller, but marketing also, applies to non-profit
or things where there isn't necessarily money
being transacted. But what you need for marketing to exist
or for a market to exist is to have an exchange. And what I'm going to argue is that what
marketing means is going to differ as a function of different
aspects of those exc

In [30]:
import os
from dotenv import load_dotenv
import json
import ast
# Load secrets from the project's .env file into the process environment.
# NOTE: machine-specific absolute path; adjust when running elsewhere.
load_dotenv("/home/ravi0531rp/Desktop/CODES/p-projects/PlanSage/plansage/.env")

# Fail early with a readable message when a variable is absent. Without this
# check, assigning None into os.environ raises an opaque
# "TypeError: str expected, not NoneType".
_REQUIRED_ENV_VARS = (
    "GOOGLE_API_KEY",
    "GROQ_API_KEY",
    "LANGCHAIN_API_KEY",
    "LANGCHAIN_PROJECT",
)
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if os.getenv(name) is None]
if _missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env_vars)
    )

# Module-level copies kept for any later cell that reads these names.
# The values already live in os.environ, so they are not written back.
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
GROQ_API_KEY = os.environ["GROQ_API_KEY"]
LANGCHAIN_API_KEY = os.environ["LANGCHAIN_API_KEY"]
LANGCHAIN_PROJECT = os.environ["LANGCHAIN_PROJECT"]

# Fixed LangChain tracing settings that do not come from the .env file.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"

In [31]:
import google.generativeai as genai
from json_repair import repair_json

# Hand the Google API key from the environment to the generative-AI client.
# Indexing os.environ raises KeyError if GOOGLE_API_KEY is not set.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

In [32]:
# JSON schema (as a Python dict) describing the expected response structure;
# it is interpolated into the model's system instruction further down.
json_schema = MarketingCourseSummary.model_json_schema()

# Display the schema so the exact text sent to the model can be inspected.
print(json_schema)

{'$defs': {'ChapterSummary': {'properties': {'chapter_title': {'description': 'Title of the chapter', 'title': 'Chapter Title', 'type': 'string'}, 'long_summary': {'description': 'A detailed summary of the chapter content (Max 150 words)', 'title': 'Long Summary', 'type': 'string'}, 'short_summary': {'description': 'A concise summary of the chapter content (Max 100 words)', 'title': 'Short Summary', 'type': 'string'}, 'very_short_summary': {'description': 'A brief summary of the chapter content (Max 50 words)', 'title': 'Very Short Summary', 'type': 'string'}, 'key_points': {'description': 'List of practical takeaways or key points from the chapter', 'items': {'type': 'string'}, 'title': 'Key Points', 'type': 'array'}, 'jargon_list': {'description': 'List of key marketing terms or jargon used in the chapter', 'items': {'type': 'string'}, 'title': 'Jargon List', 'type': 'array'}}, 'required': ['chapter_title', 'long_summary', 'short_summary', 'very_short_summary', 'key_points', 'jargon_

In [33]:
# System prompt: sets the persona and embeds the schema dict (its Python repr)
# so the model knows which structure to return.
system_prompt = f"""You are an expert Professor of Marketing at University of Pennsylvenia. Given the text from chapterwise transcripts of the course, you go through it in detail. And then, generate the notes for the students in the given format. In the longer notes, do not gloss over any detail. The longest notes should be enough to cover the chapter's revision notes.
  Using this JSON schema:
    Summary = {json_schema}
  Return a `Summary`. 
  """

# Ask for a JSON response body and use temperature 0.0.
generation_settings = {
    "response_mime_type": "application/json",
    "temperature": 0.0,
}

model = genai.GenerativeModel(
    "models/gemini-1.5-flash",
    system_instruction=system_prompt,
    generation_config=generation_settings,
)

In [34]:

# Send the concatenated transcripts as the only content part of the request.
response = model.generate_content([chapter_text])
# Pass the raw reply through json_repair before parsing, presumably to tolerate
# slightly malformed JSON from the model — confirm against json_repair's docs.
good_json_string = repair_json(response.text)
# Parse into plain Python objects; the result is NOT validated against
# MarketingCourseSummary here, so missing keys only surface in later cells.
response_json = json.loads(good_json_string)



In [35]:
# Persist the parsed response for later reuse.
# Create the output folder first: on a fresh checkout ./summaries may not exist
# and open(..., "w") would fail with FileNotFoundError.
os.makedirs("./summaries", exist_ok=True)
# Explicit UTF-8 keeps this writer consistent with the markdown export.
with open("./summaries/sum_1_2.json", "w", encoding="utf-8") as json_file:
    json.dump(response_json, json_file, indent=4)

In [36]:
def create_markdown_from_json(json_response, output_file):
    """Render a course-summary dict as a markdown document and write it to disk.

    The document starts with a ``# <course_name>`` heading. For each chapter it
    then emits a ``##`` chapter heading, the three summaries under ``###``
    headings, and the key points and jargon terms as ``-`` bullet lists. A
    blank line follows each chapter. All lines are joined with ``\\n``.

    Args:
        json_response: Mapping with ``course_name`` and ``chapters``; each
            chapter must provide ``chapter_title``, ``long_summary``,
            ``short_summary``, ``very_short_summary``, ``key_points`` and
            ``jargon_list``.
        output_file: Destination path of the markdown file. Missing parent
            directories are created.

    Raises:
        KeyError: If any of the expected keys is absent.
        OSError: If the file or its parent directory cannot be written.
    """
    md_content = [f"# {json_response['course_name']}"]

    for chapter in json_response["chapters"]:
        md_content.append(f"## {chapter['chapter_title']}")
        md_content.append("### Long Summary")
        md_content.append(chapter["long_summary"])
        md_content.append("### Short Summary")
        md_content.append(chapter["short_summary"])
        md_content.append("### Very Short Summary")
        md_content.append(chapter["very_short_summary"])
        md_content.append("### Key Points")
        md_content.extend(f"- {point}" for point in chapter["key_points"])
        md_content.append("### Jargon List")
        md_content.extend(f"- {jargon}" for jargon in chapter["jargon_list"])
        md_content.append("")  # Blank line for spacing

    # The document is fully built before touching the filesystem, so a bad
    # payload (KeyError above) never leaves a partial or empty file behind.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:  # empty when output_file has no directory component
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_file, "w", encoding="utf-8") as file:
        file.write("\n".join(md_content))

In [37]:
# Destination of the markdown rendering, relative to the notebook's working directory.
output_file = "./summaries/sum1_2.md"
# Render the parsed model response as markdown and write it to that path.
create_markdown_from_json(response_json, output_file)