In [18]:
# imports
import asyncio
import json
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup
from google.ai.generativelanguage_v1beta.types import Tool as GenAITool
from langchain_google_genai import ChatGoogleGenerativeAI
from prisma import Prisma

from utils.config import get_env_var

In [19]:
# Resolve the three credentials through the project's get_env_var helper,
# in the same order as before: Google API key, search engine id, Gemini key.
API_KEY_GOOGLE, SEARCH_ENGINE_ID, API_GEMINI_KEY = (
    get_env_var(var_name)
    for var_name in ("API_KEY_GOOGLE", "SEARCH_ENGINE_ID", "API_GEMINI_KEY")
)

In [20]:
# Search parameters: the query text and the (1-based) result page to fetch.
query = "learning web developing"
page = 1
# doc: https://developers.google.com/custom-search/v1/using_rest
# `start` is the 1-based index of the first result of the requested page,
# with 10 results per page: (page=1) => (start=1), (page=2) => (start=11), ...
start = (page - 1) * 10 + 1
# Build the query string with urlencode so that spaces and any reserved
# characters in the query (or in the key / engine id) are escaped instead of
# being pasted raw into the URL.
search_params = {
    "key": API_KEY_GOOGLE,
    "cx": SEARCH_ENGINE_ID,
    "q": query,
    "start": start,
}
url = f"https://www.googleapis.com/customsearch/v1?{urlencode(search_params)}"

In [21]:
# Make the API request.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error (4xx/5xx) into an explicit exception
# instead of letting an error payload be parsed as if it were a result set.
search_response = requests.get(url, timeout=30)
search_response.raise_for_status()
data = search_response.json()

In [22]:
# Get the result items. data.get("items") is None when the response carries no
# "items" key, so fall back to an empty list rather than crashing in enumerate().
search_items = data.get("items") or []
if not search_items:
    print("No search results returned.")
# iterate over the results found on this page
for i, search_item in enumerate(search_items, start=1):
    try:
        long_description = search_item["pagemap"]["metatags"][0]["og:description"]
    except (KeyError, IndexError):
        # KeyError: a level of the pagemap is missing;
        # IndexError: "metatags" is present but is an empty list.
        long_description = "N/A"
    # get the page title
    title = search_item.get("title")
    # page snippet
    snippet = search_item.get("snippet")
    # alternatively, you can get the HTML snippet (bolded keywords)
    html_snippet = search_item.get("htmlSnippet")
    # extract the page url
    link = search_item.get("link")
    # print the results; i + start - 1 is the absolute rank across pages
    print("="*10, f"Result #{i+start-1}", "="*10)
    print("Title:", title)
    print("Description:", snippet)
    print("Long description:", long_description)
    print("URL:", link, "\n")

Title: how you would learn web development if you could start over ? : r ...
Description: Apr 10, 2023 ... Start by gaining a clear understanding of web development concepts, including HTML (Hypertext Markup Language), CSS (Cascading Style Sheets), ...
Long description: Posted by u/StockFan2047 - 215 votes and 181 comments
URL: https://www.reddit.com/r/Frontend/comments/12h51z1/how_you_would_learn_web_development_if_you_could/ 

Title: Learn web development | web.dev
Description: Explore our growing collection of courses on key web design and development subjects. An industry expert has written each course, helped by members of the ...
Long description: N/A
URL: https://web.dev/learn 

Title: Web Development Courses | Codecademy
Description: Web development courses at Codecademy helps you master HTML, CSS, JavaScript, React, and more. Learn to build web apps and websites.
Long description: Web development courses at Codecademy helps you master HTML, CSS, JavaScript, React, and more. Le

In [23]:

#url = "https://web.dev/learn"
#page = requests.get(url)
#soup = BeautifulSoup(page.content, "html.parser")
#text = soup.get_text()

#lines = text.split('\n')
#new_txt = ""
#per_words = ["learn" , "course" , "resources"]
#nb_line_to_fetch = 2
#for line in lines:
#    stripped_line = line.strip()
#    for word in per_words:
#        if word.lower() in stripped_line.lower():
#            nb_line_to_fetch = 2
#            break
#    if stripped_line and nb_line_to_fetch>0 and new_txt.find(stripped_line) == -1 :
#        new_txt += stripped_line + "\n"
#        nb_line_to_fetch -= 1
#
#print(new_txt)


In [24]:
# Gemini chat model settings: temperature 0, no explicit max_tokens or timeout,
# and max_retries set to 2.
gemini_settings = {
    "model": "gemini-2.0-flash",
    "google_api_key": API_GEMINI_KEY,
    "temperature": 0,
    "max_tokens": None,
    "timeout": None,
    "max_retries": 2,
}
llm = ChatGoogleGenerativeAI(**gemini_settings)
 
# System prompt for the roadmap generator: it spells out the JSON schema the
# model must return and the rules it must follow.
# This is a plain string literal with no placeholders, so nothing scraped or
# fetched earlier in the notebook is injected into it.
# The JSON key names used below (e.g. 'technologie', 'sub_topic', 'step_type',
# 'study_hours_per_day') are read back verbatim by the cell that stores the
# steps with Prisma, so renaming a key here requires updating that cell too.
# NOTE(review): the prompt asks for raw JSON without code fences, yet the
# captured output of this cell starts with a ```json fence - the parsing cell
# has to cope with that.
system_prompt = """You are a roadmap generator. Based on the topic provided, list of known skill and duration , generate a structured learning roadmap in clean JSON format.

Output Format (JSON):
{
  "topic": "[Learning Topic]",
  "total_duration": "[Total estimated time]",
  "steps": [
    {
      "step_number": 1,
      "title": "[Step Title]"(string),
      "duration": "[Time estimate]" (number),
      "technologie": "[Technologie name]"(string),
      "parent_technology" : [e.g., "Python" for Flask/Django , Java for spring boot ...](string),
      "difficulty_level" : [float 1-5],
      "sub_topic": "[Job/Field covers name]"(string),
      "study_hours_per_day": "[Number of study hours estimate]"(number),
      "description": "[What you'll learn and why]" (string),
      "step_type" :["required", "alternative", "optional"],
      "free_resources": [
        {
          "name": "[Resource Name]"(string),
          "url": "[Working URL]"(string),
          "type": "[tutorial/course/documentation/repository]"
        }
      ],
      "paid_resources": [
        {
          "name": "[Resource Name]"(string),
          "url": "[Working URL]"(string),
          "type": "[course/certification]",
          "platform": "[Udemy/Coursera/etc]"
        }
      ]
    }
  ]
}

Requirements:
- don't interact with the human (example don't say since you know or here are..,etc...)
- study hours per day  need to be realistic on average  3h or 2h and the unit is second 
- Return ONLY valid JSON, no markdown formatting or code blocks
- Provide 8-12 steps in logical learning order
- Include 2 free resources and 2 paid resources per step
- Use realistic durations (1 week to 8 weeks per step)
- Ensure all URLs are valid and accessible
- Make brief descriptions and only focus on  the step without the mention of the human knowledge 
- Organize steps from beginner to advanced
- we need valid json in string format always no list,ect
- duration provided always use weeks as unit
- skills given are skills that the humain already know ,you use them in adavantage to make the roadmap more common(examples: skip learning that skill or lower the duration of learning it etc ...)
- if the skill has nothing in common with topic you can skip follow it however it's good to follow skills
- you can add up to 2 weeks to the duration given (example 15 weeks you 're max is 17)
- sub_topic is the name of field , job or ecosystem that cover (fontend ,backend , math ,ect...)
- parent_technology is the from what the technology chosen in the step built from
- dunration is a number not string
"""

# Conversation sent to the model: the system instructions followed by the
# user's request (topic, already-known skills, target duration).
user_request = "Topic :Web Development skills :[Python , html] , duration: 15"
messages = [("system", system_prompt), ("human", user_request)]

# Call the model with the Google Search tool passed in `tools`, then show the
# raw text of the reply.
search_tool = GenAITool(google_search={})
response = llm.invoke(messages, tools=[search_tool])
print(response.content)

```json
{
  "topic": "Web Development",
  "total_duration": "17 weeks",
  "steps": [
    {
      "step_number": 1,
      "title": "HTML Fundamentals",
      "duration": 1,
      "technologie": "HTML",
      "parent_technology": "None",
      "difficulty_level": 1,
      "sub_topic": "Frontend",
      "study_hours_per_day": 7200,
      "description": "Learn the basic structure of HTML documents, tags, and attributes to create web content.",
      "step_type": "required",
      "free_resources": [
        {
          "name": "Mozilla Developer Network (MDN) - HTML",
          "url": "https://developer.mozilla.org/en-US/docs/Web/HTML",
          "type": "documentation"
        },
        {
          "name": "freeCodeCamp - HTML",
          "url": "https://www.freecodecamp.org/learn/responsive-web-design/basic-html-and-html5/",
          "type": "tutorial"
        }
      ],
      "paid_resources": [
        {
          "name": "Udemy - HTML, CSS, JavaScript & More! Complete Web Developmen

In [25]:
def _extract_json_payload(text: str) -> str:
    """Return ``text`` without an optional surrounding Markdown code fence.

    Handles a reply wrapped in a fence opened by a line such as "```json" or
    "```" and closed by "```", as well as a reply that is already bare JSON.
    Leading and trailing whitespace is removed in both cases.
    """
    payload = text.strip()
    if payload.startswith("```"):
        # Drop the whole opening fence line, whatever language tag it carries.
        first_newline = payload.find("\n")
        payload = payload[first_newline + 1:] if first_newline != -1 else payload[3:]
        payload = payload.rstrip()
        if payload.endswith("```"):
            payload = payload[:-3]
    return payload.strip()


# Parse the model reply. The previous fixed slice res[7:-3] only worked when the
# reply was wrapped in exactly "```json" ... "```" with nothing after the fence.
res = response.content
parsed_json_res = json.loads(_extract_json_payload(res))
print(parsed_json_res['steps'][0].keys())
print(parsed_json_res.keys())



dict_keys(['step_number', 'title', 'duration', 'technologie', 'parent_technology', 'difficulty_level', 'sub_topic', 'study_hours_per_day', 'description', 'step_type', 'free_resources', 'paid_resources'])
dict_keys(['topic', 'total_duration', 'steps'])


In [26]:
prisma = Prisma()
print(parsed_json_res['topic'])
await prisma.connect()
topic_to_create = await prisma.topic.create(
    data = {
        'title' : parsed_json_res['topic']
    }
)
topic = await prisma.topic.find_first(
    where={
        'title': parsed_json_res['topic']
    }
)
step_to_create = await prisma.skill.create(
        data={ 
        'title' :   step['title'],
        'parent_topic_id': topic.id,
        'subtopic': step['sub_topic'],
        'description': step['description'],
        'duration' : step['duration'],
        'study_hours_per_day' : step['study_hours_per_day'],
        'technologie' :  step['technologie'],
        'parent_technology': step['parent_technology'] ,
        'skill_type ' : step['step_type'].upper(), 
        'free_resources' : json.dumps(step['free_resources']),
        #'paid_resources' : step['paid_resources']

        }
)


await prisma.disconnect()


Web Development
