In [2]:
# imports
import json
import re
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup
from google.ai.generativelanguage_v1beta.types import Tool as GenAITool
from langchain_google_genai import ChatGoogleGenerativeAI

from utils.config import get_env_var

In [3]:
# Resolve the three settings this notebook needs through the project's
# get_env_var helper; each variable is named after the key it is looked up by.
API_KEY_GOOGLE, SEARCH_ENGINE_ID, API_GEMINI_KEY = (
    get_env_var(setting_name)
    for setting_name in ("API_KEY_GOOGLE", "SEARCH_ENGINE_ID", "API_GEMINI_KEY")
)

In [4]:
# the search query you want
query = "learning web developing"
# using the first page
page = 1
# doc: https://developers.google.com/custom-search/v1/using_rest
# number of results per page assumed by the pagination arithmetic below
RESULTS_PER_PAGE = 10
# calculating start, (page=2) => (start=11), (page=3) => (start=21)
start = (page - 1) * RESULTS_PER_PAGE + 1
# Build the query string with urlencode so spaces and reserved characters
# (&, #, +, ...) inside `query` cannot corrupt or truncate the request.
search_params = {
    "key": API_KEY_GOOGLE,
    "cx": SEARCH_ENGINE_ID,
    "q": query,
    "start": start,
}
url = f"https://www.googleapis.com/customsearch/v1?{urlencode(search_params)}"

In [5]:
# make the API request
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors (bad key, quota, ...) here instead of
# letting an error payload be parsed as if it were search results.
search_response = requests.get(url, timeout=10)
search_response.raise_for_status()
data = search_response.json()

In [6]:
# get the result items; fall back to an empty list when the "items" key is
# missing so the loop below is skipped instead of raising TypeError on None
search_items = data.get("items") or []
if not search_items:
    print("No results found.")
# iterate over the results found
for i, search_item in enumerate(search_items, start=1):
    try:
        long_description = search_item["pagemap"]["metatags"][0]["og:description"]
    except (KeyError, IndexError):
        # KeyError: "pagemap", "metatags" or "og:description" is absent;
        # IndexError: "metatags" is present but is an empty list
        long_description = "N/A"
    # get the page title
    title = search_item.get("title")
    # page snippet
    snippet = search_item.get("snippet")
    # alternatively, you can get the HTML snippet (bolded keywords)
    html_snippet = search_item.get("htmlSnippet")
    # extract the page url
    link = search_item.get("link")
    # print the results; i+start-1 is the result's absolute rank across pages
    print("="*10, f"Result #{i+start-1}", "="*10)
    print("Title:", title)
    print("Description:", snippet)
    print("Long description:", long_description)
    print("URL:", link, "\n")

Title: how you would learn web development if you could start over ? : r ...
Description: Apr 10, 2023 ... Start by gaining a clear understanding of web development concepts, including HTML (Hypertext Markup Language), CSS (Cascading Style Sheets), ...
Long description: Posted by u/StockFan2047 - 215 votes and 181 comments
URL: https://www.reddit.com/r/Frontend/comments/12h51z1/how_you_would_learn_web_development_if_you_could/ 

Title: Learn web development | web.dev
Description: Explore our growing collection of courses on key web design and development subjects. An industry expert has written each course, helped by members of the ...
Long description: N/A
URL: https://web.dev/learn 

Title: Web Development Courses | Codecademy
Description: Web development courses at Codecademy helps you master HTML, CSS, JavaScript, React, and more. Learn to build web apps and websites.
Long description: Web development courses at Codecademy helps you master HTML, CSS, JavaScript, React, and more. Le

In [7]:
# Fetch the page to summarise; use a timeout so the cell cannot hang, and stop
# on HTTP errors rather than scraping an error page.
url = "https://web.dev/learn"
page = requests.get(url, timeout=10)
page.raise_for_status()
soup = BeautifulSoup(page.content, "html.parser")
text = soup.get_text()

lines = text.split('\n')
new_txt = ""
# keywords (matched case-insensitively) that mark a line as relevant
per_words = ["learn" , "course" , "resources"]
# how many lines may be collected starting from each keyword hit
LINES_PER_MATCH = 2
# The budget starts full, so the first non-empty lines of the page are kept
# even when they contain no keyword.
nb_line_to_fetch = LINES_PER_MATCH
for line in lines:
    stripped_line = line.strip()
    lowered_line = stripped_line.lower()
    # any keyword hit refills the budget
    if any(word.lower() in lowered_line for word in per_words):
        nb_line_to_fetch = LINES_PER_MATCH
    # Keep the line only if it is non-empty, budget remains, and its text does
    # not already occur (as a substring) in what has been collected so far.
    if stripped_line and nb_line_to_fetch > 0 and stripped_line not in new_txt:
        new_txt += stripped_line + "\n"
        nb_line_to_fetch -= 1

print(new_txt)

Learn web development  |  web.dev
Skip to main content
Resources
Web Platform
Learn how to build better user experiences.
Performance
Get up to speed on web development.
Learn HTML
Learn CSS
Learn JavaScript
Learn Performance
Learn Accessibility
More courses
Additional resources
Explore content collections, patterns, and more.
Privacy
Images
AI and the web
PageSpeed Insights
Stay organized with collections
Save and categorize content based on your preferences.
Explore our growing collection of courses on key web design and development subjects. An industry expert has written each course, helped by members of the Chrome team. Follow the modules sequentially, or dip into the topics you most want to learn about.
Course
A solid overview of HTML for developers, from novice to expert level HTML.
Start course
A guide to CSS with modules covering everything from accessibility to z-index.
An in-depth course on the basics of JavaScript.
A course for those new to web performance, a vital aspect o

In [31]:
# Build the Gemini chat client used by the cells below.
llm = ChatGoogleGenerativeAI(
    google_api_key=API_GEMINI_KEY,  # credential loaded in the env-vars cell
    model="gemini-2.0-flash",
    max_retries=2,
    temperature=0,
    timeout=None,     # no explicit request timeout is set
    max_tokens=None,  # no explicit output-length cap is set
)
 
# System prompt: fixed instructions plus the JSON schema the model is asked to
# follow. It is a plain (non-f) string, so the braces below are literal text.
# NOTE(review): the scraped text (`new_txt`) is not interpolated into this
# prompt — confirm whether it was meant to be included.
system_prompt = """You are a roadmap generator. Based on the topic provided, list of known skill and duration , generate a structured learning roadmap in clean JSON format.

Output Format (JSON):
{
  "topic": "[Learning Topic]",
  "total_duration": "[Total estimated time]",
  "steps": [
    {
      "step_number": 1,
      "title": "[Step Title]",
      "duration": "[Time estimate]",
      "technologie": "[Technologie name]",
      "parent_technology" : [parent Technologie name],
      "difficulty_level" : [float 1-5],
      "sub_topic": "[Job/Field covers name]",
      "study_hours": "[Number of study hours estimate]",
      "description": "[What you'll learn and why]",
      "free_resources": [
        {
          "name": "[Resource Name]",
          "url": "[Working URL]",
          "type": "[tutorial/course/documentation/repository]"
        }
      ],
      "paid_resources": [
        {
          "name": "[Resource Name]",
          "url": "[Working URL]",
          "type": "[course/certification]",
          "platform": "[Udemy/Coursera/etc]"
        }
      ]
    }
  ]
}

Requirements:
- don't interact with the human (example don't say since you know or here are..,etc...)
- study hours  need to be realistic on average  3h or 2h
- Return ONLY valid JSON, no markdown formatting or code blocks
- Provide 8-12 steps in logical learning order
- Include 2 free resources and 2 paid resources per step
- Use realistic durations (1 week to 8 weeks per step)
- Ensure all URLs are valid and accessible
- Make brief descriptions and only focus on  the step without the mention of the human knowledge 
- Organize steps from beginner to advanced
- we need valid json in string format always no list,ect
- duration provided always use weeks as unit
- skills given are skills that the humain already know ,you use them in adavantage to make the roadmap more common(examples: skip learning that skill or lower the duration of learning it etc ...)
- if the skill has nothing in common with topic you can skip follow it however it's good to follow skills
- you can add up to 2 weeks to the duration given (example 15 weeks you 're max is 17)
- sub_topic is the name of field , job or ecosystem that cover (fontend ,backend , math ,ect...)
- parent_technology is the from what the technology chosen in the step built from
"""

# The user's request: topic, already-known skills and target duration.
user_request = "Topic :Web Development skills :[Python , html] , duration: 15"

# Conversation sent to the model: system instructions first, then the request.
messages = [("system", system_prompt), ("human", user_request)]

# Invoke the model with a Google Search tool attached for this call, then show
# the raw reply.
response = llm.invoke(
    messages,
    tools=[GenAITool(google_search={})],
)
print(response.content)

```json
{
  "topic": "Web Development",
  "total_duration": "17 weeks",
  "steps": [
    {
      "step_number": 1,
      "title": "HTML Fundamentals",
      "duration": "1 week",
      "technologie": "HTML5",
      "parent_technology": "Markup Languages",
      "difficulty_level": 1.0,
      "sub_topic": "Frontend Development",
      "study_hours": "2h",
      "description": "Learn the basic building blocks of web pages, including elements, attributes, and structure. Since you have prior HTML knowledge, focus on HTML5 semantics and best practices.",
      "free_resources": [
        {
          "name": "Mozilla Developer Network (MDN) HTML Tutorial",
          "url": "https://developer.mozilla.org/en-US/docs/Web/HTML",
          "type": "documentation"
        },
        {
          "name": "freeCodeCamp Responsive Web Design",
          "url": "https://www.freecodecamp.org/learn/responsive-web-design/",
          "type": "tutorial"
        }
      ],
      "paid_resources": [
        

In [32]:
# Raw text of the model reply produced by the previous cell.
res = response.content

# The reply may or may not be wrapped in a Markdown code fence (```json ... ```).
# Strip an optional opening and closing fence instead of slicing fixed offsets,
# so unfenced replies and replies with trailing whitespace also parse.
json_text = res.strip()
json_text = re.sub(r"^```[A-Za-z]*\s*", "", json_text)
json_text = re.sub(r"\s*```$", "", json_text)

parsed_json_res = json.loads(json_text)
# Show the keys of the first step and of the top-level object.
print(parsed_json_res['steps'][0].keys())
print(parsed_json_res.keys())



dict_keys(['step_number', 'title', 'duration', 'technologie', 'parent_technology', 'difficulty_level', 'sub_topic', 'study_hours', 'description', 'free_resources', 'paid_resources'])
dict_keys(['topic', 'total_duration', 'steps'])
