In [1]:
# imports
import asyncio
import json
import traceback
from math import sqrt
from typing import *
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup
from google.ai.generativelanguage_v1beta.types import Tool as GenAITool
from langchain_google_genai import ChatGoogleGenerativeAI
from prisma import Prisma

from utils.config import get_env_var

In [2]:
# Load the credentials used later in the notebook: the Google Custom Search
# key and engine id (search URL) and the Gemini key (LLM client).
API_KEY_GOOGLE, SEARCH_ENGINE_ID, API_GEMINI_KEY = (
    get_env_var(var_name)
    for var_name in ("API_KEY_GOOGLE", "SEARCH_ENGINE_ID", "API_GEMINI_KEY")
)

In [4]:
# the search query you want
query = "learning web developing"
# using the first page (1-based)
page = 1
# constructing the URL
# doc: https://developers.google.com/custom-search/v1/using_rest
# calculating start, (page=2) => (start=11), (page=3) => (start=21)
start = (page - 1) * 10 + 1
# urlencode escapes reserved characters (spaces, "&", "#", "+", ...) so a
# query containing them cannot corrupt or truncate the request URL.
search_params = {
    "key": API_KEY_GOOGLE,
    "cx": SEARCH_ENGINE_ID,
    "q": query,
    "start": start,
}
url = f"https://www.googleapis.com/customsearch/v1?{urlencode(search_params)}"

In [5]:
# make the API request
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() fails here on an HTTP error response instead of letting
# an error payload flow into the cells that read `data`.
search_response = requests.get(url, timeout=30)
search_response.raise_for_status()
data = search_response.json()

In [6]:
# get the result items; data.get("items") returns None when the key is
# missing, so fall back to an empty list instead of crashing in enumerate()
search_items = data.get("items") or []
if not search_items:
    print("No results found.")
# iterate over the results found (10 per page)
for i, search_item in enumerate(search_items, start=1):
    try:
        long_description = search_item["pagemap"]["metatags"][0]["og:description"]
    except (KeyError, IndexError, TypeError):
        # missing key at any level, empty "metatags" list, or a non-dict level
        long_description = "N/A"
    # get the page title
    title = search_item.get("title")
    # page snippet
    snippet = search_item.get("snippet")
    # alternatively, you can get the HTML snippet (bolded keywords)
    html_snippet = search_item.get("htmlSnippet")
    # extract the page url
    link = search_item.get("link")
    # print the results; i + start - 1 is the result's rank across pages
    print("="*10, f"Result #{i+start-1}", "="*10)
    print("Title:", title)
    print("Description:", snippet)
    print("Long description:", long_description)
    print("URL:", link, "\n")

Title: how you would learn web development if you could start over ? : r ...
Description: Apr 10, 2023 ... Start by gaining a clear understanding of web development concepts, including HTML (Hypertext Markup Language), CSS (Cascading Style Sheets), ...
Long description: Posted by u/StockFan2047 - 216 votes and 182 comments
URL: https://www.reddit.com/r/Frontend/comments/12h51z1/how_you_would_learn_web_development_if_you_could/ 

Title: Learn web development | web.dev
Description: Explore our growing collection of courses on key web design and development subjects. An industry expert has written each course, helped by members of the ...
Long description: N/A
URL: https://web.dev/learn 

Title: Web Development Courses | Codecademy
Description: Web development is the practice of developing websites and web apps that live on the internet. Whether you're interested in front-end, back-end, ...
Long description: Web development courses at Codecademy helps you master HTML, CSS, JavaScript, Rea

In [7]:

# NOTE: disabled scraping experiment -- every line below is commented out, so
# this cell does nothing when run.
# As written it would download one page with requests, extract its text with
# BeautifulSoup, and collect non-empty, not-yet-seen lines: up to
# `nb_line_to_fetch` lines starting at each line that contains one of the
# `per_words` keywords. If re-enabled it would overwrite the `url` and `page`
# variables set by the search cells above.
#url = "https://web.dev/learn"
#page = requests.get(url)
#soup = BeautifulSoup(page.content, "html.parser")
#text = soup.get_text()

#lines = text.split('\n')
#new_txt = ""
#per_words = ["learn" , "course" , "resources"]
#nb_line_to_fetch = 2
#for line in lines:
#    stripped_line = line.strip()
#    for word in per_words:
#        if word.lower() in stripped_line.lower():
#            nb_line_to_fetch = 2
#            break
#    if stripped_line and nb_line_to_fetch>0 and new_txt.find(stripped_line) == -1 :
#        new_txt += stripped_line + "\n"
#        nb_line_to_fetch -= 1
#
#print(new_txt)


In [8]:
# Initialize model
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    google_api_key=API_GEMINI_KEY,
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# System prompt describing the roadmap JSON contract.
# The key names (including the "perquisites" spelling) are read verbatim by the
# database-loading code later in the notebook, so they must not be renamed.
# Compared with the earlier wording, the template is now well-formed (closed
# quotes, no missing comma) and no longer shows "technology" in array brackets
# while the rules require a single string.
system_prompt = """You are a roadmap generator. Based on the topic provided, the list of known skills and the duration, generate a structured learning roadmap in clean JSON format.

Output Format (JSON):
{
  "topic": "[Learning Topic]",
  "total_duration": "[Total estimated time]",
  "steps": [
    {
      "step_number": 1,
      "title": "[Step Title]" (string),
      "technology": "[Technology to be learned]" (string, not an array),
      "duration": [Time estimate in weeks] (number),
      "perquisites": [skills required for this step] (string array),
      "difficulty_level": [float 1-5],
      "depth": [number],
      "topic": "[Step topic]" (string),
      "is_category": [true or false],
      "importance_score": [number],
      "study_hours_per_day": [Study time per day estimate] (number),
      "description": "[What you'll learn and why]" (string),
      "step_type": "[required/alternative/optional]",
      "free_resources": [
        {
          "name": "[Resource Name]" (string),
          "url": "[Working URL]" (string),
          "type": "[tutorial/course/documentation/repository]"
        }
      ],
      "paid_resources": [
        {
          "name": "[Resource Name]" (string),
          "url": "[Working URL]" (string),
          "type": "[course/certification]",
          "platform": "[Udemy/Coursera/etc]"
        }
      ]
    }
  ]
}

Requirements:
- Do not interact with the human
- study_hours_per_day must be realistic (2 to 3 hours on average) and is expressed in seconds
- Return ONLY valid JSON, no markdown formatting or code blocks
- Provide 8-12 steps in logical learning order
- Include 2 free resources and 2 paid resources per step
- Use realistic durations (1 week to 8 weeks per step)
- Ensure all URLs are valid and accessible
- Keep descriptions brief and focused on the skill learned, without mentioning the human's knowledge
- Organize steps from beginner to advanced
- Always return a single valid JSON object as a string, never a list, etc.
- duration always uses weeks as its unit
- The skills given are skills the human already knows; use them to tailor the roadmap (examples: skip learning that skill or lower the duration of learning it, etc.)
- If a skill has nothing in common with the topic you can ignore it; otherwise it is good to take the skills into account
- You can add up to 2 weeks to the duration given (example: for 15 weeks your maximum is 17)
- duration is a number, not a string
- topic in a step is the category the step belongs to (examples: frontend, backend, ...), in as few words as possible; the topic can also be a technology (python, java, ...)
- depth is 1 or 2: 1 for a major topic (machine learning, ...), 2 for a technology (java, ...)
- is_category: true for a category, false for a technology to learn
- importance_score: in [0, 100]; if step A comes before step B and they cannot be swapped, then the importance_score of A > B
- technology: only the technology to be learned in the step, as a single string and never an array
"""

messages = [
    ("system", system_prompt),
    ("human", "Topic :Devops skills :[] , duration: 20"),
]

# The Google Search tool is passed to the model for this call.
response = llm.invoke(messages, tools=[GenAITool(google_search={})])
print(response.content)

```json
{
  "topic": "DevOps",
  "total_duration": "21 weeks",
  "steps": [
    {
      "step_number": 1,
      "title": "Linux Fundamentals & Networking Basics",
      "technology": ["Linux", "Networking"],
      "duration": 2,
      "perquisites": [],
      "difficulty_level": 2.0,
      "depth": 1,
      "topic": "Operating Systems & Networking",
      "is_category": true,
      "importance_score": 95,
      "study_hours_per_day": 7200,
      "description": "Acquire foundational knowledge of Linux operating system commands, file system, user management, and process control. Understand basic networking concepts including TCP/IP, DNS, HTTP/HTTPS, and network troubleshooting. These are critical for interacting with servers and understanding how applications communicate.",
      "step_type": "required",
      "free_resources": [
        {
          "name": "Linux Journey",
          "url": "https://linuxjourney.com/",
          "type": "tutorial"
        },
        {
          "name": "

In [34]:
# Hard-coded sample of the model's reply: a JSON roadmap wrapped in a Markdown
# ```json fence (it starts like the output printed by the LLM cell above).
# The parsing cell below reads `res` in place of `response.content`.
res = """```json
{
  "topic": "DevOps",
  "total_duration": "21 weeks",
  "steps": [
    {
      "step_number": 1,
      "title": "Linux Fundamentals & Networking Basics",
      "technology": ["Linux", "Networking"],
      "duration": 2,
      "perquisites": [],
      "difficulty_level": 2.0,
      "depth": 1,
      "topic": "Operating Systems & Networking",
      "is_category": true,
      "importance_score": 95,
      "study_hours_per_day": 7200,
      "description": "Acquire foundational knowledge of Linux operating system commands, file system, user management, and process control. Understand basic networking concepts including TCP/IP, DNS, HTTP/HTTPS, and network troubleshooting. These are critical for interacting with servers and understanding how applications communicate.",
      "step_type": "required",
      "free_resources": [
        {
          "name": "Linux Journey",
          "url": "https://linuxjourney.com/",
          "type": "tutorial"
        },
        {
          "name": "Networking Basics - Cisco Networking Academy",
          "url": "https://skillsforall.com/course/networking-basics",
          "type": "course"
        }
      ],
      "paid_resources": [
        {
          "name": "Complete Linux Training Course to Get Your Dream IT Job 2025",
          "url": "https://www.udemy.com/course/linux-training/",
          "type": "course",
          "platform": "Udemy"
        },
        {
          "name": "CCNA Foundations – Networking Basics and Cisco IOS Essentials",
          "url": "https://www.coursera.org/learn/ccna-foundations-networking-basics-cisco-ios-essentials",
          "type": "course",
          "platform": "Coursera"
        }
      ]
    },
    {
      "step_number": 2,
      "title": "Version Control with Git & GitHub",
      "technology": ["Git", "GitHub"],
      "duration": 2,
      "perquisites": ["Linux Fundamentals"],
      "difficulty_level": 2.0,
      "depth": 2,
      "topic": "Version Control",
      "is_category": false,
      "importance_score": 90,
      "study_hours_per_day": 7200,
      "description": "Master Git for version control, including branching, merging, pull requests, and collaborative workflows. Learn to use GitHub for hosting repositories and managing code.",
      "step_type": "required",
      "free_resources": [
        {
          "name": "Git Handbook",
          "url": "https://guides.github.com/introduction/git-handbook/",
          "type": "documentation"
        },
        {
          "name": "Learn Git Branching",
          "url": "https://learngitbranching.js.org/",
          "type": "tutorial"
        }
      ],
      "paid_resources": [
        {
          "name": "Mastering Git & GitHub : From Basics to Advanced Workflows",
          "url": "https://www.udemy.com/course/mastering-git-github-from-basics-to-advanced-workflows/",
          "type": "course",
          "platform": "Udemy"
        },
        {
          "name": "Git from Basics to Advanced: Practical Guide for Developers",
          "url": "https://www.udemy.com/course/git-from-basics-to-advanced-practical-guide-for-developers/",
          "type": "course",
          "platform": "Udemy"
        }
      ]
    },
    {
      "step_number": 3,
      "title": "Scripting with Bash & Python",
      "technology": ["Bash", "Python"],
      "duration": 2,
      "perquisites": ["Linux Fundamentals"],
      "difficulty_level": 3.0,
      "depth": 2,
      "topic": "Scripting",
      "is_category": false,
      "importance_score": 85,
      "study_hours_per_day": 7200,
      "description": "Develop scripting skills in Bash for automating repetitive system administration tasks and Python for more complex automation, data manipulation, and API interactions.",
      "step_type": "required",
      "free_resources": [
        {
          "name": "Bash Scripting Tutorial",
          "url": "https://ryanstutorials.net/bash-scripting-tutorial/",
          "type": "tutorial"
        },
        {
          "name": "Automate the Boring Stuff with Python",
          "url": "https://automatetheboringstuff.com/",
          "type": "course"
        }
      ],
      "paid_resources": [
        {
          "name": "Python for DevOps: Learn Python and Bash Scripting",
          "url": "https://www.udemy.com/course/python-for-devops-learn-python-and-bash-scripting/",
          "type": "course",
          "platform": "Udemy"
        },
        {
          "name": "Complete Python Programming : From Basics to Advance",
          "url": "https://www.udemy.com/course/complete-python-programming-from-basics-to-advance/",
          "type": "course",
          "platform": "Udemy"
        }
      ]
    },
    {
      "step_number": 4,
      "title": "Containerization with Docker",
      "technology": "Docker",
      "duration": 3,
      "perquisites": ["Linux Fundamentals", "Networking Basics"],
      "difficulty_level": 3.5,
      "depth": 2,
      "topic": "Containerization",
      "is_category": false,
      "importance_score": 90,
      "study_hours_per_day": 10800,
      "description": "Understand containerization principles, learn to build, run, and manage Docker images and containers. Explore Docker Compose for defining and running multi-container applications.",
      "step_type": "required",
      "free_resources": [
        {
          "name": "Docker Get Started Guide",
          "url": "https://docs.docker.com/get-started/",
          "type": "documentation"
        },
        {
          "name": "Dockerless: Deep Dive Into What Containers Really are About",
          "url": "https://www.udemy.com/course/dockerless-deep-dive-into-what-containers-really-are-about/",
          "type": "course"
        }
      ],
      "paid_resources": [
        {
          "name": "Docker Deep Dive: Build, Ship, and Run Containers",
          "url": "https://www.udemy.com/course/docker-deep-dive/",
          "type": "course",
          "platform": "Udemy"
        },
        {
          "name": "Docker & Kubernetes: The Practical Guide [2025 Edition]",
          "url": "https://www.udemy.com/course/docker-kubernetes-the-practical-guide/",
          "type": "course",
          "platform": "Udemy"
        }
      ]
    },
    {
      "step_number": 5,
      "title": "Cloud Fundamentals (AWS)",
      "technology": "AWS",
      "duration": 2,
      "perquisites": ["Networking Basics"],
      "difficulty_level": 3.0,
      "depth": 1,
      "topic": "Cloud Computing",
      "is_category": true,
      "importance_score": 80,
      "study_hours_per_day": 7200,
      "description": "Gain an understanding of cloud computing concepts and core Amazon Web Services (AWS) such as EC2, S3, VPC, and IAM. Learn to navigate the AWS console and deploy basic resources.",
      "step_type": "required",
      "free_resources": [
        {
          "name": "AWS Cloud Technical Essentials (Coursera)",
          "url": "https://www.coursera.org/learn/aws-cloud-technical-essentials",
          "type": "course"
        },
        {
          "name": "AWS Fundamentals Specialization - edX",
          "url": "https://www.edx.org/learn/amazon-web-services/amazon-web-services-aws-fundamentals",
          "type": "course"
        }
      ],
      "paid_resources": [
        {
          "name": "AWS Certified Cloud Practitioner Exam Training",
          "url": "https://www.udemy.com/course/aws-certified-cloud-practitioner-clf-c01/",
          "type": "certification",
          "platform": "Udemy"
        },
        {
          "name": "AWS Fundamentals Specialization",
          "url": "https://www.coursera.org/specializations/aws-fundamentals",
          "type": "specialization",
          "platform": "Coursera"
        }
      ]
    },
    {
      "step_number": 6,
      "title": "Infrastructure as Code with Terraform",
      "technology": "Terraform",
      "duration": 2,
      "perquisites": ["Cloud Fundamentals (AWS)", "Git"],
      "difficulty_level": 4.0,
      "depth": 2,
      "topic": "IaC",
      "is_category": false,
      "importance_score": 88,
      "study_hours_per_day": 10800,
      "description": "Learn to define, provision, and manage cloud infrastructure using HashiCorp Terraform. Understand HCL syntax, state management, and module creation for repeatable deployments.",
      "step_type": "required",
      "free_resources": [
        {
          "name": "Terraform Tutorials - HashiCorp Developer",
          "url": "https://developer.hashicorp.com/terraform/tutorials",
          "type": "tutorial"
        },
        {
          "name": "Terraform Crash Course for Absolute Beginners",
          "url": "https://www.youtube.com/watch?v=fG_g2b-g_1g",
          "type": "tutorial"
        }
      ],
      "paid_resources": [
        {
          "name": "HashiCorp Certified: Terraform Associate (003) Certification Course",
          "url": "https://www.udemy.com/course/terraform-beginner-to-advanced-devops-with-aws-azure/",
          "type": "certification",
          "platform": "Udemy"
        },
        {
          "name": "Terraform for AWS - Beginner to Advanced",
          "url": "https://www.udemy.com/course/terraform-for-aws-beginner-to-advanced/",
          "type": "course",
          "platform": "Udemy"
        }
      ]
    },
    {
      "step_number": 7,
      "title": "CI/CD with Jenkins/GitLab CI",
      "technology": ["Jenkins", "GitLab CI"],
      "duration": 3,
      "perquisites": ["Git", "Docker", "Scripting (Bash/Python)"],
      "difficulty_level": 4.0,
      "depth": 2,
      "topic": "CI/CD",
      "is_category": true,
      "importance_score": 92,
      "study_hours_per_day": 10800,
      "description": "Understand Continuous Integration and Continuous Delivery principles. Learn to set up automated pipelines using Jenkins or GitLab CI to build, test, and deploy applications efficiently.",
      "step_type": "required",
      "free_resources": [
        {
          "name": "Jenkins Tutorial for Beginners – Full Guide to CI/CD",
          "url": "https://www.youtube.com/watch?v=p_I_X2Q_g2k",
          "type": "tutorial"
        },
        {
          "name": "GitLab CI/CD Documentation",
          "url": "https://docs.gitlab.com/ee/ci/",
          "type": "documentation"
        }
      ],
      "paid_resources": [
        {
          "name": "Jenkins, From Zero To Hero: Become a DevOps Jenkins Master",
          "url": "https://www.udemy.com/course/jenkins-from-zero-to-hero/",
          "type": "course",
          "platform": "Udemy"
        },
        {
          "name": "GitLab CI/CD: Pipelines, Auto DevOps, & Advanced Deployments",
          "url": "https://www.udemy.com/course/gitlab-ci-cd-pipelines-auto-devops-advanced-deployments/",
          "type": "course",
          "platform": "Udemy"
        }
      ]
    },
    {
      "step_number": 8,
      "title": "Container Orchestration with Kubernetes",
      "technology": "Kubernetes",
      "duration": 4,
      "perquisites": ["Docker", "Cloud Fundamentals (AWS)"],
      "difficulty_level": 4.5,
      "depth": 2,
      "topic": "Container Orchestration",
      "is_category": false,
      "importance_score": 98,
      "study_hours_per_day": 10800,
      "description": "Master Kubernetes concepts for deploying, managing, and scaling containerized applications. Learn about Pods, Deployments, Services, Namespaces, and using kubectl.",
      "step_type": "required",
      "free_resources": [
        {
          "name": "Kubernetes Tutorials - Official Documentation",
          "url": "https://kubernetes.io/docs/tutorials/",
          "type": "documentation"
        },
        {
          "name": "Learn Kubernetes with Civo Academy",
          "url": "https://www.civo.com/academy",
          "type": "course"
        }
      ],
      "paid_resources": [
        {
          "name": "Kubernetes Certified Administrator (CKA) with Practice Tests",
          "url": "https://www.udemy.com/course/certified-kubernetes-administrator-with-practice-tests/",
          "type": "certification",
          "platform": "Udemy"
        },
        {
          "name": "The Complete Kubernetes Course - From Basics to Advanced",
          "url": "https://www.udemy.com/course/the-complete-kubernetes-course/",
          "type": "course",
          "platform": "Udemy"
        }
      ]
    },
    {
      "step_number": 9,
      "title": "Monitoring & Logging Basics",
      "technology": ["Prometheus", "Grafana", "ELK Stack (Elasticsearch, Logstash, Kibana)"],
      "duration": 1,
      "perquisites": ["Linux Fundamentals", "Kubernetes (basic understanding)"],
      "difficulty_level": 3.5,
      "depth": 2,
      "topic": "Observability",
      "is_category": true,
      "importance_score": 80,
      "study_hours_per_day": 7200,
      "description": "Understand the importance of monitoring and logging in a DevOps environment. Learn basic setup and usage of tools like Prometheus for metrics, Grafana for visualization, and an introduction to the ELK stack for log management.",
      "step_type": "required",
      "free_resources": [
        {
          "name": "Prometheus Documentation",
          "url": "https://prometheus.io/docs/introduction/overview/",
          "type": "documentation"
        },
        {
          "name": "Grafana Getting Started",
          "url": "https://grafana.com/docs/grafana/latest/getting-started/",
          "type": "documentation"
        }
      ],
      "paid_resources": [
        {
          "name": "DevOps Monitoring with Prometheus, Grafana and Alertmanager",
          "url": "https://www.udemy.com/course/devops-monitoring-with-prometheus-grafana-and-alertmanager/",
          "type": "course",
          "platform": "Udemy"
        },
        {
          "name": "The ELK Stack: Beginner to Advanced",
          "url": "https://www.udemy.com/course/the-elk-stack-beginner-to-advanced/",
          "type": "course",
          "platform": "Udemy"
        }
      ]
    }
  ]
}
```
"""

In [35]:
# Parse the model reply into a dict.
# To parse a live reply instead of the stored sample, uncomment:
#res =response.content
# The reply is wrapped in a Markdown ```json fence. Strip the fence by content
# rather than by fixed slice offsets, which break as soon as the fence or the
# surrounding whitespace differs by a single character.
json_payload = (
    res.strip()
    .removeprefix("```json")
    .removeprefix("```")
    .removesuffix("```")
    .strip()
)
parsed_json_res = json.loads(json_payload)
print(parsed_json_res['steps'][0].keys())
print(parsed_json_res.keys())
for step in parsed_json_res['steps']:
    # Outer double quotes: reusing the f-string's own quote character inside
    # the replacement fields is a SyntaxError before Python 3.12.
    print(f"step {step['step_number']}: {step['title']}")



dict_keys(['step_number', 'title', 'technology', 'duration', 'perquisites', 'difficulty_level', 'depth', 'topic', 'is_category', 'importance_score', 'study_hours_per_day', 'description', 'step_type', 'free_resources', 'paid_resources'])
dict_keys(['topic', 'total_duration', 'steps'])
step 1: Linux Fundamentals & Networking Basics
step 2: Version Control with Git & GitHub
step 3: Scripting with Bash & Python
step 4: Containerization with Docker
step 5: Cloud Fundamentals (AWS)
step 6: Infrastructure as Code with Terraform
step 7: CI/CD with Jenkins/GitLab CI
step 8: Container Orchestration with Kubernetes
step 9: Monitoring & Logging Basics


In [16]:

# Single module-level Prisma client: the async helpers defined in this cell
# all query through it, and process_roadmap() connects and disconnects it.
prisma = Prisma()

async def process_roadmap(roadmap_data: dict):
    """
    Process and insert roadmap data into database with caching.

    For every step the step's topic is found or created under the main topic,
    the skill is found or created under that topic, and the step's
    prerequisites are linked to skills seen so far (or found in the database).

    Args:
        roadmap_data: Parsed roadmap with a 'topic' string and a 'steps' list.
            Each step must provide 'title', 'topic', 'is_category' and
            'depth'; 'technology', 'perquisites' and 'step_type' are optional.

    Returns:
        Dict mapping lookup names (step titles and technology names) to skill
        ids. Several names may map to the same skill id.

    Raises:
        Exception: any failure is printed with its traceback and re-raised.
    """
    # Only open (and later close) the connection if nobody else already did,
    # so the function also works on an already-connected client.
    opened_connection = False
    try:
        if not prisma.is_connected():
            # Set the flag first so a partially failed connect() is still
            # followed by disconnect() in the finally block.
            opened_connection = True
            await prisma.connect()
        print('Starting roadmap processing...')

        # Step 1: Check/Create main topic
        main_topic = await find_or_create_main_topic(roadmap_data['topic'])
        print(f'Main topic: {main_topic.title} ({main_topic.id})')

        # Initialize caches
        created_skills: Dict[str, str] = {}  # skill name/alias -> skill_id
        created_topics: Dict[str, str] = {main_topic.title: main_topic.id}  # topic key -> topic_id

        # Step 2: Process each step
        for step in roadmap_data['steps']:
            print(f'Processing: {step["title"]}')

            # Step 3: Check/Create step topic (with caching)
            step_topic = await find_or_create_step_topic(
                topic_title=step['topic'],
                is_category=step['is_category'],
                depth=step['depth'],
                parent_topic_id=main_topic.id,
                topic_cache=created_topics
            )
            print(f'Topic: {step_topic.title}')

            # Step 4: Check/Create skill
            skill = await find_or_create_skill(step, step_topic.id)
            print(f'Skill: {skill.id}')

            # Cache skill with multiple keys for better matching
            created_skills[step['title']] = skill.id
            # cache by technology name if different
            technology = step.get('technology')
            if technology:
                # Ensure technology is a list for uniform handling
                tech_list = technology if isinstance(technology, list) else [technology]
                for tech in tech_list:
                    if tech and tech != step['title']:
                        created_skills[tech] = skill.id

            # Step 5: Handle prerequisites
            if step.get('perquisites'):
                await create_prerequisites(
                    skill_id=skill.id,
                    prerequisite_titles=step['perquisites'],
                    created_skills=created_skills,
                    importance=step.get('step_type', 'required')
                )
                print('Prerequisites processed')

        print(f'Successfully processed {len(roadmap_data["steps"])} skills!')
        # Both caches hold several keys per record (aliases), so count the
        # distinct ids rather than the number of keys.
        skill_count = len(set(created_skills.values()))
        topic_count = len(set(created_topics.values()))
        print(f'Created {skill_count} skills and {topic_count} topics')
        return created_skills

    except Exception as e:
        print(f' Error processing roadmap: {e}')
        traceback.print_exc()
        raise
    finally:
        if opened_connection:
            await prisma.disconnect()


async def find_or_create_main_topic(topic_title: str):
    """
    Return the root-level topic for a roadmap, creating it on first use.

    The title is normalised with normalize_topic_name() and looked up among
    topics whose parent_topic_id is None. A missing topic is created with
    depth 1 and no parent.

    Args:
        topic_title: Raw topic title taken from the roadmap.

    Returns:
        The existing or newly created topic record.
    """
    title = normalize_topic_name(topic_title)

    # Root topics are the ones without a parent.
    topic = await prisma.topic.find_first(
        where={'title': title, 'parent_topic_id': None}
    )

    if not topic:
        topic = await prisma.topic.create(
            data={'title': title, 'depth': 1, 'parent_topic_id': None}
        )
        print(f'Created main topic: {title}')
        return topic

    print(f'Main topic already exists: {title}')
    return topic


async def find_or_create_step_topic(
    topic_title: str,
    is_category: bool,
    depth: int,
    parent_topic_id: str,
    topic_cache: Dict[str, str]
):
    """
    Check if step topic exists (with caching), create if not.

    Args:
        topic_title: Raw topic title of the step; normalised before use.
        is_category: Accepted for the caller's convenience; not used here.
        depth: Depth stored on the topic when it has to be created.
        parent_topic_id: Id of the topic the step topic belongs to.
        topic_cache: Mutable cache of topic ids. Entries are read under the
            key "<parent_topic_id>:<normalised title>" and written under both
            that key and the bare normalised title.

    Returns:
        The topic record (cached, already in the database, or newly created).
    """
    # Normalize title
    normalized_title = normalize_topic_name(topic_title)

    # Check cache first
    cache_key = f"{parent_topic_id}:{normalized_title}"
    if cache_key in topic_cache:
        cached_topic = await prisma.topic.find_unique(
            where={'id': topic_cache[cache_key]}
        )
        if cached_topic:
            print(f'Topic from cache: {normalized_title}')
            return cached_topic
        # The cached id no longer resolves to a record. Drop the stale entry
        # and fall through to the normal lookup/creation path instead of
        # returning None, which would crash the caller on `.title` / `.id`.
        del topic_cache[cache_key]

    # Check if topic exists under the parent in database
    existing_topic = await prisma.topic.find_first(
        where={
            'title': normalized_title,
            'parent_topic_id': parent_topic_id
        }
    )

    if existing_topic:
        print(f'Topic already exists: {normalized_title}')
        # Cache it
        topic_cache[cache_key] = existing_topic.id
        topic_cache[normalized_title] = existing_topic.id
        return existing_topic

    # Create new topic
    new_topic = await prisma.topic.create(
        data={
            'title': normalized_title,
            'depth': depth,
            'parent_topic_id': parent_topic_id
        }
    )
    print(f'Created topic: {normalized_title}')

    # Cache it
    topic_cache[cache_key] = new_topic.id
    topic_cache[normalized_title] = new_topic.id

    return new_topic


def _build_skill_fields(step: dict) -> dict:
    """Build the column values shared by the skill update and create calls."""
    # 'technology' may arrive as a string or a list; fall back to the step's
    # topic when the key is absent. The column is always written as a list.
    technology = step.get('technology', step.get('topic', ''))
    if isinstance(technology, str):
        technologie_list = [technology] if technology else []
    elif isinstance(technology, list):
        technologie_list = technology
    else:
        technologie_list = []

    return {
        'description': step.get('description', ''),
        'duration': int(step.get('duration', 0)),
        'score': calculate_score(
            step.get('importance_score', 0),
            step.get('step_number', 0),
            len(step.get('perquisites', [])),
        ),
        'study_hours_per_day': int(step.get('study_hours_per_day', 0)),
        'skill_type': map_skill_type(step.get('step_type', 'required')),
        'technologie': technologie_list,
        # Resources are lists in the roadmap, so a missing key is stored as
        # an empty list ("[]") rather than an empty object.
        'free_resources': json.dumps(step.get('free_resources', [])),
        'paid_resources': json.dumps(step.get('paid_resources', [])),
    }


async def find_or_create_skill(step: dict, topic_id: str):
    """
    Check if skill exists, create if not.

    A skill is identified by its normalised title within a topic. An existing
    skill is updated with the step's current values; otherwise a new skill is
    created.

    Args:
        step: One roadmap step. 'title' is required; the other keys fall back
            to defaults when missing.
        topic_id: Id of the topic the skill belongs to.

    Returns:
        The updated or newly created skill record.

    Note:
        map_skill_type() is not defined in this part of the notebook; it must
        be defined before this function runs.
    """
    # Normalize title
    normalized_title = normalize_skill_name(step['title'])

    # Check if skill exists
    existing_skill = await prisma.skill.find_first(
        where={
            'title': normalized_title,
            'topic_id': topic_id
        }
    )

    # One payload for both branches keeps update and create from drifting apart
    skill_fields = _build_skill_fields(step)

    if existing_skill:
        print(f'Skill already exists: {normalized_title}')
        # Update existing skill with new data
        return await prisma.skill.update(
            where={'id': existing_skill.id},
            data=skill_fields
        )

    # Create new skill
    new_skill = await prisma.skill.create(
        data={
            'topic_id': topic_id,
            'title': normalized_title,
            **skill_fields
        }
    )
    print(f'Created skill: {normalized_title}')
    return new_skill


async def create_prerequisites(
    skill_id: str,
    prerequisite_titles: List[str],
    created_skills: Dict[str, str],
    importance: str
):
    """
    Create prerequisite relationships with smart matching.

    Each prerequisite name is resolved to a skill id, first through the
    in-memory cache (find_matching_skill_id) and then through a database
    search. Names that cannot be resolved, blank names, self-references and
    already existing links are skipped. Errors while linking are printed and
    do not stop the remaining prerequisites.

    Args:
        skill_id: Id of the skill that owns the prerequisites.
        prerequisite_titles: Prerequisite names as given in the roadmap.
        created_skills: Name -> skill id cache; database hits are added to it.
        importance: Step type string, converted with map_skill_type() and
            stored on every new link.
    """
    if not prerequisite_titles:
        return

    for prereq_title in prerequisite_titles:
        # A blank name would match every skill: "" is a substring of any
        # title, both in the cache lookup and in the `contains` filters below.
        if not isinstance(prereq_title, str) or not prereq_title.strip():
            print('Skipping empty prerequisite name')
            continue

        # Normalize prerequisite name
        normalized_prereq = normalize_skill_name(prereq_title)

        # Find matching skill using multiple strategies
        prerequisite_id = find_matching_skill_id(normalized_prereq, created_skills)

        # If not found in cache, search database
        if not prerequisite_id:
            # NOTE(review): find_or_create_skill writes `technologie` as a
            # list. Confirm that `equals` with a single string plus `mode` is
            # a valid filter for that column; a list column would presumably
            # need `has` instead.
            existing_prereq = await prisma.skill.find_first(
                where={
                    'OR': [
                        {'title': {'contains': normalized_prereq, 'mode': 'insensitive'}},
                        {'technologie': {'equals': normalized_prereq, 'mode': 'insensitive'}},
                        {'title': {'contains': prereq_title, 'mode': 'insensitive'}},
                    ]
                }
            )
            if existing_prereq:
                prerequisite_id = existing_prereq.id
                # Remember the hit under both names to skip the next lookup
                created_skills[existing_prereq.title] = existing_prereq.id
                created_skills[normalized_prereq] = existing_prereq.id
                print(f'Found "{prereq_title}" as "{existing_prereq.title}" in database')

        if not prerequisite_id:
            print(f'Prerequisite not found: {prereq_title}')
            continue

        # Avoid self-referential prerequisites
        if prerequisite_id == skill_id:
            print('Skipping self-referential prerequisite')
            continue

        # Create relationship (check first to avoid duplicates)
        try:
            # Check if already exists
            existing = await prisma.prerequisiteforskill.find_unique(
                where={
                    'skill_id_prerequisite_id': {
                        'skill_id': skill_id,
                        'prerequisite_id': prerequisite_id
                    }
                }
            )

            if existing:
                print(f'Prerequisite already linked: {prereq_title}')
            else:
                # Create new relationship
                await prisma.prerequisiteforskill.create(
                    data={
                        'skill_id': skill_id,
                        'prerequisite_id': prerequisite_id,
                        'importance': map_skill_type(importance)
                    }
                )
                print(f'Linked prerequisite: {prereq_title}')
        except Exception as e:
            # Best effort: one failed link must not abort the remaining ones
            print(f'Error linking prerequisite: {e}')


def normalize_topic_name(name: str) -> str:
    """
    Return the canonical form of a topic name used for matching.

    Surrounding whitespace is removed and the result is title-cased with
    ``str.title()``. Note that ``str.title()`` lowercases every letter that
    is not at the start of a word, so "DevOps" becomes "Devops" and "IaC"
    becomes "Iac".

    Args:
        name: Raw topic name; may be empty or None.

    Returns:
        The normalized name, or "" when ``name`` is falsy.
    """
    return name.strip().title() if name else ""


def normalize_skill_name(name: str) -> str:
    """
    Return the canonical form of a skill name used for matching.

    Only surrounding whitespace is removed; unlike topic names, the
    original casing of a skill name is kept as-is.

    Args:
        name: Raw skill name; may be empty or None.

    Returns:
        The trimmed name, or "" when ``name`` is falsy.
    """
    # A falsy input collapses to "" before trimming, so None never reaches .strip()
    return (name or "").strip()


def find_matching_skill_id(prereq: str, created_skills: Dict[str, str]) -> Optional[str]:
    """
    Find the ID of the skill that matches a prerequisite name.

    Strategies are tried in order and the first hit wins:
      1. Exact key match
      2. Case-insensitive exact match
      3. Substring match in either direction
      4. Word-subset match after dropping filler words

    Args:
        prereq: Prerequisite name to look up.
        created_skills: Mapping of skill title -> skill ID.

    Returns:
        The matching skill ID, or None when nothing matches or when
        ``prereq`` is None, empty, or whitespace only.
    """
    # An empty string is a substring of every string, so without this guard a
    # blank prerequisite would "match" the first skill in the mapping.
    if not prereq or not prereq.strip():
        return None

    prereq_lower = prereq.lower().strip()

    # Strategy 1: Exact match
    if prereq in created_skills:
        return created_skills[prereq]

    # Strategy 2: Case-insensitive exact match
    for skill_title, skill_id in created_skills.items():
        if skill_title.lower() == prereq_lower:
            return skill_id

    # Strategy 3: Substring match
    for skill_title, skill_id in created_skills.items():
        skill_lower = skill_title.lower()
        # A blank title is contained in every prerequisite; never treat it as a match
        if not skill_lower.strip():
            continue
        # Check containment in either direction,
        # e.g., "HTML" matches "HTML Fundamentals"
        if prereq_lower in skill_lower or skill_lower in prereq_lower:
            return skill_id

    # Strategy 4: Word-based match (ignore common words)
    common_words = {'basics', 'fundamentals', 'introduction', 'advanced', 'complete',
                    'guide', 'tutorial', 'course', 'learning', 'mastering', '&', 'and'}

    prereq_words = set(prereq_lower.split()) - common_words

    # With only filler words left there is nothing meaningful to compare
    if not prereq_words:
        return None

    for skill_title, skill_id in created_skills.items():
        skill_words = set(skill_title.lower().split()) - common_words

        # Every significant prerequisite word must appear in the skill title
        if prereq_words.issubset(skill_words):
            return skill_id

    return None

def calculate_score(importance_score:float , step_number:int , prerequisite_number:int)-> float:
    """
    Compute a skill's ranking score, rounded to three decimals.

    The importance is divided by sqrt(2 + prerequisite_number) and then by
    (4 + step_number), so the score shrinks as either count grows.

    Args:
        importance_score: Base importance of the skill.
        step_number: Value added to 4 to form the second divisor.
        prerequisite_number: Value added to 2 under the square root.

    Returns:
        The score rounded to 3 decimal places.
    """
    prerequisite_damping = sqrt(2 + prerequisite_number)
    step_damping = 4 + step_number
    # Divide left to right, in the same order as before, so rounding is unchanged
    raw_score = importance_score / prerequisite_damping / step_damping
    return round(raw_score, 3)

def map_skill_type(step_type: str) -> str:
    """
    Translate a step type label into a SkillTypeEnum value name.

    The lookup is case-insensitive. 'alternative' and 'recommended' both
    map to 'RECOMMENDED'.

    Args:
        step_type: Step type label; may be empty or None.

    Returns:
        'REQUIRED', 'RECOMMENDED' or 'OPTIONAL'. A missing or unrecognized
        label falls back to 'REQUIRED'.
    """
    fallback = 'REQUIRED'
    if not step_type:
        return fallback

    enum_by_step_type = {
        'required': 'REQUIRED',
        'alternative': 'RECOMMENDED',
        'optional': 'OPTIONAL',
        'recommended': 'RECOMMENDED'
    }
    key = step_type.lower()
    return enum_by_step_type[key] if key in enum_by_step_type else fallback

    
# Run the roadmap import on `parsed_json_res` and keep whatever it returns.
# Top-level `await` only works in an async-aware shell such as IPython/Jupyter.
# NOTE(review): process_roadmap is defined outside this excerpt; its return value is
# presumably the skill-title -> skill-id mapping built during processing — confirm.
created_skills=await process_roadmap(parsed_json_res)


Starting roadmap processing...
Main topic already exists: Devops
Main topic: Devops (cmj2gfnx20001b2ec9c1ha3br)
Processing: Linux Fundamentals & Networking Basics
Topic already exists: Operating Systems & Networking
Topic: Operating Systems & Networking
Skill already exists: Linux Fundamentals & Networking Basics
Skill: cmj2gsl0z00014fnq5b4ut2lu
Processing: Version Control with Git & GitHub
Topic already exists: Version Control
Topic: Version Control
Skill already exists: Version Control with Git & GitHub
Skill: cmj2h2vot0003jstxrcv5verd
Prerequisite already linked: Linux Fundamentals
Prerequisites processed
Processing: Scripting with Bash & Python
Topic already exists: Scripting
Topic: Scripting
Skill already exists: Scripting with Bash & Python
Skill: cmj2h2vph0007jstxoyv77bya
Prerequisite already linked: Linux Fundamentals
Prerequisites processed
Processing: Containerization with Docker
Topic already exists: Containerization
Topic: Containerization
Created skill: Containerization wi

In [32]:
async def getTopic(topic : str):
    """
    Look up a root topic (one without a parent) by title.

    The title is passed through ``normalize_topic_name`` before querying, so
    the lookup uses the same form produced by that helper.

    Args:
        topic: Topic title to search for.

    Returns:
        The result of ``prisma.topic.find_first`` for that filter (None when
        no record matches, per Prisma Client Python).
    """
    root_topic_filter = {
        'AND': [
            {'title': normalize_topic_name(topic)},
            {'parent_topic_id': None},
        ]
    }
    return await prisma.topic.find_first(where=root_topic_filter)

try:
    await prisma.connect()
    topic = await getTopic("Devops")
except Exception as e :
    print(f'did not get the topic {str(e)}')
finally:
    await prisma.disconnect()
print(topic.id)

cmj2gfnx20001b2ec9c1ha3br


In [33]:
async def get_all_topic(topic_id: str):
    """
    Fetch the direct sub-topics of a topic together with their skills.

    Each returned sub-topic includes its ``skills`` relation, and each skill
    includes its ``prerequisites`` relation.

    Args:
        topic_id: ID of the parent topic.

    Returns:
        The result of ``prisma.topic.find_many`` for records whose
        ``parent_topic_id`` equals ``topic_id``.
    """
    children_filter = {'parent_topic_id': topic_id}
    # Nested include: topic -> skills -> prerequisites
    skills_with_prerequisites = {
        'skills': {
            'include': {
                'prerequisites': True
            }
        }
    }
    return await prisma.topic.find_many(
        where=children_filter,
        include=skills_with_prerequisites,
    )
cashed_skills= {}
try:
    await prisma.connect()
    subtopics = await get_all_topic(topic.id)
    
    # Sort subtopics by the highest skill score in each subtopic
    sorted_subtopics = sorted(
        subtopics,
        key=lambda subtopic: max((skill.score for skill in subtopic.skills), default=0),
        reverse=True
    )
    # Display sorted results
    for subtopic in sorted_subtopics: 
        print(f"\n{subtopic.title}")
        
        # Sort skills within each subtopic by score
        sorted_skills = sorted(subtopic.skills, key=lambda skill: skill.score, reverse=True)
        
        for skill in sorted_skills:
            cashed_skills[skill.id] = skill.title
            print(f'  skill_id_sort: {skill.id}')
            print(f'  sorted: {skill.title}')
            print(f'  sorted_score: {skill.score}')
            for prerequisite in skill.prerequisites:
                if prerequisite.prerequisite_id in cashed_skills.keys():
                    print(cashed_skills[prerequisite.prerequisite_id])
                else:
                    print('prerequisite Name not found')
            
except Exception as e:
    print(f'Did not get the topic: {str(e)}')
finally:
    await prisma.disconnect()


Operating Systems & Networking
  skill_id_sort: cmj2gsl0z00014fnq5b4ut2lu
  sorted: Linux Fundamentals & Networking Basics
  sorted_score: 13.435

Version Control
  skill_id_sort: cmj2h2vot0003jstxrcv5verd
  sorted: Version Control with Git & GitHub
  sorted_score: 8.66
Linux Fundamentals & Networking Basics

Scripting
  skill_id_sort: cmj2h2vph0007jstxoyv77bya
  sorted: Scripting with Bash & Python
  sorted_score: 7.011
Linux Fundamentals & Networking Basics

Containerization
  skill_id_sort: cmj2h60ni0001ralyp4nq45ut
  sorted: Containerization with Docker
  sorted_score: 5.625
Linux Fundamentals & Networking Basics

Cloud Computing
  skill_id_sort: cmj2h60oa0005raly81sh9dlm
  sorted: Cloud Fundamentals (AWS)
  sorted_score: 5.132
Linux Fundamentals & Networking Basics

Iac
  skill_id_sort: cmj2h60ox0009ralyqeosv4nl
  sorted: Infrastructure as Code with Terraform
  sorted_score: 4.4
Cloud Fundamentals (AWS)
Version Control with Git & GitHub

Container Orchestration
  skill_id_sort: c