In [22]:
import asyncio
from prisma import Prisma
import json
from typing import *
from dump.dump_data import ai_output_roadmap
from dotenv import load_dotenv
import sys
from pathlib import Path
import os

In [23]:
project_root = Path.cwd().parent  # Go up from test/ to project root

# Load .env
env_path = project_root / ".env"
load_dotenv(env_path)
# Verify it loaded
print(f"DATABASE_URL loaded: {bool(os.getenv('DATABASE_URL'))}")

DATABASE_URL loaded: True


In [24]:
parsed_json = json.loads(ai_output_roadmap)
print(f'keys of the topics : {parsed_json.keys()}')
step_one=parsed_json.get('steps')[0]
print(f'keys of step: {step_one.keys()}')


keys of the topics : dict_keys(['topic', 'total_duration', 'steps'])
keys of step: dict_keys(['step_number', 'title', 'technology', 'duration', 'perquisites', 'difficulty_level', 'depth', 'topic', 'is_category', 'importance_score', 'study_hours_per_day', 'description', 'step_type', 'free_resources', 'paid_resources'])


In [25]:
### check if the main topic exists (create it if do not exists)
### for each step we are going to extract the skill
### check if skill is present if not we are going to create it 
###check the topic of the step if not existed create it 
###check prerequisite if not existed created even empyty
### we put fields

### calculation of score
## regenerate the roadmap without the agent
### add Prerequisites if not existed 

In [26]:
parsed_json.get('steps')[1]

{'step_number': 2,
 'title': 'JavaScript Basics & DOM Manipulation',
 'technology': 'JavaScript',
 'duration': 2,
 'perquisites': ['HTML', 'CSS'],
 'difficulty_level': 2.5,
 'depth': 2,
 'topic': 'Frontend',
 'is_category': False,
 'importance_score': 90,
 'study_hours_per_day': 10800,
 'description': 'Adding interactivity to web pages using JavaScript, covering variables, data types, functions, control flow, and manipulating the Document Object Model (DOM) to dynamically change content and styles.',
 'step_type': 'required',
 'free_resources': [{'name': 'MDN Web Docs: JavaScript Guide',
   'url': 'https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide',
   'type': 'documentation'},
  {'name': 'freeCodeCamp JavaScript Algorithms and Data Structures',
   'url': 'https://www.freecodecamp.org/learn/javascript-algorithms-and-data-structures/',
   'type': 'course'}],
 'paid_resources': [{'name': 'The Complete JavaScript Course 2024: From Zero to Expert!',
   'url': 'https://www.udemy

In [27]:
for step in parsed_json.get('steps'):
    print(step['topic'])

Frontend
Frontend
Backend
Database
Backend
Frontend
Fullstack Integration


In [28]:

prisma = Prisma()

async def process_roadmap(roadmap_data: dict):
    """
    Process and insert roadmap data into database with caching
    """
    try:
        await prisma.connect()
        print('Starting roadmap processing...')
        
        # Step 1: Check/Create main topic
        main_topic = await find_or_create_main_topic(roadmap_data['topic'])
        print(f'Main topic: {main_topic.title} ({main_topic.id})')
        
        # Initialize caches
        created_skills: Dict[str, str] = {}  # skill_title -> skill_id
        created_topics: Dict[str, str] = {main_topic.title: main_topic.id}  # topic_title -> topic_id
        
        # Step 2: Process each step
        for step in roadmap_data['steps']:
            print(f'Processing: {step["title"]}')
            
            # Step 3: Check/Create step topic (with caching)
            step_topic = await find_or_create_step_topic(
                topic_title=step['topic'],
                is_category=step['is_category'],
                depth=step['depth'],
                parent_topic_id=main_topic.id,
                topic_cache=created_topics
            )
            print(f'Topic: {step_topic.title}')
            
            # Step 4: Check/Create skill
            skill = await find_or_create_skill(step, step_topic.id)
            print(f'Skill: {skill.id}')
            
            # Cache skill with multiple keys for better matching
            created_skills[step['title']] = skill.id
            # cache by technology name if different
            if step.get('technology') and step['technology'] != step['title']:
                created_skills[step['technology']] = skill.id
            
            # Step 5: Handle prerequisites
            if step.get('perquisites'): 
                await create_prerequisites(
                    skill_id=skill.id,
                    prerequisite_titles=step['perquisites'],
                    created_skills=created_skills,
                    importance=step.get('step_type', 'required')
                )
                print(f'Prerequisites processed')
        
        print(f'Successfully processed {len(roadmap_data["steps"])} skills!')
        print(f'Created {len(created_skills)} skills and {len(created_topics)} topics')
        return created_skills
        
    except Exception as e:
        print(f' Error processing roadmap: {e}')
        import traceback
        traceback.print_exc()
        raise
    finally:
        await prisma.disconnect()


async def find_or_create_main_topic(topic_title: str):
    """Check if main topic exists, create if not"""
    # Normalize title
    normalized_title = normalize_topic_name(topic_title)
    
    # Check if topic exists
    existing_topic = await prisma.topic.find_first(
        where={
            'title': normalized_title,
            'parent_topic_id': None
        }
    )
    
    if existing_topic:
        print(f'Main topic already exists: {normalized_title}')
        return existing_topic
    
    # Create new main topic
    new_topic = await prisma.topic.create(
        data={
            'title': normalized_title,
            'depth': 1,
            'parent_topic_id': None
        }
    )
    print(f'Created main topic: {normalized_title}')
    return new_topic


async def find_or_create_step_topic(
    topic_title: str,
    is_category: bool,
    depth: int,
    parent_topic_id: str,
    topic_cache: Dict[str, str]
):
    """Check if step topic exists (with caching), create if not"""
    # Normalize title
    normalized_title = normalize_topic_name(topic_title)
    
    # Check cache first
    cache_key = f"{parent_topic_id}:{normalized_title}"
    if cache_key in topic_cache:
        print(f'Topic from cache: {normalized_title}')
        topic_id = topic_cache[cache_key]
        # Fetch and return the topic object
        existing_topic = await prisma.topic.find_unique(where={'id': topic_id})
        return existing_topic
    
    # Check if topic exists under the parent in database
    existing_topic = await prisma.topic.find_first(
        where={
            'title': normalized_title,
            'parent_topic_id': parent_topic_id
        }
    )
    
    if existing_topic:
        print(f'Topic already exists: {normalized_title}')
        # Cache it
        topic_cache[cache_key] = existing_topic.id
        topic_cache[normalized_title] = existing_topic.id
        return existing_topic
    
    # Create new topic
    new_topic = await prisma.topic.create(
        data={
            'title': normalized_title,
            'depth': depth,
            'parent_topic_id': parent_topic_id
        }
    )
    print(f'Created topic: {normalized_title}')
    
    # Cache it
    topic_cache[cache_key] = new_topic.id
    topic_cache[normalized_title] = new_topic.id
    
    return new_topic


async def find_or_create_skill(step: dict, topic_id: str):
    """Check if skill exists, create if not"""
    # Normalize title
    normalized_title = normalize_skill_name(step['title'])
    
    # Check if skill exists
    existing_skill = await prisma.skill.find_first(
        where={
            'title': normalized_title,
            'topic_id': topic_id
        }
    )
    
    if existing_skill:
        print(f'Skill already exists: {normalized_title}')
        # Update existing skill with new data
        updated_skill = await prisma.skill.update(
            where={'id': existing_skill.id},
            data={
                'description': step.get('description', ''),
                'duration': int(step.get('duration', 0)),
                'score': int(step.get('importance_score', 0)),
                'study_hours_per_day': int(step.get('study_hours_per_day', 0)),
                'skill_type': map_skill_type(step.get('step_type', 'required')),
                'technologie': step.get('technology', step.get('topic', '')),
                'free_resources': json.dumps(step.get('free_resources',{})),
                'paid_resources': json.dumps(step.get('paid_resources', {}))
            }
        )
        return updated_skill
    
    # Create new skill
    new_skill = await prisma.skill.create(
        data={
            'topic_id': topic_id,
            'title': normalized_title,
            'description': step.get('description', ''),
            'duration': int(step.get('duration', 0)),
            'score': int(step.get('importance_score', 0)),
            'study_hours_per_day': int(step.get('study_hours_per_day', 0)),
            'skill_type': map_skill_type(step.get('step_type', 'required')),
            'technologie': step.get('technology', step.get('topic', '')),
           'free_resources': json.dumps(step.get('free_resources',{})),
            'paid_resources': json.dumps(step.get('paid_resources', {}))
        }
    )
    print(f'Created skill: {normalized_title}')
    return new_skill


async def create_prerequisites(
    skill_id: str,
    prerequisite_titles: List[str],
    created_skills: Dict[str, str],
    importance: str
):
    """Create prerequisite relationships with smart matching"""
    if not prerequisite_titles:
        return
    
    for prereq_title in prerequisite_titles:
        # Normalize prerequisite name
        normalized_prereq = normalize_skill_name(prereq_title)
        
        # Find matching skill using multiple strategies
        prerequisite_id = find_matching_skill_id(normalized_prereq, created_skills)
        
        # If not found in cache, search database
        if not prerequisite_id:
            existing_prereq = await prisma.skill.find_first(
                where={
                    'OR': [
                        {'title': {'contains': normalized_prereq, 'mode': 'insensitive'}},
                        {'technologie': {'equals': normalized_prereq, 'mode': 'insensitive'}},
                        {'title': {'contains': prereq_title, 'mode': 'insensitive'}},
                    ]
                }
            )
            if existing_prereq:
                prerequisite_id = existing_prereq.id
                created_skills[existing_prereq.title] = existing_prereq.id
                created_skills[normalized_prereq] = existing_prereq.id
                print(f'Found "{prereq_title}" as "{existing_prereq.title}" in database')
        
        if not prerequisite_id:
            print(f'Prerequisite not found: {prereq_title}')
            continue
        
        # Avoid self-referential prerequisites
        if prerequisite_id == skill_id:
            print(f'Skipping self-referential prerequisite')
            continue
        
        # Create relationship (check first to avoid duplicates)
        try:
            # Check if already exists
            existing = await prisma.prerequisiteforskill.find_unique(
                where={
                    'skill_id_prerequisite_id': {
                        'skill_id': skill_id,
                        'prerequisite_id': prerequisite_id
                    }
                }
            )
            
            if existing:
                print(f'Prerequisite already linked: {prereq_title}')
            else:
                # Create new relationship
                await prisma.prerequisiteforskill.create(
                    data={
                        'skill_id': skill_id,
                        'prerequisite_id': prerequisite_id,
                        'importance': map_skill_type(importance)
                    }
                )
                print(f'Linked prerequisite: {prereq_title}')
        except Exception as e:
            print(f'Error linking prerequisite: {str(e)}')


def normalize_topic_name(name: str) -> str:
    """Normalize topic name for consistent matching"""
    if not name:
        return ""
    # Strip whitespace, title case
    return name.strip().title()


def normalize_skill_name(name: str) -> str:
    """Normalize skill name for consistent matching"""
    if not name:
        return ""
    # Strip whitespace, but preserve original casing for skills
    return name.strip()


def find_matching_skill_id(prereq: str, created_skills: Dict[str, str]) -> Optional[str]:
    """
    Find matching skill ID using multiple strategies
    """
    prereq_lower = prereq.lower().strip()
    
    # Strategy 1: Exact match
    if prereq in created_skills:
        return created_skills[prereq]
    
    # Strategy 2: Case-insensitive exact match
    for skill_title, skill_id in created_skills.items():
        if skill_title.lower() == prereq_lower:
            return skill_id
    
    # Strategy 3: Substring match
    for skill_title, skill_id in created_skills.items():
        skill_lower = skill_title.lower()
        # Check if prerequisite is contained in skill title
        # e.g., "HTML" matches "HTML Fundamentals"
        if prereq_lower in skill_lower or skill_lower in prereq_lower:
            return skill_id
    
    # Strategy 4: Word-based match (ignore common words)
    common_words = {'basics', 'fundamentals', 'introduction', 'advanced', 'complete', 
                   'guide', 'tutorial', 'course', 'learning', 'mastering', '&', 'and'}
    
    prereq_words = set(prereq_lower.split()) - common_words
    
    for skill_title, skill_id in created_skills.items():
        skill_words = set(skill_title.lower().split()) - common_words
        
        # If prerequisite words are subset of skill words
        if prereq_words and prereq_words.issubset(skill_words):
            return skill_id
    
    return None


def map_skill_type(step_type: str) -> str:
    """Map step_type to SkillTypeEnum"""
    if not step_type:
        return 'REQUIRED'
    
    mapping = {
        'required': 'REQUIRED',
        'alternative': 'RECOMMENDED',
        'optional': 'OPTIONAL',
        'recommended': 'RECOMMENDED'
    }
    return mapping.get(step_type.lower(), 'REQUIRED')

    
created_skills=await process_roadmap(parsed_json)


Starting roadmap processing...
Main topic already exists: Web Development
Main topic: Web Development (cmiy8o4z10001h5h7t0ewwb22)
Processing: CSS Fundamentals & Responsive Design
Topic already exists: Frontend
Topic: Frontend
Skill already exists: CSS Fundamentals & Responsive Design
Skill: cmiya13ku0001qs99s1e9name
Prerequisite not found: HTML
Prerequisites processed
Processing: JavaScript Basics & DOM Manipulation
Topic from cache: Frontend
Topic: Frontend
Skill already exists: JavaScript Basics & DOM Manipulation
Skill: cmiyuyutb0001n2gbe159uxh3
Prerequisite not found: HTML
Prerequisite already linked: CSS
Prerequisites processed
Processing: Python Web Framework - Flask Introduction
Topic already exists: Backend
Topic: Backend
Skill already exists: Python Web Framework - Flask Introduction
Skill: cmiyuyuts0005n2gbjjyt582c
Skipping self-referential prerequisite
Prerequisite not found: HTML
Prerequisites processed
Processing: Database Fundamentals - SQL & ORMs
Topic already exists: Da