# AI-Powered Career Navigation Platform Implementation

This notebook demonstrates the implementation of the core components of the Career Navigation Platform.

## 1. Install Required Libraries

In [1]:
!pip install fastapi uvicorn python-multipart openai langchain langchain-google-genai pinecone-client neo4j sentence-transformers spacy redis python-dotenv pydantic
!python -m spacy download en_core_web_sm




[notice] A new release of pip available: 22.3 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting https://github.com/explosion/spacy-models/releases/download/-en_core_web_sm/-en_core_web_sm.tar.gz


  ERROR: HTTP error 404 while getting https://github.com/explosion/spacy-models/releases/download/-en_core_web_sm/-en_core_web_sm.tar.gz
ERROR: Could not install requirement https://github.com/explosion/spacy-models/releases/download/-en_core_web_sm/-en_core_web_sm.tar.gz because of HTTP error 404 Client Error: Not Found for url: https://github.com/explosion/spacy-models/releases/download/-en_core_web_sm/-en_core_web_sm.tar.gz for URL https://github.com/explosion/spacy-models/releases/download/-en_core_web_sm/-en_core_web_sm.tar.gz

[notice] A new release of pip available: 22.3 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


## 2. Define Data Models for Resume Parsing

In [None]:
from pydantic import BaseModel, Field
from typing import List, Optional, Dict

class ExtractedSkill(BaseModel):
    """A single skill entry of a parsed resume (element type of ``ParsedResume.skills``).

    The ranges mentioned in the field descriptions are guidance text only;
    no numeric bounds are enforced by this model.
    """
    # Human-readable skill name.
    name: str = Field(description="Skill name")
    # Free-form string; the description lists the expected values but they are not validated.
    category: str = Field(description="Category: technical, soft, domain")
    # Described as a 1-5 estimate; any integer is accepted.
    proficiency: int = Field(description="Estimated proficiency 1-5")
    # Fractional years are allowed (float).
    years_experience: float = Field(description="Years of experience with skill")

class ExtractedExperience(BaseModel):
    """One position held by the candidate (element type of ``ParsedResume.experience``).

    All fields are required.
    """
    # Employer name.
    company: str
    # Job title held at that employer.
    role: str
    # Length of the position in whole months.
    duration_months: int
    # Free-text description of the position.
    description: str
    # Names of the skills associated with this position.
    skills_used: List[str]

class ParsedResume(BaseModel):
    """Structured representation of a resume.

    Used as the target schema of ``AIResumeParser`` and as the response model
    of the resume parsing endpoint.

    ``email`` and ``phone`` default to ``None``. Under pydantic v2 an
    ``Optional[...]`` annotation without a default is still a *required*
    field, so a resume without contact details would otherwise fail
    validation. The explicit default behaves the same on pydantic v1 and v2.
    """
    full_name: str
    # Contact details are often missing from a resume; treat them as truly optional.
    email: Optional[str] = None
    phone: Optional[str] = None
    current_role: str
    years_total_experience: int
    skills: List[ExtractedSkill]
    experience: List[ExtractedExperience]
    education: List[str]
    certifications: List[str]
    industry: str
    summary: str

class CareerPathRequest(BaseModel):
    """Request body of the career-path endpoint."""
    # Title of the role the user currently holds.
    current_role: str
    # Desired destination role; when omitted (None) no specific target is requested.
    target_role: Optional[str] = None
    # Names of the skills the user already has.
    user_skills: List[str]

class CareerPathResponse(BaseModel):
    """Response body of the career-path endpoint.

    Attributes:
        paths: Every analysed path, one dict per path.
        recommended_path: The best-ranked path, or ``None`` when no path was
            found. The field is optional because the endpoint legitimately
            has nothing to recommend for an unknown or isolated role.
        skill_gaps: Skill-gap analysis accompanying the recommendation;
            empty when there is none.
    """
    paths: List[Dict]
    recommended_path: Optional[Dict] = None
    skill_gaps: Dict = Field(default_factory=dict)

## 3. Implement AI Resume Parser

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser
import PyPDF2
import docx
import io

def extract_text(file_bytes: bytes, filename: str) -> str:
    """Extract plain text from an uploaded resume.

    Args:
        file_bytes: Raw content of the uploaded file.
        filename: Original file name; only its extension is used to pick the
            extractor. May be ``None`` or empty, in which case the payload is
            treated as plain text.

    Returns:
        The extracted text. ``.pdf`` and ``.docx`` files (any letter case) go
        through the dedicated extractors; everything else is decoded as UTF-8.
    """
    # Uploads can arrive without a name, and extensions are frequently
    # upper-case ("Resume.PDF"), so normalise before dispatching.
    normalized_name = (filename or "").lower()

    if normalized_name.endswith('.pdf'):
        return _extract_from_pdf(file_bytes)
    if normalized_name.endswith('.docx'):
        return _extract_from_docx(file_bytes)

    # 'utf-8-sig' strips a leading byte-order mark if present; errors='replace'
    # keeps a stray non-UTF-8 byte from aborting the whole request.
    return file_bytes.decode('utf-8-sig', errors='replace')

def _extract_from_pdf(file_bytes: bytes) -> str:
    """Extract the text of every page of a PDF.

    Args:
        file_bytes: Raw PDF content.

    Returns:
        The page texts joined with newlines, so the last word of one page is
        not fused with the first word of the next.
    """
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
    # A page without a text layer (e.g. a scanned image) may yield nothing;
    # fall back to "" so joining never fails on a non-string.
    page_texts = [(page.extract_text() or "") for page in pdf_reader.pages]
    return "\n".join(page_texts)

def _extract_from_docx(file_bytes: bytes) -> str:
    """Return the text of the paragraphs in ``document.paragraphs``, one per line."""
    document = docx.Document(io.BytesIO(file_bytes))
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)

class AIResumeParser:
    """Turns raw resume text into a ``ParsedResume`` using a Gemini chat model.

    The prompt, the output parser's format instructions and the LangChain
    pipeline are identical for every request, so they are built once in the
    constructor rather than on each ``parse_resume`` call.
    """

    def __init__(self, google_api_key: str):
        """Create the LLM client, the output parser and the parsing chain.

        Args:
            google_api_key: API key passed to ``ChatGoogleGenerativeAI``.
        """
        self.llm = ChatGoogleGenerativeAI(
            model="gemini-pro",
            temperature=0.1,
            google_api_key=google_api_key
        )
        self.parser = PydanticOutputParser(pydantic_object=ParsedResume)

        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an expert resume parser. Extract structured information 
            from resumes accurately. For skills, categorize them and estimate proficiency 
            based on context (junior/senior role, years of experience mentioned).
            
            {format_instructions}
            """),
            ("user", "Parse this resume:\n\n{resume_text}")
        ])

        # Request-independent pieces, computed once.
        self._format_instructions = self.parser.get_format_instructions()
        self.chain = prompt | self.llm | self.parser

    async def parse_resume(self, resume_text: str) -> ParsedResume:
        """Parse resume text into a ``ParsedResume``.

        Args:
            resume_text: Plain text of the resume.

        Returns:
            The structured resume produced by the output parser.

        Raises:
            ValueError: If ``resume_text`` is empty or whitespace only. Sending
                a blank document to the model could only produce invented data
                or an opaque parser error.
        """
        if not resume_text or not resume_text.strip():
            raise ValueError("resume_text is empty; nothing to parse")

        return await self.chain.ainvoke({
            "resume_text": resume_text,
            "format_instructions": self._format_instructions
        })

## 4. Create Vector Database Service for Skill Matching

In [None]:
import pinecone
from sentence_transformers import SentenceTransformer
import numpy as np

class SkillVectorDB:
    """Skill embeddings stored in Pinecone plus local semantic skill matching.

    Skill names are embedded with the ``all-MiniLM-L6-v2`` sentence-transformer
    (384-dimensional vectors) and compared by cosine similarity.
    """

    def __init__(self, pinecone_api_key: str, index_name: str = "career-skills",
                 environment: str = "us-west1-gcp"):
        """Connect to Pinecone, create the index if needed and load the model.

        Args:
            pinecone_api_key: Pinecone API key.
            index_name: Name of the index holding the skill vectors.
            environment: Pinecone environment to connect to.
        """
        # Initialize Pinecone
        pinecone.init(api_key=pinecone_api_key, environment=environment)

        # Create or connect to index
        if index_name not in pinecone.list_indexes():
            pinecone.create_index(
                index_name,
                dimension=384,  # all-MiniLM-L6-v2 embedding size
                metric="cosine"
            )

        self.index = pinecone.Index(index_name)

        # Load sentence transformer model
        self.model = SentenceTransformer('all-MiniLM-L6-v2')

    @staticmethod
    def _unit_rows(vectors) -> np.ndarray:
        """Return ``vectors`` as a 2-D float array with every row L2-normalised.

        All-zero rows are left as zeros instead of being divided by zero.
        """
        matrix = np.atleast_2d(np.asarray(vectors, dtype=float))
        norms = np.linalg.norm(matrix, axis=1, keepdims=True)
        norms[norms == 0] = 1.0
        return matrix / norms

    def add_skills(self, skills: List[Dict], batch_size: int = 100):
        """Embed skills and upsert them into the index.

        Args:
            skills: Dicts with required keys ``id`` and ``name`` and optional
                ``category`` (default ``'general'``) and ``demand_score``
                (default ``50``).
            batch_size: Maximum number of vectors sent per upsert request.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        if not skills:
            return  # nothing to embed or upsert

        # One batched encode call instead of one model invocation per skill.
        embeddings = self.model.encode([skill['name'] for skill in skills])

        vectors = [
            {
                'id': skill['id'],
                'values': np.asarray(embedding).tolist(),
                'metadata': {
                    'name': skill['name'],
                    'category': skill.get('category', 'general'),
                    'demand_score': skill.get('demand_score', 50)
                }
            }
            for skill, embedding in zip(skills, embeddings)
        ]

        # Upsert in bounded chunks so a large seed list does not become a
        # single oversized request.
        for start in range(0, len(vectors), batch_size):
            self.index.upsert(vectors=vectors[start:start + batch_size])

    def find_similar_skills(self, skill_name: str, top_k: int = 5) -> List[Dict]:
        """Find the ``top_k`` stored skills most similar to ``skill_name``.

        Returns:
            One dict per match with keys ``skill``, ``category``,
            ``similarity_score`` and ``demand_score``.
        """
        query_embedding = self.model.encode(skill_name).tolist()

        results = self.index.query(
            vector=query_embedding,
            top_k=top_k,
            include_metadata=True
        )

        return [
            {
                'skill': match['metadata']['name'],
                'category': match['metadata']['category'],
                'similarity_score': match['score'],
                'demand_score': match['metadata']['demand_score']
            }
            for match in results['matches']
        ]

    def match_user_skills_to_role(self, user_skills: List[str],
                                   role_required_skills: List[str],
                                   threshold: float = 0.7) -> Dict:
        """Match user skills against a role's required skills.

        A required skill counts as matched when its best cosine similarity
        with any user skill is strictly greater than ``threshold``.

        Args:
            user_skills: Skill names the user has.
            role_required_skills: Skill names the role requires.
            threshold: Similarity above which two skills count as a match.

        Returns:
            Dict with ``match_percentage`` (0-100), ``matched_skills`` (dicts
            with ``required``, ``user_has``, ``match_score``) and
            ``missing_skills``. A role without requirements is a 100% match
            (nothing is missing); a user without skills is a 0% match.
        """
        required = list(role_required_skills)
        owned = list(user_skills)

        # Both edge cases previously crashed (division by zero / max of an
        # empty array); answer them without touching the model.
        if not required:
            return {'match_percentage': 100.0, 'matched_skills': [], 'missing_skills': []}
        if not owned:
            return {'match_percentage': 0.0, 'matched_skills': [], 'missing_skills': required}

        # Encode each side once; the product of row-normalised matrices is the
        # full cosine-similarity table, shape (len(required), len(owned)).
        owned_unit = self._unit_rows(self.model.encode(owned))
        required_unit = self._unit_rows(self.model.encode(required))
        similarity_table = required_unit @ owned_unit.T

        matched_skills = []
        missing_skills = []
        for required_skill, similarities in zip(required, similarity_table):
            best_idx = int(similarities.argmax())
            best_score = float(similarities[best_idx])

            if best_score > threshold:
                matched_skills.append({
                    'required': required_skill,
                    'user_has': owned[best_idx],
                    'match_score': best_score
                })
            else:
                missing_skills.append(required_skill)

        match_percentage = (len(matched_skills) / len(required)) * 100

        return {
            'match_percentage': match_percentage,
            'matched_skills': matched_skills,
            'missing_skills': missing_skills
        }

## 5. Create Graph Database Service for Career Paths

In [None]:
from neo4j import GraphDatabase
from dataclasses import dataclass

@dataclass
class CareerPath:
    """One candidate route through the career graph, as built by ``CareerGraphDB.find_career_paths``."""
    # Role titles along the path, from the starting role to the final role.
    roles: List[str]
    # Sum of the ``avg_months`` values of the transitions on the path.
    total_months: int
    # Mean of the ``difficulty`` values of the transitions on the path.
    avg_difficulty: float
    # ``avg_salary`` of the final role minus ``avg_salary`` of the starting role.
    salary_growth: int
    # Skill names required by the final role of the path.
    required_skills: List[str]

class CareerGraphDB:
    """Neo4j-backed store of roles, role transitions and role skill requirements.

    Can be used as a context manager so the driver is always closed::

        with CareerGraphDB(uri, user, password) as graph:
            graph.find_career_paths("Software Engineer")
    """

    # Cypher does not accept a query parameter as the bound of a
    # variable-length pattern (``*1..$max_hops`` is a syntax error), so the
    # bound is spliced into the query text as a validated integer literal.
    _HOPS_TOKEN = "__MAX_HOPS__"

    _TARGETED_PATHS_QUERY = """
        MATCH path = allShortestPaths(
            (current:Role {title: $current})-[:TRANSITIONS_TO*1..__MAX_HOPS__]->(target:Role {title: $target})
        )
        WITH path, relationships(path) as rels, nodes(path) as roles
        RETURN
            [r in roles | r.title] as role_titles,
            reduce(months = 0, rel in rels | months + rel.avg_months) as total_months,
            reduce(diff = 0.0, rel in rels | diff + rel.difficulty) / size(rels) as avg_difficulty,
            roles[-1].avg_salary - roles[0].avg_salary as salary_growth
        ORDER BY total_months, avg_difficulty
        LIMIT 10
    """

    _OPEN_PATHS_QUERY = """
        MATCH path = (current:Role {title: $current})-[:TRANSITIONS_TO*1..__MAX_HOPS__]->(target:Role)
        WITH path, relationships(path) as rels, nodes(path) as roles
        WHERE size(roles) >= 2
        RETURN DISTINCT
            [r in roles | r.title] as role_titles,
            reduce(months = 0, rel in rels | months + rel.avg_months) as total_months,
            reduce(diff = 0.0, rel in rels | diff + rel.difficulty) / size(rels) as avg_difficulty,
            roles[-1].avg_salary - roles[0].avg_salary as salary_growth
        ORDER BY salary_growth DESC, total_months ASC
        LIMIT 20
    """

    def __init__(self, uri: str, user: str, password: str):
        """Open a Neo4j driver for ``uri`` authenticated as ``user``."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Always release the driver; never suppress the caller's exception.
        self.close()

    def create_career_graph_schema(self):
        """Create uniqueness constraints on ``Role.id`` and ``Skill.id`` if absent."""
        with self.driver.session() as session:
            session.run("""
                CREATE CONSTRAINT role_id IF NOT EXISTS
                FOR (r:Role) REQUIRE r.id IS UNIQUE
            """)
            session.run("""
                CREATE CONSTRAINT skill_id IF NOT EXISTS
                FOR (s:Skill) REQUIRE s.id IS UNIQUE
            """)

    def add_role(self, role_data: Dict):
        """Create or update a role node.

        Args:
            role_data: Must provide ``id``, ``title``, ``industry``, ``level``,
                ``avg_salary``, ``growth_rate`` and ``demand_score``; each is
                passed to the query as a parameter of the same name.
        """
        with self.driver.session() as session:
            session.run("""
                MERGE (r:Role {id: $id})
                SET r.title = $title,
                    r.industry = $industry,
                    r.level = $level,
                    r.avg_salary = $avg_salary,
                    r.growth_rate = $growth_rate,
                    r.demand_score = $demand_score
            """, **role_data)

    def add_transition(self, from_role_id: str, to_role_id: str,
                      transition_data: Dict):
        """Create or update a ``TRANSITIONS_TO`` relationship between two roles.

        Both roles are looked up with ``MATCH``, so nothing is written when
        either id does not exist.

        Args:
            from_role_id: Id of the source role.
            to_role_id: Id of the destination role.
            transition_data: Must provide ``avg_months``, ``difficulty``,
                ``success_rate`` and ``common_path``.
        """
        with self.driver.session() as session:
            session.run("""
                MATCH (from:Role {id: $from_id})
                MATCH (to:Role {id: $to_id})
                MERGE (from)-[t:TRANSITIONS_TO]->(to)
                SET t.avg_months = $avg_months,
                    t.difficulty = $difficulty,
                    t.success_rate = $success_rate,
                    t.common_path = $common_path
            """, from_id=from_role_id, to_id=to_role_id, **transition_data)

    def add_skill_requirement(self, role_id: str, skill_id: str,
                            proficiency: int, importance: str, skill_name: Optional[str] = None):
        """Link a role to a required skill, creating the skill node if needed.

        Args:
            role_id: Id of an existing role.
            skill_id: Id of the skill node.
            proficiency: Proficiency stored on the relationship.
            importance: Importance label stored on the relationship.
            skill_name: Name given to a newly created skill node; falls back
                to ``skill_id`` when omitted.
        """
        with self.driver.session() as session:
            session.run("""
                MATCH (r:Role {id: $role_id})
                MERGE (s:Skill {id: $skill_id})
                ON CREATE SET s.name = $skill_name
                MERGE (r)-[req:REQUIRES_SKILL]->(s)
                SET req.proficiency = $proficiency,
                    req.importance = $importance
            """, role_id=role_id, skill_id=skill_id,
                proficiency=proficiency, importance=importance, skill_name=skill_name or skill_id)

    @staticmethod
    def _validated_hops(max_hops) -> int:
        """Return ``max_hops`` if it is a positive ``int``, else raise ``ValueError``.

        The value ends up inside the query text, so only a genuine integer is
        accepted (``bool`` is rejected even though it subclasses ``int``).
        """
        if isinstance(max_hops, bool) or not isinstance(max_hops, int) or max_hops < 1:
            raise ValueError(f"max_hops must be a positive integer, got {max_hops!r}")
        return max_hops

    def find_career_paths(self, current_role: str, target_role: Optional[str] = None,
                         max_hops: int = 4) -> "List[CareerPath]":
        """Find career paths starting at ``current_role``.

        Args:
            current_role: Title of the starting role.
            target_role: Title of the destination role. When given, only the
                shortest paths to it are returned (at most 10, quickest and
                easiest first); otherwise every reachable role is considered
                (at most 20, highest salary growth first).
            max_hops: Maximum number of transitions in a path.

        Returns:
            A list of ``CareerPath``; empty when nothing matches.

        Raises:
            ValueError: If ``max_hops`` is not a positive integer.
        """
        hops = self._validated_hops(max_hops)

        if target_role:
            template = self._TARGETED_PATHS_QUERY
            params = {'current': current_role, 'target': target_role}
        else:
            template = self._OPEN_PATHS_QUERY
            params = {'current': current_role}
        query = template.replace(self._HOPS_TOKEN, str(hops))

        # Materialise the rows before issuing further queries so the skill
        # lookups do not run while this result is still streaming.
        with self.driver.session() as session:
            records = list(session.run(query, **params))

        # Many paths end in the same role; look each role's skills up once.
        skills_by_role = {}
        paths = []
        for record in records:
            role_titles = record['role_titles']
            final_role = role_titles[-1]
            if final_role not in skills_by_role:
                skills_by_role[final_role] = self._get_role_skills(final_role)

            paths.append(CareerPath(
                roles=role_titles,
                total_months=record['total_months'],
                avg_difficulty=record['avg_difficulty'],
                salary_growth=record['salary_growth'],
                # Copy so paths sharing a final role do not share one list.
                required_skills=list(skills_by_role[final_role])
            ))

        return paths

    def _get_role_skills(self, role_title: str) -> List[str]:
        """Return names of the role's ``high``/``critical`` skills, highest proficiency first."""
        with self.driver.session() as session:
            result = session.run("""
                MATCH (r:Role {title: $title})-[req:REQUIRES_SKILL]->(s:Skill)
                WHERE req.importance IN ['high', 'critical']
                RETURN s.name as skill
                ORDER BY req.proficiency DESC
            """, title=role_title)

            return [record['skill'] for record in result]

## 6. Implement Redis Caching Layer

In [None]:
import redis.asyncio as redis
import json
from typing import Any

class RedisCache:
    """Small JSON cache on top of an asyncio Redis client."""

    def __init__(self, redis_url: str):
        """Create the client; ``decode_responses=True`` makes Redis return ``str``."""
        self.redis = redis.from_url(redis_url, decode_responses=True)

    async def get(self, key: str) -> Optional[Any]:
        """Return the cached value for ``key``, or ``None`` on a miss.

        An entry that is not valid JSON (written by another tool, truncated,
        etc.) is treated as a miss. A cache problem should make the caller
        recompute the value, not fail the request.
        """
        raw = await self.redis.get(key)
        if not raw:
            return None
        try:
            return json.loads(raw)
        except (TypeError, ValueError):  # json.JSONDecodeError is a ValueError
            return None

    async def set(self, key: str, value: Any, expire: int = 3600):
        """Store ``value`` as JSON under ``key`` for ``expire`` seconds."""
        await self.redis.setex(
            key,
            expire,
            json.dumps(value)
        )

    async def delete(self, key: str):
        """Remove ``key`` from the cache."""
        await self.redis.delete(key)

## 7. Configure FastAPI Application and Endpoints

In [None]:
from fastapi import FastAPI, UploadFile, File, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
import os

# Initialize FastAPI
app = FastAPI(
    title="Career Navigation API",
    description="AI-powered career path discovery platform",
    version="1.0.0"
)

# CORS
# Allowed origins come from CORS_ALLOW_ORIGINS (comma separated); the default
# stays "*" for local experimentation. Credentials are only enabled when the
# origins are listed explicitly: a wildcard origin combined with credentials
# would let any website issue authenticated requests against this API.
cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]
cors_allow_credentials = "*" not in cors_origins

app.add_middleware(
    CORSMiddleware,
    allow_origins=cors_origins,
    allow_credentials=cors_allow_credentials,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Service configuration.
# setdefault() only fills in a placeholder when the variable is not already
# set, so real values exported in the shell or loaded from .env are never
# overwritten by the demo defaults below.
os.environ.setdefault("GOOGLE_API_KEY", "your-key")
os.environ.setdefault("PINECONE_API_KEY", "your-key")
os.environ.setdefault("NEO4J_URI", "bolt://localhost:7687")
os.environ.setdefault("NEO4J_USER", "neo4j")
os.environ.setdefault("NEO4J_PASSWORD", "password")
os.environ.setdefault("REDIS_URL", "redis://localhost:6379")

# Initialize services
resume_parser = AIResumeParser(google_api_key=os.getenv("GOOGLE_API_KEY"))
skill_db = SkillVectorDB(pinecone_api_key=os.getenv("PINECONE_API_KEY"))
career_graph = CareerGraphDB(
    uri=os.getenv("NEO4J_URI"),
    user=os.getenv("NEO4J_USER"),
    password=os.getenv("NEO4J_PASSWORD")
)
cache = RedisCache(redis_url=os.getenv("REDIS_URL"))

def _component_score(raw, scale: float, invert: bool = False) -> float:
    """Map ``raw / scale`` onto [0, 1]; a missing (``None``) metric scores 0."""
    if raw is None:
        return 0.0
    ratio = max(0.0, min(float(raw) / scale, 1.0))
    return 1.0 - ratio if invert else ratio


def calculate_path_score(path: Dict) -> float:
    """Calculate the overall score of an analysed career path.

    Every component is normalised to [0, 1] before weighting, so the result
    is always within [0, 1] and higher is better. Out-of-range inputs
    (negative salary growth, a timeline beyond 60 months, difficulty above
    10) are clamped instead of producing negative contributions. A metric
    that is ``None`` earns no credit for its component.

    Args:
        path: Dict with keys ``skill_match`` (0-100), ``salary_growth``,
            ``timeline_months`` and ``difficulty``.

    Returns:
        Weighted sum: 40% skill match, 30% salary growth, 20% timeline
        (shorter is better), 10% difficulty (easier is better).
    """
    weights = {
        'skill_match': 0.4,
        'salary_growth': 0.3,
        'timeline': 0.2,
        'difficulty': 0.1
    }

    # Normalize values
    skill_score = _component_score(path['skill_match'], 100)
    salary_score = _component_score(path['salary_growth'], 50000)
    timeline_score = _component_score(path['timeline_months'], 60, invert=True)
    difficulty_score = _component_score(path['difficulty'], 10, invert=True)

    return (
        weights['skill_match'] * skill_score +
        weights['salary_growth'] * salary_score +
        weights['timeline'] * timeline_score +
        weights['difficulty'] * difficulty_score
    )

@app.post("/api/v1/resume/parse", response_model=ParsedResume)
async def parse_resume(file: UploadFile = File(...)):
    """Parse an uploaded resume into structured data.

    Results are cached for one hour, keyed by a SHA-256 digest of the file
    *content*. Keying by file name would hand one user's parsed resume to
    anyone else who uploads a different file with the same name (for example
    ``resume.pdf``).

    Raises:
        HTTPException: 400 when the upload is empty; 500 for any failure
            while extracting or parsing.
    """
    # Local import: hashlib is only needed here and is not imported elsewhere.
    import hashlib

    try:
        # Read file
        contents = await file.read()
        if not contents:
            raise HTTPException(status_code=400, detail="Uploaded file is empty")

        # Check cache
        cache_key = f"resume:{hashlib.sha256(contents).hexdigest()}"
        cached = await cache.get(cache_key)
        if cached:
            return cached

        # Parse resume
        text = extract_text(contents, file.filename)
        parsed_data = await resume_parser.parse_resume(text)

        # Cache result
        await cache.set(cache_key, parsed_data.dict(), expire=3600)

        return parsed_data

    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must not be rewrapped as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/api/v1/career-paths", response_model=CareerPathResponse)
async def get_career_paths(request: CareerPathRequest):
    """Return career paths for the user, ranked by overall score.

    Each path is paired with its own skill-gap analysis, so ``skill_gaps`` in
    the response always describes ``recommended_path`` rather than whichever
    path happened to be analysed last. When no path is found the response
    contains an empty ``paths`` list and empty dicts for ``recommended_path``
    and ``skill_gaps``.

    Raises:
        HTTPException: 500 when the graph lookup or skill matching fails.
    """
    try:
        # Find paths in graph
        paths = career_graph.find_career_paths(
            current_role=request.current_role,
            target_role=request.target_role
        )

        # Analyze and score each path, keeping its gap analysis alongside it
        scored = []
        for path in paths:
            gap = skill_db.match_user_skills_to_role(
                user_skills=request.user_skills,
                role_required_skills=path.required_skills
            )

            entry = {
                'roles': path.roles,
                'timeline_months': path.total_months,
                'difficulty': path.avg_difficulty,
                'salary_growth': path.salary_growth,
                'skill_match': gap['match_percentage'],
                'missing_skills': gap['missing_skills']
            }
            entry['score'] = calculate_path_score(entry)
            scored.append((entry, gap))

        # Rank paths by overall score (best first)
        scored.sort(key=lambda pair: pair[0]['score'], reverse=True)

        if scored:
            recommended_path, recommended_gap = scored[0]
        else:
            recommended_path, recommended_gap = {}, {}

        return {
            'paths': [entry for entry, _ in scored],
            'recommended_path': recommended_path,
            'skill_gaps': recommended_gap
        }

    except HTTPException:
        # Let deliberate HTTP errors through unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

## 8. Run Integrated Career Navigation Workflow

In [None]:
import asyncio

async def main():
    """Walk through the demo: show a mocked parsed resume, then list career paths.

    The resume-parsing step is mocked (no LLM call). Path discovery uses the
    module-level ``career_graph`` and each path's skill gap is computed with
    ``skill_db``; failures of either service are printed instead of raised.
    """
    print("Starting Career Navigation Workflow...")

    # Step 1: resume parsing (mocked - stands in for an uploaded file).
    print("\n1. Parsing Resume...")
    # Sample text that would be handed to the parser in a real run.
    resume_text = """
    John Doe
    Software Engineer
    Experience:
    - Junior Developer at TechCorp (2 years)
      Skills: Python, Java, SQL
    - Software Engineer at BigData Inc (3 years)
      Skills: Python, FastAPI, React, Docker
    """

    # With API keys configured, the text can be parsed directly:
    # parsed_resume = await resume_parser.parse_resume(resume_text)

    # Hard-coded stand-in for the parser output so the demo needs no API key.
    print("Parsed Resume: John Doe")
    print("Current Role: Software Engineer")
    user_skills = ["Python", "FastAPI", "React", "Docker", "SQL", "Java"]
    print(f"Skills: {user_skills}")

    # Step 2: path discovery (requires a running, seeded graph database).
    print("\n2. Finding Career Paths...")
    try:
        candidate_paths = career_graph.find_career_paths(
            current_role="Software Engineer",
            target_role="Senior Software Engineer",  # a role present in the seed data
        )
        path_count = len(candidate_paths)
        print(f"Found {path_count} career paths:")

        for number, candidate in enumerate(candidate_paths, start=1):
            print(f"\nPath {number}:")
            print("  Roles: " + " -> ".join(candidate.roles))
            print(f"  Timeline: {candidate.total_months} months")
            print(f"  Difficulty: {candidate.avg_difficulty}/10")
            print(f"  Salary Growth: ${candidate.salary_growth}")

            # Step 3: skill-gap analysis for this path (requires the vector DB).
            print("  Analyzing Skill Gap...")
            try:
                analysis = skill_db.match_user_skills_to_role(
                    user_skills, candidate.required_skills
                )
                print(f"  Skill Match: {analysis['match_percentage']:.1f}%")
                print(f"  Missing Skills: {analysis['missing_skills']}")
            except Exception as vector_error:
                print(f"  Vector DB Error: {vector_error}")

    except Exception as graph_error:
        print(f"Graph DB Error: {graph_error}")
        print("Ensure Neo4j is running and seeded.")

# Run the workflow
# main() is a coroutine function, so it has to be awaited; the call is left
# commented out and this cell only prints a reminder.
# await main() # Uncomment to run in Jupyter
print("Workflow defined. Uncomment 'await main()' to run.")