In [None]:
"""
PathFinder AI - Phase 2: Doc2Vec Job Matcher
Dataset: Jobs & Skills Mapping for Career Analysis
"""

import pandas as pd
import numpy as np
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from gensim.utils import simple_preprocess
import joblib
import json
from sklearn.metrics.pairwise import cosine_similarity

# Load Dataset 2
# Banner for the Phase 2 (Doc2Vec job matcher) run.
print("="*70)
print("PHASE 2: DOC2VEC JOB MATCHER")
print("="*70)

# Update this path to your dataset location
# The rest of this cell reads the columns 'job_title', 'Short_description',
# 'Skills_required', 'ID_num', 'Industry' and 'Pay_grade' from this frame.
df = pd.read_csv('/kaggle/input/jobs-and-skills-mapping-for-career-analysis/formatted_jobs.csv')

# Quick sanity report: row count, column names and a preview of the data.
print(f"\nDataset loaded: {len(df)} records")
print(f"Columns: {df.columns.tolist()}")
print("\nFirst 3 rows:")
print(df.head(3))

# Data Preprocessing
print("\n--- Preprocessing Job Data ---")

# Create job documents by combining all text fields
def create_job_document(row):
    """Combine job title, description, and skills into one document.

    Parameters:
    row: mapping-like record (e.g. a DataFrame row) providing the
         'job_title', 'Short_description' and 'Skills_required' fields

    Returns:
    string with the non-missing fields joined by single spaces, in the
    order title, description, skills
    """
    fields = (row['job_title'], row['Short_description'], row['Skills_required'])

    # Skip missing values (None, or float NaN which is the only value that is
    # not equal to itself). Passing them through str() would inject literal
    # "None"/"nan" tokens into the document used to train the model.
    parts = [
        str(value)
        for value in fields
        if not (value is None or (isinstance(value, float) and value != value))
    ]
    return " ".join(parts)

# Build one text document per row (axis=1 passes each row to the function).
df['job_document'] = df.apply(create_job_document, axis=1)

# Preview the first 200 characters of the first document.
print(f"Sample job document:\n{df['job_document'].iloc[0][:200]}...")

# Tokenize documents
print("\n--- Tokenizing Documents ---")

def tokenize_document(doc):
    """Split a text document into cleaned tokens via gensim's simple_preprocess.

    Accents are stripped (deacc=True) and only tokens between 2 and 15
    characters long are kept.
    """
    token_list = simple_preprocess(doc, deacc=True, min_len=2, max_len=15)
    return token_list

# Tokenize every job document; the token lists are the Doc2Vec training input.
df['tokens'] = df['job_document'].apply(tokenize_document)

print(f"Sample tokens: {df['tokens'].iloc[0][:10]}")

# Create TaggedDocuments for Doc2Vec
# Each document is tagged with its row position (as a string); the same
# str(position) keys are used below to read the trained vectors back.
tagged_documents = [
    TaggedDocument(words=tokens, tags=[str(idx)]) 
    for idx, tokens in enumerate(df['tokens'])
]

print(f"\nTotal documents for training: {len(tagged_documents)}")

# Train Doc2Vec Model
print("\n--- Training Doc2Vec Model ---")
print("This may take a few minutes...")

# NOTE(review): seed is fixed but workers=4; multi-threaded training is
# presumably not bit-for-bit reproducible - confirm against the gensim docs.
doc2vec_model = Doc2Vec(
    vector_size=100,        # Dimension of embeddings
    window=5,               # Context window
    min_count=2,            # Ignore words that appear less than 2 times
    workers=4,              # Parallel processing
    epochs=40,              # Training iterations
    dm=0,                   # PV-DBOW (faster, good for classification)
    dbow_words=1,           # Train word vectors too
    seed=42
)

# Build vocabulary
doc2vec_model.build_vocab(tagged_documents)
print(f"Vocabulary size: {len(doc2vec_model.wv)}")

# Train the model
# total_examples and epochs are taken from the model itself so they match
# what build_vocab() counted and what the constructor configured.
doc2vec_model.train(
    tagged_documents,
    total_examples=doc2vec_model.corpus_count,
    epochs=doc2vec_model.epochs
)

print("Doc2Vec training complete!")

# Save the model
doc2vec_model.save('doc2vec_job_model.model')
print("Doc2Vec model saved as 'doc2vec_job_model.model'")

# Pre-compute job vectors
print("\n--- Pre-computing Job Vectors ---")

# Row i of job_vectors is the trained vector of the document tagged str(i),
# i.e. the job at row position i of df.
job_vectors = np.array([doc2vec_model.dv[str(i)] for i in range(len(df))])
print(f"Job vectors shape: {job_vectors.shape}")

# Save job vectors and metadata
# Only the display columns are kept; .copy() avoids writing into a view of df.
job_metadata = df[['ID_num', 'job_title', 'Short_description', 'Skills_required', 
                    'Industry', 'Pay_grade']].copy()
# vector_id is the row position, which is also the row index into job_vectors.
job_metadata['vector_id'] = range(len(df))

joblib.dump(job_vectors, 'job_vectors.pkl')
joblib.dump(job_metadata, 'job_metadata.pkl')

print("Job vectors saved as 'job_vectors.pkl'")
print("Job metadata saved as 'job_metadata.pkl'")

# Save job database as JSON for easy access
# df.to_dict('records') exports every column of df, including the derived
# 'job_document' and 'tokens' columns added above.
job_database = df.to_dict('records')
with open('job_database.json', 'w') as f:
    json.dump(job_database, f, indent=4)

print("Job database saved as 'job_database.json'")

# Test the model
print("\n" + "="*70)
print("TESTING DOC2VEC JOB MATCHER")
print("="*70)

def match_jobs(resume_text, top_k=10):
    """
    Match jobs based on resume text

    The saved Doc2Vec model, job vectors and job metadata are loaded from the
    working directory on every call, a vector is inferred for the resume, and
    jobs are ranked by cosine similarity to it.

    Parameters:
    resume_text: string containing resume content
    top_k: number of top matching jobs to return; values <= 0 return []

    Returns:
    list of matching jobs (dicts with job_id, job_title, description,
    skills_required, industry, pay_grade, match_score), best match first
    """
    # Guard first: a non-positive top_k must yield no matches. Without this,
    # the slice [-0:] below would select every job instead of none.
    if top_k <= 0:
        return []

    # Load model and data
    model = Doc2Vec.load('doc2vec_job_model.model')
    vectors = joblib.load('job_vectors.pkl')
    metadata = joblib.load('job_metadata.pkl')

    # Tokenize resume
    resume_tokens = tokenize_document(resume_text)

    # Infer vector for resume; cosine_similarity expects 2-D input
    resume_vector = model.infer_vector(resume_tokens, epochs=20)
    resume_vector = resume_vector.reshape(1, -1)

    # Calculate cosine similarity with all jobs
    similarities = cosine_similarity(resume_vector, vectors)[0]

    # Get top K jobs: argsort is ascending, so take the tail and reverse it
    top_indices = np.argsort(similarities)[-top_k:][::-1]

    # Prepare results
    results = []
    for idx in top_indices:
        job = metadata.iloc[idx]
        results.append({
            'job_id': int(job['ID_num']),
            'job_title': job['job_title'],
            'description': job['Short_description'],
            'skills_required': job['Skills_required'],
            'industry': job['Industry'],
            'pay_grade': job['Pay_grade'],
            # similarity in [-1, 1] reported as a percentage
            'match_score': round(float(similarities[idx]) * 100, 2)
        })

    return results

# Test with sample resume
test_resume = """
Experienced Software Engineer with 5 years in web development.
Proficient in Python, JavaScript, and React. Strong problem-solving skills
and experience with cloud computing platforms. Built scalable applications
serving thousands of users. Team collaboration and critical thinking.
"""

print(f"\nTest Resume:\n{test_resume}\n")

matched_jobs = match_jobs(test_resume, top_k=10)

print("--- Top 10 Matching Jobs ---")
for i, job in enumerate(matched_jobs, 1):
    print(f"\n{i}. {job['job_title']} (Match: {job['match_score']}%)")
    print(f"   Industry: {job['industry']}")
    print(f"   Pay Grade: {job['pay_grade']}")
    # str(): fields read from the CSV can be NaN (a float), which cannot be
    # sliced; coerce before truncating for display.
    print(f"   Skills: {str(job['skills_required'])[:80]}...")
    print(f"   Description: {str(job['description'])[:100]}...")

print("\n" + "="*70)
print("DOC2VEC JOB MATCHER - COMPLETE ✓")
print("="*70)
print("\nSaved artifacts:")
print("  - doc2vec_job_model.model")
print("  - job_vectors.pkl")
print("  - job_metadata.pkl")
print("  - job_database.json")

# ---

In [None]:
"""
PathFinder AI - Gemini Skill Extractor
Replaces mock skill extraction with real Gemini API
"""

import os
import json
from google import genai
from google.genai import types

class GeminiSkillExtractor:
    """Extract skills from resume using Gemini API"""

    def __init__(self, api_key=None):
        """
        Initialize Gemini client

        Parameters:
        api_key: Gemini API key (if None, reads from environment)

        Raises:
        ValueError: if no key is passed and GEMINI_API_KEY is not set
        """
        # Credentials must not live in source code: use the key the caller
        # passed, otherwise fall back to the GEMINI_API_KEY environment variable.
        self.api_key = api_key or os.environ.get("GEMINI_API_KEY")

        if not self.api_key:
            raise ValueError("GEMINI_API_KEY not found. Set it as environment variable or pass to constructor.")

        self.client = genai.Client(api_key=self.api_key)
        self.model = "gemini-flash-latest"

    @staticmethod
    def _parse_skills_response(response_text):
        """
        Turn the raw model reply into a skills dict

        Parameters:
        response_text: text returned by the model (may be None or wrapped
                       in a markdown code fence)

        Returns:
        dict that always has list-valued 'technical_skills' and 'soft_skills'

        Raises:
        ValueError: if the text is not valid JSON (json.JSONDecodeError is a
                    ValueError) or the JSON is not an object
        """
        text = (response_text or "").strip()

        # Remove markdown code blocks if present
        if text.startswith("```"):
            text = text.replace("```json", "").replace("```", "").strip()

        parsed = json.loads(text)
        if not isinstance(parsed, dict):
            raise ValueError(f"expected a JSON object, got {type(parsed).__name__}")

        # Guarantee both keys exist and hold lists so callers can iterate and
        # concatenate them without further checks.
        skills_data = dict(parsed)
        for key in ("technical_skills", "soft_skills"):
            value = parsed.get(key)
            skills_data[key] = list(value) if isinstance(value, list) else []
        return skills_data

    def extract_skills(self, resume_text):
        """
        Extract technical and soft skills from resume

        Errors are reported on stdout and never raised: on an API or parsing
        failure both lists are returned empty.

        Parameters:
        resume_text: string containing resume content

        Returns:
        dict with 'technical_skills' and 'soft_skills' lists
        """

        prompt = f"""You are a skill extraction expert for career recommendation systems.

Extract ALL skills from this resume and categorize them.

INSTRUCTIONS:
1. Extract ONLY actual skills (not job titles, companies, or responsibilities)
2. Return as valid JSON with two arrays: "technical_skills" and "soft_skills"
3. Use standard skill names (e.g., "Machine Learning" not "ML")
4. Include programming languages, frameworks, tools, methodologies
5. Soft skills: leadership, communication, problem-solving, etc.

RESUME:
{resume_text}

OUTPUT FORMAT (JSON only, no markdown):
{{
  "technical_skills": ["Python", "Machine Learning", "Docker", ...],
  "soft_skills": ["Leadership", "Problem Solving", "Team Collaboration", ...]
}}

Return ONLY the JSON object, nothing else."""

        # Step 1: call the API. Any failure here is an API/transport problem.
        try:
            contents = [
                types.Content(
                    role="user",
                    parts=[types.Part.from_text(text=prompt)],
                ),
            ]

            generate_content_config = types.GenerateContentConfig(
                temperature=0.1,  # Low temperature for consistent extraction
                top_p=0.95,
                top_k=40,
                max_output_tokens=2048,
            )

            response = self.client.models.generate_content(
                model=self.model,
                contents=contents,
                config=generate_content_config,
            )
            response_text = response.text

        except Exception as e:
            print(f"Error calling Gemini API: {e}")
            return {"technical_skills": [], "soft_skills": []}

        # Step 2: parse the reply. Kept separate so the raw text is always
        # available for the diagnostic message.
        try:
            return self._parse_skills_response(response_text)

        except ValueError as e:
            print(f"Error parsing Gemini response: {e}")
            print(f"Raw response: {response_text}")
            return {"technical_skills": [], "soft_skills": []}

    def extract_skills_simple(self, resume_text):
        """
        Extract all skills as a single list (for backward compatibility)

        Parameters:
        resume_text: string containing resume content

        Returns:
        list of all skills (technical + soft combined)
        """
        skills_data = self.extract_skills(resume_text)
        all_skills = skills_data.get('technical_skills', []) + skills_data.get('soft_skills', [])
        return all_skills


# ============================================================================
# TESTING
# ============================================================================

# Manual smoke test: extracts skills from a sample resume and prints them.
if __name__ == "__main__":
    
    print("="*70)
    print("GEMINI SKILL EXTRACTOR - TEST")
    print("="*70)
    
    # Test Resume
    test_resume = """
    John Doe
    Software Engineer
    john.doe@email.com | LinkedIn: linkedin.com/in/johndoe
    
    PROFESSIONAL SUMMARY
    Experienced Software Engineer with 5 years in full-stack development.
    Strong problem-solving abilities and team leadership experience.
    
    TECHNICAL SKILLS
    - Languages: Python, JavaScript, Java, TypeScript, SQL
    - Frontend: React, Vue.js, HTML5, CSS3, Tailwind CSS
    - Backend: Node.js, FastAPI, Django, Express.js
    - Databases: PostgreSQL, MongoDB, Redis, MySQL
    - DevOps: Docker, Kubernetes, AWS, Azure, CI/CD, Jenkins
    - Tools: Git, GitHub Actions, Terraform, Nginx
    - Methodologies: Agile, Scrum, Test-Driven Development
    
    EXPERIENCE
    Senior Software Engineer | TechCorp Inc. | 2020 - Present
    - Led team of 5 developers in building microservices architecture
    - Implemented CI/CD pipelines reducing deployment time by 40%
    - Mentored junior developers and conducted code reviews
    
    Software Engineer | StartupX | 2018 - 2020
    - Built RESTful APIs serving 100k+ daily users
    - Optimized database queries improving performance by 60%
    - Collaborated with cross-functional teams
    
    EDUCATION
    B.S. Computer Science | University of Technology | 2018
    
    CERTIFICATIONS
    - AWS Certified Solutions Architect
    - Docker Certified Associate
    """
    
    try:
        # Initialize extractor
        extractor = GeminiSkillExtractor()
        
        print("\nExtracting skills from resume...\n")
        
        # Extract skills (categorized)
        skills = extractor.extract_skills(test_resume)
        
        # .get(): the reply is model-generated JSON, so a category may be
        # absent; treat a missing category as empty instead of raising KeyError.
        technical_skills = skills.get('technical_skills', [])
        soft_skills = skills.get('soft_skills', [])
        
        print("\n--- EXTRACTED SKILLS ---\n")
        
        print("📋 TECHNICAL SKILLS:")
        for skill in technical_skills:
            print(f"  • {skill}")
        
        print(f"\nTotal Technical Skills: {len(technical_skills)}")
        
        print("\n💪 SOFT SKILLS:")
        for skill in soft_skills:
            print(f"  • {skill}")
        
        print(f"\nTotal Soft Skills: {len(soft_skills)}")
        
        # Test simple extraction (combined list)
        print("\n\n--- SIMPLE EXTRACTION (Combined) ---\n")
        all_skills = extractor.extract_skills_simple(test_resume)
        print(f"All Skills ({len(all_skills)} total):")
        for skill in all_skills[:15]:  # Show first 15
            print(f"  • {skill}")
        
        if len(all_skills) > 15:
            print(f"  ... and {len(all_skills) - 15} more")
        
        print("\n" + "="*70)
        print("GEMINI SKILL EXTRACTOR - TEST COMPLETE ✓")
        print("="*70)
        
    except ValueError as e:
        # Raised by the constructor when no API key is configured.
        print(f"\n❌ ERROR: {e}")
        print("\nTo fix this:")
        print("1. Get Gemini API key from: https://aistudio.google.com/apikey")
        print("2. Set environment variable: export GEMINI_API_KEY='your-key-here'")
        print("3. Or pass API key to constructor: GeminiSkillExtractor(api_key='your-key')")
    
    except Exception as e:
        print(f"\n❌ Unexpected error: {e}")

In [None]:
"""
PathFinder AI - Phase 2: Updated Combined System with Gemini
Now uses real Gemini API for skill extraction
"""

import pandas as pd
import numpy as np
import joblib
import json
import os
from gensim.models.doc2vec import Doc2Vec
from gensim.utils import simple_preprocess
from sklearn.metrics.pairwise import cosine_similarity
# from gemini_skill_extractor import GeminiSkillExtractor

# Banner marking the start of the combined Phase 2 pipeline output.
print("="*70)
print("PHASE 2: COMBINED SYSTEM WITH GEMINI API")
print("="*70)

# ============================================================================
# COMPONENT 1: GEMINI SKILL EXTRACTION
# ============================================================================

def extract_skills_with_gemini(resume_text, api_key=None):
    """
    Extract skills using Gemini API

    If the extractor cannot be created or the call raises, the error is
    printed and the keyword-based mock extraction is used instead.

    Parameters:
    resume_text: string containing resume
    api_key: optional Gemini API key; passed through to GeminiSkillExtractor

    Returns:
    dict with technical_skills and soft_skills
    """
    try:
        # Forward the caller's key rather than embedding a credential here.
        extractor = GeminiSkillExtractor(api_key=api_key)
        skills = extractor.extract_skills(resume_text)
        return skills
    except Exception as e:
        print(f"⚠️ Gemini API error: {e}")
        print("Falling back to mock extraction...")
        # Fallback to mock
        return extract_skills_mock(resume_text)

def extract_skills_mock(resume_text):
    """
    Fallback mock skill extraction

    Looks for a fixed list of skill keywords in the resume, ignoring case.
    A keyword only counts when it is not embedded in a longer word, so
    "JavaScript" does not also report "Java" and "PostgreSQL" does not also
    report "SQL". Digits are allowed next to a keyword ("HTML5" -> "HTML").

    Parameters:
    resume_text: string containing resume (None is treated as empty)

    Returns:
    dict with 'technical_skills' (matched keywords, in keyword-list order)
    and an always-empty 'soft_skills' list
    """
    import re  # local import keeps this fallback self-contained

    skills_keywords = [
        'Python', 'Java', 'JavaScript', 'React', 'Node.js', 'SQL', 'MongoDB',
        'Machine Learning', 'Data Analysis', 'Deep Learning', 'TensorFlow',
        'PyTorch', 'Docker', 'Kubernetes', 'AWS', 'Azure', 'Cloud Computing',
        'System Design', 'Problem Solving', 'Critical Thinking', 'FastAPI',
        'Flask', 'Django', 'PostgreSQL', 'Redis', 'Microservices', 'REST API',
        'GraphQL', 'HTML', 'CSS', 'TypeScript', 'Git', 'CI/CD', 'Agile',
        'Team Collaboration', 'Communication', 'Leadership', 'Data Science',
        'Statistics', 'Research', 'Analytical Thinking', 'Pattern Recognition'
    ]

    resume_lower = (resume_text or "").lower()

    # (?<![a-z]) / (?![a-z]): the keyword must not be preceded or followed by
    # a letter. re.escape protects keywords such as "Node.js" and "CI/CD".
    extracted = [
        skill
        for skill in skills_keywords
        if re.search(rf"(?<![a-z]){re.escape(skill.lower())}(?![a-z])", resume_lower)
    ]

    return {
        'technical_skills': extracted,
        'soft_skills': []
    }

# ============================================================================
# COMPONENT 2: KNN CAREER RECOMMENDATION
# ============================================================================

def recommend_careers(user_skills, top_k=5):
    """
    Recommend careers using KNN

    Loads the fitted KNN model, the skills binarizer and the career reference
    table from the Kaggle input directory on every call, encodes the user's
    skills and looks up the nearest careers.

    Parameters:
    user_skills: list of skill names
    top_k: number of careers to return; values <= 0 return []

    Returns:
    list of dicts with 'career', 'similarity_score' (percentage),
    'matching_skills', 'missing_skills' (at most 5) and
    'total_required_skills'. Any error is printed and [] is returned.
    """
    # Nothing requested: skip loading the artifacts and avoid sending an
    # invalid n_neighbors through the error path.
    if top_k <= 0:
        return []

    try:
        knn = joblib.load('/kaggle/input/ai-career-recommendation-system/knn_career_model.pkl')
        mlb = joblib.load('/kaggle/input/ai-career-recommendation-system/skills_mlb.pkl')
        career_ref = joblib.load('/kaggle/input/ai-career-recommendation-system/career_reference.pkl')

        user_skills_encoded = mlb.transform([user_skills])
        distances, indices = knn.kneighbors(
            user_skills_encoded,
            n_neighbors=min(top_k, len(career_ref))
        )

        user_skill_set = set(user_skills)

        recommendations = []
        for dist, idx in zip(distances[0], indices[0]):
            # NOTE(review): 1 - distance assumes the KNN model was fitted with
            # a distance bounded by 1 (e.g. cosine) - confirm against training.
            similarity = 1 - dist
            career_row = career_ref.iloc[idx]
            career_skills = career_row['Skills']

            # De-duplicate while keeping the career's own skill order, so the
            # truncated 'missing_skills' list is the same on every run (a set
            # of strings iterates in a hash-dependent order).
            ordered_skills = list(dict.fromkeys(career_skills))
            matching_skills = [s for s in ordered_skills if s in user_skill_set]
            missing_skills = [s for s in ordered_skills if s not in user_skill_set]

            recommendations.append({
                'career': career_row['Career'],
                'similarity_score': round(float(similarity) * 100, 2),
                'matching_skills': matching_skills,
                'missing_skills': missing_skills[:5],
                'total_required_skills': len(career_skills)
            })

        return recommendations

    except Exception as e:
        print(f"Error in career recommendation: {e}")
        return []

# ============================================================================
# COMPONENT 3: DOC2VEC JOB MATCHING
# ============================================================================

def tokenize_document(doc):
    """Return the cleaned token list for a document.

    Uses gensim's simple_preprocess with accent stripping and a token length
    range of 2 to 15 characters.
    """
    cleaned_tokens = simple_preprocess(doc, deacc=True, min_len=2, max_len=15)
    return cleaned_tokens

def match_jobs(resume_text, top_k=10, filters=None):
    """
    Match jobs using Doc2Vec

    Loads the saved model, job vectors and metadata from the working
    directory, optionally narrows the jobs by exact-match filters, and ranks
    the remaining jobs by cosine similarity to the resume's inferred vector.

    Parameters:
    resume_text: string containing resume content
    top_k: number of jobs to return; values <= 0 return []
    filters: optional dict with 'industry' and/or 'pay_grade' values;
             empty or missing entries are ignored

    Returns:
    list of job dicts (job_id, job_title, description, skills_required,
    industry, pay_grade, match_score), best match first. Any error is
    printed and [] is returned.
    """
    # A non-positive top_k must yield no matches; without this guard the
    # slice [-0:] further down would select every job.
    if top_k <= 0:
        return []

    try:
        model = Doc2Vec.load('doc2vec_job_model.model')
        vectors = joblib.load('job_vectors.pkl')
        metadata = joblib.load('job_metadata.pkl')

        if filters:
            # Positional boolean mask: row i of metadata corresponds to row i
            # of vectors. A plain array avoids index alignment, which would
            # misbehave if metadata does not carry a 0..n-1 index.
            keep = np.ones(len(metadata), dtype=bool)

            industry = filters.get('industry')
            if industry:
                keep &= (metadata['Industry'] == industry).to_numpy()

            pay_grade = filters.get('pay_grade')
            if pay_grade:
                keep &= (metadata['Pay_grade'] == pay_grade).to_numpy()

            filtered_metadata = metadata[keep].reset_index(drop=True)
            filtered_vectors = vectors[keep]
        else:
            filtered_metadata = metadata
            filtered_vectors = vectors

        if len(filtered_metadata) == 0:
            return []

        resume_tokens = tokenize_document(resume_text)
        resume_vector = model.infer_vector(resume_tokens, epochs=20)
        # cosine_similarity expects 2-D input
        resume_vector = resume_vector.reshape(1, -1)

        similarities = cosine_similarity(resume_vector, filtered_vectors)[0]

        # argsort is ascending: take the tail and reverse for best-first order
        top_k = min(top_k, len(similarities))
        top_indices = np.argsort(similarities)[-top_k:][::-1]

        results = []
        for idx in top_indices:
            job = filtered_metadata.iloc[idx]
            results.append({
                'job_id': int(job['ID_num']),
                'job_title': job['job_title'],
                'description': job['Short_description'],
                'skills_required': job['Skills_required'],
                'industry': job['Industry'],
                'pay_grade': job['Pay_grade'],
                'match_score': round(float(similarities[idx]) * 100, 2)
            })

        return results

    except Exception as e:
        print(f"Error in job matching: {e}")
        return []

# ============================================================================
# COMPONENT 4: COMBINED PIPELINE WITH GEMINI
# ============================================================================

def complete_recommendation_pipeline(resume_text, top_careers=5, top_jobs=10, 
                                     job_filters=None, use_gemini=True, api_key=None):
    """
    Run skill extraction, career recommendation and job matching in sequence

    Parameters:
    resume_text: resume content
    top_careers: number of career recommendations
    top_jobs: number of job matches
    job_filters: optional filters (industry, pay_grade)
    use_gemini: if True, use Gemini API; if False, use mock
    api_key: optional Gemini API key

    Returns:
    dict with 'technical_skills', 'soft_skills', 'all_skills',
    'recommended_careers' and 'matched_jobs'
    """
    separator = "=" * 70
    print("\n" + separator)
    print("RUNNING COMPLETE PIPELINE WITH GEMINI")
    print(separator)

    # Stage 1: skills come either from Gemini or from the keyword fallback
    print("\n[Step 1] Extracting skills from resume...")
    if not use_gemini:
        print("Using mock extraction...")
        extracted = extract_skills_mock(resume_text)
    else:
        print("Using Gemini API...")
        extracted = extract_skills_with_gemini(resume_text, api_key=api_key)

    tech = extracted.get('technical_skills', [])
    soft = extracted.get('soft_skills', [])
    combined = tech + soft
    print(f"Extracted {len(tech)} technical skills, {len(soft)} soft skills")

    # Stage 2: careers are ranked from the combined skill list
    print("\n[Step 2] Finding matching careers...")
    careers = recommend_careers(combined, top_k=top_careers)

    # Stage 3: jobs are matched against the raw resume text, not the skills
    print("\n[Step 3] Matching jobs to resume...")
    jobs = match_jobs(resume_text, top_k=top_jobs, filters=job_filters)

    return {
        'technical_skills': tech,
        'soft_skills': soft,
        'all_skills': combined,
        'recommended_careers': careers,
        'matched_jobs': jobs,
    }

def display_results(results):
    """
    Display results in formatted output

    Parameters:
    results: dict with 'technical_skills', 'soft_skills',
             'recommended_careers' and 'matched_jobs', as returned by
             complete_recommendation_pipeline

    Returns:
    None; everything is printed to stdout
    """
    def _print_bullets(items, limit):
        """Print up to `limit` items as bullets, then a count of the rest."""
        for item in items[:limit]:
            print(f"  • {item}")
        if len(items) > limit:
            print(f"  ... and {len(items) - limit} more")

    def _as_text(value):
        """Render a metadata field as text; missing (None/NaN) becomes ''."""
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return str(value)

    print("\n" + "="*70)
    print("RECOMMENDATION RESULTS")
    print("="*70)

    # Technical Skills (first 15)
    print("\n💻 TECHNICAL SKILLS:")
    _print_bullets(results['technical_skills'], 15)

    # Soft Skills (first 10)
    print("\n💪 SOFT SKILLS:")
    _print_bullets(results['soft_skills'], 10)

    # Career Recommendations
    print("\n🎯 RECOMMENDED CAREERS:")
    for i, career in enumerate(results['recommended_careers'], 1):
        print(f"\n{i}. {career['career']} ({career['similarity_score']}% match)")
        if career['matching_skills']:
            print(f"   Matching Skills: {', '.join(career['matching_skills'][:5])}")
        if career['missing_skills']:
            print(f"   Skills to Learn: {', '.join(career['missing_skills'][:3])}")

    # Job Matches
    print("\n💼 MATCHING JOBS:")
    for i, job in enumerate(results['matched_jobs'], 1):
        print(f"\n{i}. {job['job_title']} ({job['match_score']}% match)")
        print(f"   Industry: {job['industry']} | Pay: {job['pay_grade']}")
        # Job fields come from a CSV and may be NaN (a float), which cannot be
        # sliced; convert to text before truncating.
        print(f"   Skills: {_as_text(job['skills_required'])[:70]}...")

# ============================================================================
# TESTING
# ============================================================================

# Manual end-to-end run of the combined pipeline on a sample resume.
if __name__ == "__main__":
    
    # Test Resume
    test_resume = """
    Sarah Chen
    Full-Stack Developer
    sarah.chen@email.com | Portfolio: sarahchen.dev
    
    SUMMARY
    Full-stack developer with 4 years experience building scalable web applications.
    Passionate about clean code and user experience. Strong problem-solving skills.
    
    TECHNICAL SKILLS
    Frontend: React, Next.js, TypeScript, HTML5, CSS3, Tailwind CSS
    Backend: Node.js, Python, FastAPI, Express.js, Django
    Databases: PostgreSQL, MongoDB, Redis
    Cloud & DevOps: AWS, Docker, Kubernetes, CI/CD, GitHub Actions
    Tools: Git, Jest, Cypress, Webpack
    
    SOFT SKILLS
    Team collaboration, leadership, communication, critical thinking, time management
    
    EXPERIENCE
    Senior Developer | WebCorp | 2021 - Present
    - Led development of e-commerce platform serving 50k users
    - Implemented microservices architecture with Docker
    - Mentored 3 junior developers
    - Improved page load time by 60%
    
    Developer | StartupHub | 2019 - 2021
    - Built RESTful APIs with Node.js and PostgreSQL
    - Developed responsive React applications
    - Collaborated with cross-functional teams
    
    EDUCATION
    B.S. Computer Science | Tech University | 2019
    
    CERTIFICATIONS
    - AWS Certified Developer
    - React Advanced Certification
    """
    
    print("\n" + "="*70)
    print("TEST: FULL-STACK DEVELOPER PROFILE")
    print("="*70)
    
    # Check if Gemini API key is available.
    # The key is read from the environment; it must never be written into
    # the source, where it would leak with every copy of this file.
    gemini_key = os.environ.get("GEMINI_API_KEY")
    use_gemini = bool(gemini_key)
    
    if use_gemini:
        print("\n✓ Gemini API key found - Using real Gemini extraction")
    else:
        print("\n⚠️ Gemini API key not found - Using mock extraction")
        print("Set GEMINI_API_KEY environment variable to use real extraction")
    
    # Run pipeline
    results = complete_recommendation_pipeline(
        resume_text=test_resume,
        top_careers=5,
        top_jobs=10,
        use_gemini=use_gemini,
        api_key=gemini_key
    )
    
    display_results(results)
    
    print("\n" + "="*70)
    print("PHASE 2 WITH GEMINI - COMPLETE ✓")
    print("="*70)
    
    if use_gemini:
        print("\n✓ Gemini API skill extraction: WORKING")
    else:
        print("\n⚠️ Gemini API: NOT CONFIGURED (using mock)")
        print("To enable: export GEMINI_API_KEY='your-key-here'")
    
    print("✓ KNN Career Recommendation: WORKING")
    print("✓ Doc2Vec Job Matching: WORKING")
    print("✓ Combined Pipeline: WORKING")

In [None]:
"""
PathFinder AI - Phase 3: Gemini Roadmap Generator
Generates 3-4 personalized career roadmap variants
"""

import os
import json
from google import genai
from google.genai import types

class GeminiRoadmapGenerator:
    """Generate career roadmaps and skill-gap analyses using the Gemini API.

    Each public method builds a text prompt, sends it to the configured
    model as a single user turn, strips optional markdown code fences from
    the reply and parses the remainder as JSON.
    """

    def __init__(self, api_key=None):
        """Initialize the Gemini client.

        Parameters:
        api_key: Gemini API key. When omitted or empty, the value of the
            GEMINI_API_KEY environment variable is used instead.

        Raises:
        ValueError: if no key is passed and GEMINI_API_KEY is not set.
        """
        # Credentials must never live in source control: honor the caller's
        # key first and fall back to the environment otherwise.
        self.api_key = api_key or os.environ.get("GEMINI_API_KEY")

        if not self.api_key:
            raise ValueError("GEMINI_API_KEY not found")

        self.client = genai.Client(api_key=self.api_key)
        self.model = "gemini-flash-latest"

    @staticmethod
    def _strip_code_fences(response_text):
        """Return response_text trimmed and without markdown code fences."""
        cleaned = (response_text or "").strip()
        if cleaned.startswith("```json"):
            cleaned = cleaned.replace("```json", "").replace("```", "").strip()
        elif cleaned.startswith("```"):
            cleaned = cleaned.replace("```", "").strip()
        return cleaned

    def _request_text(self, prompt, **config_options):
        """Send prompt as one user message and return the cleaned reply text."""
        contents = [
            types.Content(
                role="user",
                parts=[types.Part.from_text(text=prompt)],
            ),
        ]
        response = self.client.models.generate_content(
            model=self.model,
            contents=contents,
            config=types.GenerateContentConfig(**config_options),
        )
        return self._strip_code_fences(response.text)

    def generate_roadmaps(self, user_profile):
        """
        Generate roadmap variants for a user.

        Parameters:
        user_profile: dict containing:
            - target_career: str (e.g., "Data Scientist")
            - current_skills: list (e.g., ["Python", "SQL"])
            - missing_skills: list (e.g., ["Machine Learning", "Statistics"])
            - experience_level: str (e.g., "beginner", "intermediate", "advanced")
            - time_commitment: str (e.g., "full-time", "part-time", "weekends")
          Missing keys fall back to the defaults read below.

        Returns:
        The list stored under "roadmaps" in the model's JSON reply (the
        prompt asks for 4 variants), or an empty list when the request
        fails or the reply cannot be parsed.
        """

        target_career = user_profile.get('target_career', 'Software Engineer')
        current_skills = user_profile.get('current_skills', [])
        missing_skills = user_profile.get('missing_skills', [])
        experience_level = user_profile.get('experience_level', 'intermediate')
        time_commitment = user_profile.get('time_commitment', 'part-time')

        prompt = f"""You are a career roadmap expert. Generate 4 DIFFERENT learning roadmaps for someone to become a {target_career}.

USER PROFILE:
- Target Career: {target_career}
- Current Skills: {', '.join(current_skills) if current_skills else 'None'}
- Skills to Learn: {', '.join(missing_skills) if missing_skills else 'Various'}
- Experience Level: {experience_level}
- Time Commitment: {time_commitment}

REQUIREMENTS:
1. Create 4 DISTINCT roadmap variants with different approaches:
   - Variant 1: Fast-track intensive path (aggressive timeline)
   - Variant 2: Balanced structured path (moderate timeline)
   - Variant 3: Self-paced flexible path (relaxed timeline)
   - Variant 4: Project-based practical path (learning by doing)

2. Each roadmap must have:
   - roadmap_name: descriptive name
   - description: 1-2 sentence overview
   - duration_months: estimated time to complete
   - difficulty: "beginner", "intermediate", or "advanced"
   - steps: array of 5-8 learning steps

3. Each step must have:
   - step_number: 1, 2, 3...
   - title: clear step name
   - description: what to learn/do
   - duration_weeks: time for this step
   - resources: 2-3 specific learning resources (courses, books, projects)
   - skills_gained: list of skills learned in this step

OUTPUT FORMAT (JSON only):
{{
  "roadmaps": [
    {{
      "roadmap_id": 1,
      "roadmap_name": "Fast-Track Intensive Path",
      "description": "Aggressive 6-month bootcamp-style learning",
      "duration_months": 6,
      "difficulty": "intermediate",
      "steps": [
        {{
          "step_number": 1,
          "title": "Master Python Fundamentals",
          "description": "Learn Python syntax, data structures, OOP",
          "duration_weeks": 3,
          "resources": ["Python Crash Course (book)", "Codecademy Python", "100 Days of Code"],
          "skills_gained": ["Python", "Data Structures", "Algorithms"]
        }},
        ...
      ]
    }},
    ... 3 more roadmaps
  ]
}}

Return ONLY valid JSON, no markdown or extra text."""

        # Bound up front so the JSON error handler can always show it.
        response_text = ""
        try:
            response_text = self._request_text(
                prompt,
                temperature=0.7,  # Higher for creative variety
                top_p=0.95,
                top_k=40,
                max_output_tokens=4096,
            )
            roadmaps_data = json.loads(response_text)

        except json.JSONDecodeError as e:
            print(f"Error parsing Gemini response: {e}")
            print(f"Raw response: {response_text[:500]}...")
            return []

        except Exception as e:
            print(f"Error calling Gemini API: {e}")
            return []

        # The prompt asks for a JSON object; anything else is unusable here.
        if not isinstance(roadmaps_data, dict):
            print(
                "Error parsing Gemini response: expected a JSON object, "
                f"got {type(roadmaps_data).__name__}"
            )
            return []

        # `or []` also covers an explicit `"roadmaps": null` in the reply.
        return roadmaps_data.get('roadmaps') or []

    def analyze_skill_gap(self, user_skills, required_skills):
        """
        Analyze the skill gap between current and required skills.

        Parameters:
        user_skills: list of current skills (may be empty)
        required_skills: list of required skills for the career

        Returns:
        dict parsed from the model's JSON reply; on any failure a fallback
        dict with empty "transferable_skills" / "missing_skills" lists and
        a "learning_path_summary" of "Unable to analyze skill gap".
        """

        prompt = f"""Analyze the skill gap for career transition.

CURRENT SKILLS:
{', '.join(user_skills) if user_skills else 'None'}

REQUIRED SKILLS:
{', '.join(required_skills)}

TASK:
1. Identify which current skills are transferable
2. List skills that need to be learned
3. Prioritize missing skills (high/medium/low priority)
4. Estimate learning time for each missing skill

OUTPUT FORMAT (JSON only):
{{
  "transferable_skills": ["skill1", "skill2"],
  "missing_skills": [
    {{
      "skill": "Machine Learning",
      "priority": "high",
      "learning_time_weeks": 8,
      "difficulty": "intermediate",
      "reason": "Core requirement for data science roles"
    }},
    ...
  ],
  "learning_path_summary": "Brief 2-3 sentence summary of recommended approach"
}}

Return ONLY valid JSON."""

        fallback = {
            "transferable_skills": [],
            "missing_skills": [],
            "learning_path_summary": "Unable to analyze skill gap"
        }

        try:
            response_text = self._request_text(
                prompt,
                temperature=0.3,
                max_output_tokens=2048,
            )
            gap_analysis = json.loads(response_text)

        except Exception as e:
            print(f"Error in skill gap analysis: {e}")
            return fallback

        # Callers use dict access on the result, so reject other JSON shapes.
        if not isinstance(gap_analysis, dict):
            print(
                "Error in skill gap analysis: expected a JSON object, "
                f"got {type(gap_analysis).__name__}"
            )
            return fallback

        return gap_analysis


# ============================================================================
# TESTING
# ============================================================================

if __name__ == "__main__":
    # Manual smoke test for GeminiRoadmapGenerator: generates roadmaps for a
    # sample profile, runs a skill-gap analysis, prints both and writes them
    # to generated_roadmaps.json. Requires a working Gemini API key.

    print("="*70)
    print("PHASE 3: GEMINI ROADMAP GENERATOR - TEST")
    print("="*70)
    
    try:
        generator = GeminiRoadmapGenerator()
        
        # Test Case 1: Data Scientist Career
        print("\n" + "="*70)
        print("TEST 1: DATA SCIENTIST ROADMAP")
        print("="*70)
        
        user_profile_1 = {
            'target_career': 'Data Scientist',
            'current_skills': ['Python', 'SQL', 'Excel'],
            'missing_skills': ['Machine Learning', 'Statistics', 'Deep Learning', 'TensorFlow'],
            'experience_level': 'beginner',
            'time_commitment': 'part-time'
        }
        
        print("\nGenerating roadmaps...")
        roadmaps_1 = generator.generate_roadmaps(user_profile_1)
        
        print(f"\n‚úì Generated {len(roadmaps_1)} roadmap variants\n")
        
        for roadmap in roadmaps_1:
            print(f"\n{'='*70}")
            print(f"ROADMAP {roadmap['roadmap_id']}: {roadmap['roadmap_name']}")
            print(f"{'='*70}")
            print(f"Description: {roadmap['description']}")
            print(f"Duration: {roadmap['duration_months']} months")
            print(f"Difficulty: {roadmap['difficulty']}")
            print(f"\nSteps ({len(roadmap['steps'])} total):")
            
            for step in roadmap['steps'][:3]:  # Show first 3 steps
                print(f"\n  Step {step['step_number']}: {step['title']}")
                print(f"  Duration: {step['duration_weeks']} weeks")
                print(f"  Skills: {', '.join(step['skills_gained'])}")
                print(f"  Resources: {', '.join(step['resources'][:2])}")
            
            if len(roadmap['steps']) > 3:
                print(f"\n  ... and {len(roadmap['steps']) - 3} more steps")
        
        # Test Case 2: Skill Gap Analysis
        print("\n\n" + "="*70)
        print("TEST 2: SKILL GAP ANALYSIS")
        print("="*70)
        
        print("\nAnalyzing skill gap...")
        gap_analysis = generator.analyze_skill_gap(
            user_skills=['Python', 'SQL', 'Excel'],
            required_skills=['Python', 'SQL', 'Machine Learning', 'Statistics', 
                           'Deep Learning', 'TensorFlow', 'Data Visualization']
        )
        
        print("\n--- SKILL GAP ANALYSIS ---\n")
        
        print("‚úì TRANSFERABLE SKILLS:")
        for skill in gap_analysis.get('transferable_skills', []):
            print(f"  ‚Ä¢ {skill}")
        
        print("\nüìö SKILLS TO LEARN:")
        for skill_info in gap_analysis.get('missing_skills', [])[:5]:
            # The model may omit optional fields; show a placeholder rather
            # than aborting the whole report with a KeyError.
            print(f"\n  ‚Ä¢ {skill_info.get('skill', 'N/A')}")
            print(f"    Priority: {skill_info.get('priority', 'N/A')}")
            print(f"    Time: {skill_info.get('learning_time_weeks', 'N/A')} weeks")
            print(f"    Difficulty: {skill_info.get('difficulty', 'N/A')}")
        
        print(f"\nüí° SUMMARY:")
        print(f"  {gap_analysis.get('learning_path_summary', 'N/A')}")
        
        # Save roadmaps to JSON
        output_data = {
            'user_profile': user_profile_1,
            'roadmaps': roadmaps_1,
            'skill_gap_analysis': gap_analysis
        }
        
        with open('generated_roadmaps.json', 'w') as f:
            json.dump(output_data, f, indent=2)
        
        print("\n" + "="*70)
        print("GEMINI ROADMAP GENERATOR - TEST COMPLETE ‚úì")
        print("="*70)
        print("\nOutputs:")
        # Report the real count: generate_roadmaps returns [] on failure.
        print(f"  ‚úì {len(roadmaps_1)} roadmap variants generated")
        print("  ‚úì Skill gap analysis complete")
        print("  ‚úì Results saved to generated_roadmaps.json")
        
    except ValueError as e:
        print(f"\n‚ùå ERROR: {e}")
        print("\nTo fix: Set GEMINI_API_KEY environment variable")
    
    except Exception as e:
        print(f"\n‚ùå Unexpected error: {e}")
        import traceback
        traceback.print_exc()

In [None]:
"""
PathFinder AI - Phase 3: Contextual Bandits (RL Algorithm #1)
Selects best roadmap variant from Gemini's 3-4 options
Learns from user feedback over time
"""

import numpy as np
import json
import pickle
from collections import defaultdict

class ContextualBandit:
    """
    Contextual multi-armed bandit for roadmap selection.

    With probability ``epsilon`` a random arm is explored. Otherwise the arm
    with the highest UCB (Upper Confidence Bound) score for the user's
    context is chosen, or the highest global Q-value when no context is
    given. Statistics are kept both globally and per context key.
    """

    def __init__(self, n_arms=4, epsilon=0.1, alpha=0.1):
        """
        Initialize the bandit.

        Parameters:
        n_arms: number of roadmap variants (usually 4)
        epsilon: exploration rate (probability of a random selection)
        alpha: learning rate; stored and persisted, but not used by the
            update rule in this class (Q-values are running means)
        """
        self.n_arms = n_arms
        self.epsilon = epsilon
        self.alpha = alpha

        # Track performance of each arm (roadmap variant)
        self.q_values = np.zeros(n_arms)  # Estimated value of each arm
        self.arm_counts = np.zeros(n_arms)  # How many times each arm was updated
        self.total_rewards = np.zeros(n_arms)  # Total normalized reward per arm

        # Context-aware: per-context statistics, created on first update
        self.context_history = defaultdict(
            lambda: self._empty_context_stats(n_arms)
        )

        self.total_selections = 0

    @staticmethod
    def _empty_context_stats(n_arms):
        """Return a fresh, all-zero statistics record for one context."""
        return {
            'q_values': np.zeros(n_arms),
            'counts': np.zeros(n_arms),
            'rewards': np.zeros(n_arms)
        }

    def get_context_key(self, user_context):
        """
        Create a context key from a user profile.

        Parameters:
        user_context: dict with user info; 'experience_level' defaults to
            'intermediate' and 'time_commitment' to 'part-time'

        Returns:
        string key of the form "<experience_level>_<time_commitment>"
        """
        exp_level = user_context.get('experience_level', 'intermediate')
        time_commit = user_context.get('time_commitment', 'part-time')
        return f"{exp_level}_{time_commit}"

    def select_arm(self, user_context=None):
        """
        Select which roadmap variant to show.

        Parameters:
        user_context: dict with user profile info (optional)

        Returns:
        arm_id: index of the roadmap to show (0 .. n_arms - 1)
        """
        # Epsilon-greedy: explore with probability epsilon
        if np.random.random() < self.epsilon:
            return np.random.randint(0, self.n_arms)

        # Exploitation without context: best global Q-value
        if not user_context:
            return int(np.argmax(self.q_values))

        context_key = self.get_context_key(user_context)
        # Read-only lookup: .get() does not trigger the defaultdict factory,
        # so asking for a recommendation never adds an empty record for an
        # unseen context to the statistics or the saved state.
        context_data = self.context_history.get(context_key)
        total_pulls = 0 if context_data is None else np.sum(context_data['counts'])

        # No feedback for this context yet: nothing to exploit, pick randomly
        if total_pulls == 0:
            return np.random.randint(0, self.n_arms)

        # UCB score; the 1e-5 avoids division by zero and gives arms that
        # were never tried in this context a very large bonus.
        ucb_values = context_data['q_values'] + np.sqrt(
            2 * np.log(total_pulls + 1) /
            (context_data['counts'] + 1e-5)
        )
        return int(np.argmax(ucb_values))

    def update(self, arm_id, reward, user_context=None):
        """
        Update arm values based on user feedback.

        Parameters:
        arm_id: which roadmap was shown (0 .. n_arms - 1)
        reward: user feedback score (e.g., 1-5 stars, or 0/1 for like/dislike)
        user_context: optional user profile; when given, the per-context
            statistics are updated as well
        """
        # Rewards up to 5 are divided by 5; larger values pass through as-is.
        # NOTE(review): a 0/1 "like" therefore counts as 0.2 and rewards
        # above 5 are not scaled - confirm the intended reward scale.
        normalized_reward = reward / 5.0 if reward <= 5 else reward

        # Update global statistics (Q-value is the running mean reward)
        self.arm_counts[arm_id] += 1
        self.total_rewards[arm_id] += normalized_reward
        self.q_values[arm_id] = self.total_rewards[arm_id] / self.arm_counts[arm_id]

        # Update context-specific statistics
        if user_context:
            context_key = self.get_context_key(user_context)
            context_data = self.context_history[context_key]

            context_data['counts'][arm_id] += 1
            context_data['rewards'][arm_id] += normalized_reward
            context_data['q_values'][arm_id] = (
                context_data['rewards'][arm_id] / context_data['counts'][arm_id]
            )

        self.total_selections += 1

    def get_statistics(self):
        """Return the current global and per-context statistics as plain lists/ints."""
        return {
            'q_values': self.q_values.tolist(),
            'arm_counts': self.arm_counts.tolist(),
            'total_rewards': self.total_rewards.tolist(),
            'total_selections': int(self.total_selections),
            'best_arm': int(np.argmax(self.q_values)),
            'context_stats': {
                k: {
                    'q_values': v['q_values'].tolist(),
                    'counts': v['counts'].tolist()
                }
                for k, v in self.context_history.items()
            }
        }

    def save(self, filepath='contextual_bandit.pkl'):
        """Pickle the bandit state to filepath."""
        with open(filepath, 'wb') as f:
            pickle.dump({
                'q_values': self.q_values,
                'arm_counts': self.arm_counts,
                'total_rewards': self.total_rewards,
                # Stored as a plain dict: the defaultdict's lambda factory
                # cannot be pickled.
                'context_history': dict(self.context_history),
                'total_selections': self.total_selections,
                'n_arms': self.n_arms,
                'epsilon': self.epsilon,
                'alpha': self.alpha
            }, f)
        print(f"Contextual Bandit saved to {filepath}")

    @classmethod
    def load(cls, filepath='contextual_bandit.pkl'):
        """Load a bandit previously written by save().

        SECURITY: this uses pickle, which can execute arbitrary code while
        loading. Only load files this application wrote itself.
        """
        with open(filepath, 'rb') as f:
            data = pickle.load(f)

        bandit = cls(
            n_arms=data['n_arms'],
            epsilon=data['epsilon'],
            alpha=data['alpha']
        )
        bandit.q_values = data['q_values']
        bandit.arm_counts = data['arm_counts']
        bandit.total_rewards = data['total_rewards']
        # The constructor already created the defaultdict with the right
        # factory; just restore the saved per-context records into it.
        bandit.context_history.update(data['context_history'])
        bandit.total_selections = data['total_selections']

        print(f"Contextual Bandit loaded from {filepath}")
        return bandit


# ============================================================================
# SYNTHETIC DATA GENERATOR (for initial training)
# ============================================================================

def generate_synthetic_feedback(n_samples=100):
    """
    Build a list of simulated feedback records for bootstrapping the bandit.

    Every record pairs a randomly chosen user type with a uniformly chosen
    arm (0-3) and a reward: 5 with the probability listed for that user
    type and arm, otherwise a random integer from 1 to 3.

    The probability table encodes these assumed tastes:
    - Beginners prefer slower paces (Variant 2, 3)
    - Advanced users prefer fast-track (Variant 1)
    - Project-based (Variant 4) universally liked

    Parameters:
    n_samples: number of records to generate

    Returns:
    list of dicts with keys 'user_context', 'arm_selected' and 'reward'
    """
    # All combinations of experience level and time commitment
    user_types = [
        {'experience_level': level, 'time_commitment': commitment}
        for level in ('beginner', 'intermediate', 'advanced')
        for commitment in ('part-time', 'full-time')
    ]

    # Probability of a 5-star reward for each arm, keyed by user type
    preferences = {
        'beginner_part-time': [0.3, 0.7, 0.8, 0.6],  # Prefers Variant 2,3
        'beginner_full-time': [0.5, 0.8, 0.6, 0.7],  # Prefers Variant 2
        'intermediate_part-time': [0.6, 0.7, 0.7, 0.8],  # Balanced
        'intermediate_full-time': [0.7, 0.7, 0.5, 0.8],  # Prefers 1,4
        'advanced_part-time': [0.8, 0.5, 0.4, 0.7],  # Prefers Variant 1
        'advanced_full-time': [0.9, 0.5, 0.3, 0.7],  # Strongly prefers 1
    }

    samples = []
    for _ in range(n_samples):
        profile = np.random.choice(user_types)
        profile_key = f"{profile['experience_level']}_{profile['time_commitment']}"

        # Arms are shown uniformly at random
        shown_arm = np.random.randint(0, 4)

        # Draw the reward: top rating when the user "likes" the arm,
        # otherwise a low rating between 1 and 3
        like_probability = preferences[profile_key][shown_arm]
        if np.random.random() < like_probability:
            rating = 5
        else:
            rating = np.random.randint(1, 4)

        samples.append({
            'user_context': profile,
            'arm_selected': shown_arm,
            'reward': rating
        })

    return samples


# ============================================================================
# TESTING
# ============================================================================

if __name__ == "__main__":
    # Demo / smoke test for ContextualBandit: trains on synthetic feedback,
    # prints statistics, samples recommendations for a few user types,
    # simulates further feedback and finally round-trips the state on disk.
    from collections import Counter

    banner = "="*70

    print(banner)
    print("PHASE 3: CONTEXTUAL BANDITS - TEST")
    print(banner)

    # Initialize bandit
    print("\n[Step 1] Initializing Contextual Bandit...")
    bandit = ContextualBandit(n_arms=4, epsilon=0.1)
    print("‚úì Bandit initialized with 4 arms (roadmap variants)")

    # Generate synthetic training data
    print("\n[Step 2] Generating synthetic user feedback...")
    synthetic_data = generate_synthetic_feedback(n_samples=200)
    print(f"‚úì Generated {len(synthetic_data)} synthetic feedback samples")

    # Feed every synthetic record to the bandit
    print("\n[Step 3] Training bandit on synthetic data...")
    for sample in synthetic_data:
        bandit.update(
            sample['arm_selected'],
            sample['reward'],
            sample['user_context'],
        )

    print("‚úì Training complete")

    # Display statistics
    print("\n" + banner)
    print("BANDIT STATISTICS AFTER TRAINING")
    print(banner)

    stats = bandit.get_statistics()

    print("\nGlobal Performance:")
    print(f"  Total Selections: {stats['total_selections']}")
    print(f"  Best Arm: Roadmap Variant {stats['best_arm'] + 1}")

    roadmap_names = [
        "Variant 1: Fast-Track",
        "Variant 2: Balanced",
        "Variant 3: Self-Paced",
        "Variant 4: Project-Based"
    ]

    print("\n  Q-Values (Estimated Quality):")
    for name, q_val in zip(roadmap_names, stats['q_values']):
        print(f"    {name}: {q_val:.3f}")

    print("\n  Selection Counts:")
    for name, count in zip(roadmap_names, stats['arm_counts']):
        print(f"    {name}: {int(count)} times")

    # Test selections for different user types
    print("\n" + banner)
    print("TESTING RECOMMENDATIONS FOR DIFFERENT USERS")
    print(banner)

    test_users = [
        {'experience_level': 'beginner', 'time_commitment': 'part-time'},
        {'experience_level': 'intermediate', 'time_commitment': 'full-time'},
        {'experience_level': 'advanced', 'time_commitment': 'part-time'},
    ]

    for user in test_users:
        print(f"\nüë§ User: {user['experience_level']}, {user['time_commitment']}")

        # Ask for 10 recommendations and tally them per arm
        recommendations = [bandit.select_arm(user) for _ in range(10)]
        counts = Counter(recommendations)

        print("  Recommended roadmaps (out of 10 selections):")
        # most_common() orders by descending count, ties in first-seen order
        for arm_id, count in counts.most_common():
            print(f"    {roadmap_names[arm_id]}: {count} times")

    # Simulate real-time learning
    print("\n" + banner)
    print("SIMULATING REAL-TIME LEARNING")
    print(banner)

    print("\nSimulating 20 new user interactions...")

    for done in range(1, 21):
        # Random user
        user = np.random.choice(test_users)

        # Bandit selects roadmap
        selected_arm = bandit.select_arm(user)

        # Simulate user feedback (higher ratings for better matches)
        level = user['experience_level']
        if level == 'beginner' and selected_arm in (1, 2):
            feedback = np.random.choice([4, 5])
        elif level == 'advanced' and selected_arm == 0:
            feedback = 5
        elif selected_arm == 3:  # Project-based universally good
            feedback = np.random.choice([4, 5])
        else:
            feedback = np.random.randint(2, 5)

        # Update bandit
        bandit.update(selected_arm, feedback, user)

        if done % 5 == 0:
            print(f"  Processed {done}/20 interactions...")

    print("\n‚úì Real-time learning simulation complete")

    # Show updated statistics
    stats_after = bandit.get_statistics()
    print(f"\n  Total Selections: {stats_after['total_selections']}")
    print(f"  Best Arm Now: Roadmap Variant {stats_after['best_arm'] + 1}")

    # Save bandit
    print("\n[Step 4] Saving bandit state...")
    bandit.save('contextual_bandit.pkl')

    # Test loading
    print("\n[Step 5] Testing load functionality...")
    loaded_bandit = ContextualBandit.load('contextual_bandit.pkl')
    print("‚úì Bandit loaded successfully")

    print("\n" + banner)
    print("CONTEXTUAL BANDITS - TEST COMPLETE ‚úì")
    print(banner)

    print("\nKey Features:")
    for feature_line in (
        "  ‚úì Contextual awareness (adapts to user type)",
        "  ‚úì Exploration vs Exploitation (epsilon-greedy)",
        "  ‚úì UCB selection strategy",
        "  ‚úì Real-time learning from feedback",
        "  ‚úì Save/Load functionality",
    ):
        print(feature_line)

    print("\nSaved artifacts:")
    print("  - contextual_bandit.pkl (trained model)")

In [None]:
"""
PathFinder AI - Phase 3: Complete Roadmap System
Combines: Gemini Roadmap Generation + Contextual Bandits + Skill Gap Analysis
"""

import json
import os
# from phase3_gemini_roadmap_generator import GeminiRoadmapGenerator
# from phase3_contextual_bandits import ContextualBandit

class RoadmapRecommendationSystem:
    """Roadmap recommendation system combining Gemini generation with a bandit.

    Gemini produces the candidate roadmaps; a ContextualBandit chooses which
    one to show and learns from the ratings passed to submit_feedback().
    """

    def __init__(self, gemini_api_key=None, bandit_path='contextual_bandit.pkl'):
        """
        Initialize the system.

        Parameters:
        gemini_api_key: Gemini API key; when omitted, GeminiRoadmapGenerator
            falls back to the GEMINI_API_KEY environment variable
        bandit_path: path to a saved bandit model; a fresh bandit is created
            when it cannot be loaded
        """
        # No credential is embedded here: the key comes from the caller or
        # from the environment.
        self.roadmap_generator = GeminiRoadmapGenerator(api_key=gemini_api_key)

        # Try to load existing bandit, or create new one
        try:
            self.bandit = ContextualBandit.load(bandit_path)
            print("‚úì Loaded existing Contextual Bandit")
        except Exception:
            # Missing or unreadable state file: start from scratch. Catching
            # Exception (not a bare except) lets Ctrl-C / SystemExit through.
            self.bandit = ContextualBandit(n_arms=4, epsilon=0.1)
            print("‚úì Initialized new Contextual Bandit")

    def get_personalized_roadmap(self, user_profile):
        """
        Get the best roadmap for a user using Gemini + RL.

        Steps:
        1. Generate roadmap variants with Gemini
        2. Use the Contextual Bandit to select one
        3. Return the selected roadmap

        Parameters:
        user_profile: dict with user info

        Returns:
        dict with 'selected_roadmap', 'selected_arm', 'all_roadmaps' and
        'user_context', or None when no roadmaps could be generated
        """
        print("\n" + "="*70)
        print("PERSONALIZED ROADMAP RECOMMENDATION")
        print("="*70)

        # Step 1: Generate roadmaps with Gemini
        print("\n[Step 1] Generating 4 roadmap variants with Gemini...")
        roadmaps = self.roadmap_generator.generate_roadmaps(user_profile)

        if not roadmaps:
            print("‚ùå Failed to generate roadmaps")
            return None

        print(f"‚úì Generated {len(roadmaps)} roadmap variants")

        # Step 2: Contextual Bandit selects best one
        print("\n[Step 2] Selecting best roadmap with Contextual Bandit...")

        user_context = {
            'experience_level': user_profile.get('experience_level', 'intermediate'),
            'time_commitment': user_profile.get('time_commitment', 'part-time')
        }

        selected_arm = self.bandit.select_arm(user_context)
        if selected_arm >= len(roadmaps):
            # Fewer roadmaps than arms: fall back to the first one AND report
            # arm 0, so later feedback is credited to the roadmap actually shown.
            selected_arm = 0
        selected_roadmap = roadmaps[selected_arm]

        print(f"‚úì Selected: {selected_roadmap['roadmap_name']}")

        return {
            'selected_roadmap': selected_roadmap,
            'selected_arm': selected_arm,
            'all_roadmaps': roadmaps,
            'user_context': user_context
        }

    def submit_feedback(self, arm_id, rating, user_context):
        """
        Record a user's rating of a roadmap.

        Parameters:
        arm_id: which roadmap was shown (0-3)
        rating: user rating (1-5 stars or 0/1 for like/dislike)
        user_context: user profile
        """
        self.bandit.update(arm_id, rating, user_context)
        print(f"\n‚úì Feedback recorded: Roadmap {arm_id + 1} rated {rating}/5")

    def analyze_skill_gap(self, user_skills, required_skills):
        """Analyze the skill gap using Gemini and return the analysis dict."""
        print("\n[Analyzing Skill Gap]")
        gap_analysis = self.roadmap_generator.analyze_skill_gap(
            user_skills, required_skills
        )
        print("‚úì Skill gap analysis complete")
        return gap_analysis

    def get_statistics(self):
        """Return the bandit's statistics."""
        return self.bandit.get_statistics()

    def save(self, bandit_path='contextual_bandit.pkl'):
        """Save the bandit state to bandit_path."""
        self.bandit.save(bandit_path)


# ============================================================================
# TESTING
# ============================================================================

if __name__ == "__main__":
    # End-to-end smoke test of RoadmapRecommendationSystem: two roadmap
    # requests with simulated feedback, one skill-gap analysis, bandit
    # statistics, then persistence of the bandit and a JSON results file.
    # Requires Gemini API access.

    print("="*70)
    print("PHASE 3: COMPLETE ROADMAP SYSTEM - TEST")
    print("="*70)
    
    # Read the key from the environment; a key hard-coded here would leak the
    # credential and make the check below unreachable.
    gemini_key = os.environ.get("GEMINI_API_KEY")
    if not gemini_key:
        print("\n‚ö†Ô∏è GEMINI_API_KEY not found")
        print("Set it with: export GEMINI_API_KEY='your-key'")
        print("\nThis test requires Gemini API access.")
        # SystemExit needs no import, unlike the site/IPython-provided exit().
        raise SystemExit(1)
    
    # Initialize system
    print("\n[Initializing System]")
    system = RoadmapRecommendationSystem(gemini_api_key=gemini_key)
    
    # Test Case 1: Junior Developer to Senior Developer
    print("\n\n" + "="*70)
    print("TEST CASE 1: JUNIOR ‚Üí SENIOR DEVELOPER")
    print("="*70)
    
    user_profile_1 = {
        'target_career': 'Senior Software Engineer',
        'current_skills': ['Python', 'JavaScript', 'React', 'Git', 'HTML', 'CSS'],
        'missing_skills': ['System Design', 'Microservices', 'Kubernetes', 'AWS', 
                          'Team Leadership', 'Code Architecture'],
        'experience_level': 'intermediate',
        'time_commitment': 'part-time'
    }
    
    result_1 = system.get_personalized_roadmap(user_profile_1)
    
    # get_personalized_roadmap returns None when generation failed
    if result_1:
        selected = result_1['selected_roadmap']
        
        print("\nüìã SELECTED ROADMAP:")
        print(f"  Name: {selected['roadmap_name']}")
        print(f"  Duration: {selected['duration_months']} months")
        print(f"  Difficulty: {selected['difficulty']}")
        print(f"  Description: {selected['description']}")
        
        print(f"\n  Steps ({len(selected['steps'])} total):")
        for step in selected['steps'][:3]:
            print(f"\n    {step['step_number']}. {step['title']}")
            print(f"       Duration: {step['duration_weeks']} weeks")
            print(f"       Skills: {', '.join(step['skills_gained'][:3])}")
        
        if len(selected['steps']) > 3:
            print(f"\n    ... and {len(selected['steps']) - 3} more steps")
        
        print("\nüìö ALL AVAILABLE ROADMAPS:")
        for i, roadmap in enumerate(result_1['all_roadmaps'], 1):
            print(f"  {i}. {roadmap['roadmap_name']} ({roadmap['duration_months']} months)")
        
        # Simulate user feedback
        print("\n[User Feedback] User rates this roadmap...")
        user_rating = 5  # Positive feedback
        system.submit_feedback(
            arm_id=result_1['selected_arm'],
            rating=user_rating,
            user_context=result_1['user_context']
        )
    
    # Test Case 2: Career Changer to Data Scientist
    print("\n\n" + "="*70)
    print("TEST CASE 2: CAREER CHANGE ‚Üí DATA SCIENTIST")
    print("="*70)
    
    user_profile_2 = {
        'target_career': 'Data Scientist',
        'current_skills': ['Excel', 'SQL', 'Statistics'],
        'missing_skills': ['Python', 'Machine Learning', 'Deep Learning', 'TensorFlow',
                          'Data Visualization', 'Pandas', 'NumPy'],
        'experience_level': 'beginner',
        'time_commitment': 'full-time'
    }
    
    result_2 = system.get_personalized_roadmap(user_profile_2)
    
    if result_2:
        selected = result_2['selected_roadmap']
        
        print("\nüìã SELECTED ROADMAP:")
        print(f"  Name: {selected['roadmap_name']}")
        print(f"  Duration: {selected['duration_months']} months")
        print(f"  Description: {selected['description']}")
        
        # Simulate feedback
        user_rating = 4
        system.submit_feedback(
            arm_id=result_2['selected_arm'],
            rating=user_rating,
            user_context=result_2['user_context']
        )
    
    # Test Skill Gap Analysis
    print("\n\n" + "="*70)
    print("TEST: SKILL GAP ANALYSIS")
    print("="*70)
    
    gap_analysis = system.analyze_skill_gap(
        user_skills=['Python', 'JavaScript', 'React'],
        required_skills=['Python', 'JavaScript', 'React', 'TypeScript', 
                        'Node.js', 'Docker', 'AWS', 'System Design']
    )
    
    print("\n‚úÖ TRANSFERABLE SKILLS:")
    for skill in gap_analysis.get('transferable_skills', []):
        print(f"  ‚Ä¢ {skill}")
    
    print("\nüìö SKILLS TO LEARN:")
    for skill_info in gap_analysis.get('missing_skills', [])[:4]:
        # Same tolerant lookup as the optional fields below: a missing key in
        # the model's reply should not abort the report.
        print(f"\n  ‚Ä¢ {skill_info.get('skill', 'N/A')}")
        print(f"    Priority: {skill_info.get('priority', 'N/A')}")
        print(f"    Time: {skill_info.get('learning_time_weeks', 'N/A')} weeks")
    
    # Show Bandit Statistics
    print("\n\n" + "="*70)
    print("CONTEXTUAL BANDIT STATISTICS")
    print("="*70)
    
    stats = system.get_statistics()
    
    print("\nOverall Performance:")
    print(f"  Total Recommendations: {stats['total_selections']}")
    print(f"  Best Performing Roadmap: Variant {stats['best_arm'] + 1}")
    
    print("\n  Q-Values (Quality Estimates):")
    for i, q_val in enumerate(stats['q_values'], 1):
        print(f"    Variant {i}: {q_val:.3f}")
    
    # Save system
    print("\n[Saving System State]")
    system.save('contextual_bandit.pkl')
    print("‚úì System state saved")
    
    # Export results
    output_data = {
        'test_case_1': {
            'user_profile': user_profile_1,
            'selected_roadmap': result_1['selected_roadmap'] if result_1 else None
        },
        'test_case_2': {
            'user_profile': user_profile_2,
            'selected_roadmap': result_2['selected_roadmap'] if result_2 else None
        },
        'skill_gap_analysis': gap_analysis,
        'bandit_statistics': stats
    }
    
    with open('phase3_test_results.json', 'w') as f:
        json.dump(output_data, f, indent=2)
    
    print("\n" + "="*70)
    print("PHASE 3 COMPLETE SYSTEM - TEST COMPLETE ‚úì")
    print("="*70)
    
    print("\nComponents Working:")
    print("  ‚úì Gemini Roadmap Generation (4 variants)")
    print("  ‚úì Contextual Bandits (RL selection)")
    print("  ‚úì Skill Gap Analysis")
    print("  ‚úì Feedback Learning")
    print("  ‚úì Context-Aware Recommendations")
    
    print("\nSaved Outputs:")
    print("  - phase3_test_results.json")
    print("  - contextual_bandit.pkl")

In [None]:
"""
PathFinder AI - Phase 3: Gemini Roadmap Generator
Generates 3-4 personalized career roadmap variants
"""

import os
import json
from google import genai
from google.genai import types

class GeminiRoadmapGenerator:
    """Generate career roadmaps and skill-gap analyses using the Gemini API.

    The API key comes from the ``api_key`` argument or, when that is empty,
    from the ``GEMINI_API_KEY`` environment variable. No key is embedded in
    the source: a credential committed to a notebook is readable by everyone
    who can see the file.
    """

    # Characteristics of the four roadmap variants, keyed by variant number.
    _VARIANTS = {
        1: {
            'name': 'Fast-Track Intensive Path',
            'description': 'Aggressive timeline with intensive daily study (6-8 months)',
            'approach': 'bootcamp-style, intensive learning, rapid skill acquisition'
        },
        2: {
            'name': 'Balanced Structured Path',
            'description': 'Moderate pace with structured curriculum (9-12 months)',
            'approach': 'balanced learning, structured progression, steady growth'
        },
        3: {
            'name': 'Self-Paced Flexible Path',
            'description': 'Relaxed timeline with flexible scheduling (12-18 months)',
            'approach': 'flexible learning, self-paced study, adaptable schedule'
        },
        4: {
            'name': 'Project-Based Practical Path',
            'description': 'Learn by building real projects (10-12 months)',
            'approach': 'hands-on projects, practical application, learning by doing'
        }
    }

    def __init__(self, api_key=None):
        """
        Initialize the Gemini client

        Parameters:
        api_key: Gemini API key; when omitted or empty, the GEMINI_API_KEY
                 environment variable is used instead

        Raises:
        ValueError if no key is supplied and GEMINI_API_KEY is not set
        """
        self.api_key = api_key or os.environ.get("GEMINI_API_KEY")

        if not self.api_key:
            raise ValueError("GEMINI_API_KEY not found")

        self.client = genai.Client(api_key=self.api_key)
        self.model = "gemini-flash-latest"

    @staticmethod
    def _strip_code_fences(text):
        """Remove one surrounding Markdown code fence (optionally tagged ``json``).

        Only the leading and trailing fence are removed, so triple backticks
        that occur inside the payload itself are left untouched.
        """
        text = text.strip()
        if text.startswith("```"):
            text = text[3:]
            # Drop the optional language tag that follows the opening fence.
            if text[:4].lower() == "json":
                text = text[4:]
            if text.endswith("```"):
                text = text[:-3]
        return text.strip()

    def _request_json(self, prompt, **config_kwargs):
        """Send ``prompt`` to the model and return the decoded JSON reply.

        ``config_kwargs`` are forwarded to ``types.GenerateContentConfig``.
        Raises json.JSONDecodeError when the reply is not valid JSON; any
        error raised by the client call propagates unchanged.
        """
        contents = [
            types.Content(
                role="user",
                parts=[types.Part.from_text(text=prompt)],
            ),
        ]

        response = self.client.models.generate_content(
            model=self.model,
            contents=contents,
            config=types.GenerateContentConfig(**config_kwargs),
        )

        # A reply without text is treated as an empty (unparseable) document.
        return json.loads(self._strip_code_fences(response.text or ""))

    def generate_single_roadmap(self, user_profile, variant_type, roadmap_id):
        """
        Generate a single roadmap variant

        Parameters:
        user_profile: dict with user info (target_career, current_skills,
                      missing_skills, experience_level, time_commitment);
                      missing keys fall back to defaults
        variant_type: which variant to generate (1-4)
        roadmap_id: ID for this roadmap (1-4)

        Returns:
        dict with single roadmap, or None when the request or JSON parsing fails

        Raises:
        KeyError if variant_type is not one of 1-4
        """

        target_career = user_profile.get('target_career', 'Software Engineer')
        current_skills = user_profile.get('current_skills', [])
        missing_skills = user_profile.get('missing_skills', [])
        experience_level = user_profile.get('experience_level', 'intermediate')
        time_commitment = user_profile.get('time_commitment', 'part-time')

        variant = self._VARIANTS[variant_type]

        prompt = f"""You are a career roadmap expert. Generate ONE detailed learning roadmap for becoming a {target_career}.

USER PROFILE:
- Target Career: {target_career}
- Current Skills: {', '.join(current_skills) if current_skills else 'None'}
- Skills to Learn: {', '.join(missing_skills) if missing_skills else 'Various'}
- Experience Level: {experience_level}
- Time Commitment: {time_commitment}

ROADMAP TYPE: {variant['name']}
Approach: {variant['approach']}
Description: {variant['description']}

REQUIREMENTS:
1. Create a roadmap with:
   - roadmap_id: {roadmap_id}
   - roadmap_name: "{variant['name']}"
   - description: 1-2 sentences describing this specific path
   - duration_months: realistic time estimate
   - difficulty: "beginner", "intermediate", or "advanced"
   - steps: array of 5-6 learning steps (keep it concise)

2. Each step must have:
   - step_number: sequential number
   - title: clear, specific step name
   - description: brief 1-sentence what to learn
   - duration_weeks: time for this step
   - resources: 2 specific learning resources
   - skills_gained: 2-4 skills learned

OUTPUT FORMAT (JSON only, no markdown):
{{
  "roadmap_id": {roadmap_id},
  "roadmap_name": "{variant['name']}",
  "description": "brief description here",
  "duration_months": 6,
  "difficulty": "intermediate",
  "steps": [
    {{
      "step_number": 1,
      "title": "Step title",
      "description": "Brief description",
      "duration_weeks": 3,
      "resources": ["Resource 1", "Resource 2"],
      "skills_gained": ["Skill1", "Skill2"]
    }}
  ]
}}

Return ONLY valid JSON."""

        try:
            return self._request_json(prompt, temperature=0.7, top_p=0.95, top_k=40)

        except json.JSONDecodeError as e:
            print(f"Warning: Failed to parse roadmap {roadmap_id}: {e}")
            return None

        except Exception as e:
            # Best effort: one failed variant must not abort the whole batch.
            print(f"Warning: Error generating roadmap {roadmap_id}: {e}")
            return None

    def generate_roadmaps(self, user_profile):
        """
        Generate 4 roadmap variants by calling API 4 times

        Parameters:
        user_profile: dict containing user info

        Returns:
        list of the roadmap variants that were generated successfully
        (up to 4; failed variants are left out)
        """

        print("Generating 4 roadmap variants (this takes ~20-30 seconds)...")

        roadmaps = []

        for variant_id in range(1, 5):
            print(f"  Generating Variant {variant_id}/4...", end=" ")

            roadmap = self.generate_single_roadmap(user_profile, variant_id, variant_id)

            if roadmap:
                roadmaps.append(roadmap)
                print("✓")
            else:
                print("✗ (failed)")

        print(f"Successfully generated {len(roadmaps)}/4 roadmaps")

        return roadmaps

    def analyze_skill_gap(self, user_skills, required_skills):
        """
        Analyze skill gap between current and required skills

        Parameters:
        user_skills: list of current skills
        required_skills: list of required skills for career

        Returns:
        dict with gap analysis; on any failure a fallback dict with empty
        skill lists and an "Unable to analyze skill gap" summary
        """

        prompt = f"""Analyze the skill gap for career transition.

CURRENT SKILLS:
{', '.join(user_skills) if user_skills else 'None'}

REQUIRED SKILLS:
{', '.join(required_skills)}

TASK:
1. Identify which current skills are transferable
2. List skills that need to be learned
3. Prioritize missing skills (high/medium/low priority)
4. Estimate learning time for each missing skill

OUTPUT FORMAT (JSON only):
{{
  "transferable_skills": ["skill1", "skill2"],
  "missing_skills": [
    {{
      "skill": "Machine Learning",
      "priority": "high",
      "learning_time_weeks": 8,
      "difficulty": "intermediate",
      "reason": "Core requirement for data science roles"
    }},
    ...
  ],
  "learning_path_summary": "Brief 2-3 sentence summary of recommended approach"
}}

Return ONLY valid JSON."""

        try:
            return self._request_json(prompt, temperature=0.3)

        except Exception as e:
            # Best effort: callers always receive a dict with the expected keys.
            print(f"Error in skill gap analysis: {e}")
            return {
                "transferable_skills": [],
                "missing_skills": [],
                "learning_path_summary": "Unable to analyze skill gap"
            }


# ============================================================================
# TESTING
# ============================================================================

if __name__ == "__main__":
    # Smoke test for GeminiRoadmapGenerator: generate roadmaps and a skill-gap
    # analysis for a sample profile, print a summary of both, and save the
    # results to generated_roadmaps.json. Needs network access and a valid
    # Gemini API key.

    print("="*70)
    print("PHASE 3: GEMINI ROADMAP GENERATOR - TEST")
    print("="*70)

    try:
        generator = GeminiRoadmapGenerator()

        # Test Case 1: Data Scientist Career
        print("\n" + "="*70)
        print("TEST 1: DATA SCIENTIST ROADMAP")
        print("="*70)

        user_profile_1 = {
            'target_career': 'Data Scientist',
            'current_skills': ['Python', 'SQL', 'Excel'],
            'missing_skills': ['Machine Learning', 'Statistics', 'Deep Learning', 'TensorFlow'],
            'experience_level': 'beginner',
            'time_commitment': 'part-time'
        }

        print("\nGenerating roadmaps (one-by-one for reliability)...")
        roadmaps_1 = generator.generate_roadmaps(user_profile_1)

        if not roadmaps_1:
            print("\n❌ Failed to generate any roadmaps. Check API key and network.")
            # SystemExit is not an Exception subclass, so it is not swallowed
            # by the handlers at the bottom of this block.
            raise SystemExit(1)

        print(f"\n✓ Successfully generated {len(roadmaps_1)} roadmap variants\n")

        # Roadmaps are model output, so every field is read defensively.
        for roadmap in roadmaps_1:
            steps = roadmap.get('steps', [])

            print(f"\n{'='*70}")
            print(f"ROADMAP {roadmap.get('roadmap_id', 'N/A')}: {roadmap.get('roadmap_name', 'N/A')}")
            print(f"{'='*70}")
            print(f"Description: {roadmap.get('description', 'N/A')}")
            print(f"Duration: {roadmap.get('duration_months', 'N/A')} months")
            print(f"Difficulty: {roadmap.get('difficulty', 'N/A')}")
            print(f"\nSteps ({len(steps)} total):")

            for step in steps[:3]:  # Show first 3 steps
                skills = ', '.join(map(str, step.get('skills_gained', [])))
                resources = ', '.join(map(str, step.get('resources', [])[:2]))
                print(f"\n  Step {step.get('step_number', 'N/A')}: {step.get('title', 'N/A')}")
                print(f"  Duration: {step.get('duration_weeks', 'N/A')} weeks")
                print(f"  Skills: {skills}")
                print(f"  Resources: {resources}")

            if len(steps) > 3:
                print(f"\n  ... and {len(steps) - 3} more steps")

        # Test Case 2: Skill Gap Analysis
        print("\n\n" + "="*70)
        print("TEST 2: SKILL GAP ANALYSIS")
        print("="*70)

        print("\nAnalyzing skill gap...")
        gap_analysis = generator.analyze_skill_gap(
            user_skills=['Python', 'SQL', 'Excel'],
            required_skills=['Python', 'SQL', 'Machine Learning', 'Statistics',
                             'Deep Learning', 'TensorFlow', 'Data Visualization']
        )

        print("\n--- SKILL GAP ANALYSIS ---\n")

        print("✓ TRANSFERABLE SKILLS:")
        for skill in gap_analysis.get('transferable_skills', []):
            print(f"  • {skill}")

        print("\n📚 SKILLS TO LEARN:")
        for skill_info in gap_analysis.get('missing_skills', [])[:5]:
            print(f"\n  • {skill_info.get('skill', 'N/A')}")
            print(f"    Priority: {skill_info.get('priority', 'N/A')}")
            print(f"    Time: {skill_info.get('learning_time_weeks', 'N/A')} weeks")
            print(f"    Difficulty: {skill_info.get('difficulty', 'N/A')}")

        print("\n💡 SUMMARY:")
        print(f"  {gap_analysis.get('learning_path_summary', 'N/A')}")

        # Save roadmaps to JSON
        output_data = {
            'user_profile': user_profile_1,
            'roadmaps': roadmaps_1,
            'skill_gap_analysis': gap_analysis
        }

        with open('generated_roadmaps.json', 'w') as f:
            json.dump(output_data, f, indent=2)

        print("\n" + "="*70)
        print("GEMINI ROADMAP GENERATOR - TEST COMPLETE ✓")
        print("="*70)
        print("\nOutputs:")
        # Report the real count: individual variants may have failed.
        print(f"  ✓ {len(roadmaps_1)} roadmap variants generated")
        print("  ✓ Skill gap analysis complete")
        print("  ✓ Results saved to generated_roadmaps.json")

    except ValueError as e:
        print(f"\n❌ ERROR: {e}")
        print("\nTo fix: Set GEMINI_API_KEY environment variable")

    except Exception as e:
        print(f"\n❌ Unexpected error: {e}")
        import traceback
        traceback.print_exc()

In [None]:
"""
PathFinder AI - Phase 4: Policy Gradient (RL Algorithm #2)
Suggests optimal next step in a learning roadmap based on user progress
"""

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical
import json
import pickle
from collections import deque

# Run on CUDA when PyTorch reports an available GPU; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

class PolicyNetwork(nn.Module):
    """Feed-forward policy: maps a state vector to action probabilities."""

    def __init__(self, state_dim, action_dim, hidden_dim=128):
        """
        Build the policy network

        Parameters:
        state_dim: dimension of state vector
        action_dim: number of possible actions (steps in roadmap)
        hidden_dim: size of hidden layers
        """
        super().__init__()

        # Two ReLU hidden layers followed by a softmax over the actions.
        layers = [
            nn.Linear(state_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, action_dim),
            nn.Softmax(dim=-1),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, state):
        """Return the action probabilities for ``state``"""
        action_probs = self.network(state)
        return action_probs


class RoadmapPolicyGradient:
    """
    Policy Gradient (REINFORCE) agent for roadmap step recommendations
    Learns optimal sequence of learning steps

    Relies on the module-level ``device`` and on ``PolicyNetwork``.
    """

    # Layout of the state vector built by create_state():
    #   indices 0-9   completed-step flags
    #   index   10    progress fraction
    #   index   11    number of acquired skills / 20
    #   index   12    time spent in weeks / 52 (capped at 1)
    #   index   13    engagement score
    #   index   14    difficulty preference
    #   indices 15-19 most recent performance scores
    _STEP_SLOTS = 10
    _RECENT_SLOTS = 5

    def __init__(self, state_dim=20, max_roadmap_steps=10, learning_rate=0.001):
        """
        Initialize Policy Gradient agent

        Parameters:
        state_dim: size of state representation (create_state writes
                   indices 0-19, so it needs at least 20)
        max_roadmap_steps: maximum steps in a roadmap
        learning_rate: learning rate for optimizer
        """
        self.state_dim = state_dim
        self.max_steps = max_roadmap_steps
        self.action_dim = max_roadmap_steps  # Each step is an action

        # Policy network
        self.policy_net = PolicyNetwork(state_dim, self.action_dim).to(device)
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate)

        # Training memory
        self.saved_log_probs = []
        self.rewards = []
        self.gamma = 0.99  # Discount factor

        # Statistics
        self.episode_rewards = []
        self.episode_count = 0

    def create_state(self, user_progress):
        """
        Create state vector from user progress

        Parameters:
        user_progress: dict with:
            - completed_steps: list of completed step indices
            - current_skills: list of acquired skills
            - time_spent_weeks: total time invested
            - engagement_score: 0-1 (based on consistency)
            - difficulty_preference: 0-1 (easy to hard)
            - recent_performance: list of scores; the last 5 are used
          Missing keys fall back to defaults.

        Returns:
        state vector (numpy array of length state_dim)
        """
        # Initialize state vector
        state = np.zeros(self.state_dim)

        # Features 1-10: which steps are completed (one flag per step).
        # Negative indices are ignored: they would otherwise wrap around and
        # overwrite features at the end of the vector.
        completed = user_progress.get('completed_steps', [])
        for step_idx in completed:
            if 0 <= step_idx < self._STEP_SLOTS:
                state[step_idx] = 1.0

        # Feature 11: Progress percentage
        state[10] = len(completed) / self.max_steps if self.max_steps > 0 else 0

        # Feature 12: Skills acquired count (normalized)
        current_skills = user_progress.get('current_skills', [])
        state[11] = len(current_skills) / 20.0  # Normalize assuming max 20 skills

        # Feature 13: Time spent (normalized to weeks)
        time_spent = user_progress.get('time_spent_weeks', 0)
        state[12] = min(time_spent / 52.0, 1.0)  # Normalize to 1 year max

        # Feature 14: Engagement score
        state[13] = user_progress.get('engagement_score', 0.5)

        # Feature 15: Difficulty preference
        state[14] = user_progress.get('difficulty_preference', 0.5)

        # Features 16-20: Recent performance (last 5 steps)
        recent_performance = user_progress.get('recent_performance', [])
        for i, perf in enumerate(recent_performance[-self._RECENT_SLOTS:]):
            state[15 + i] = perf

        return state

    def select_action(self, state):
        """
        Sample the next step to recommend and remember its log-probability

        Parameters:
        state: numpy array representing current state

        Returns:
        action (step index), log_prob
        """
        # Convert to tensor
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)

        # Get action probabilities
        probs = self.policy_net(state_tensor)

        # Sample action from distribution
        m = Categorical(probs)
        action = m.sample()

        # Save log probability for training
        log_prob = m.log_prob(action)
        self.saved_log_probs.append(log_prob)

        return action.item(), log_prob.item()

    def store_reward(self, reward):
        """Store reward for current step"""
        self.rewards.append(reward)

    def calculate_returns(self):
        """
        Calculate discounted returns for the stored rewards

        Returns:
        float32 tensor with one return per stored reward. The returns are
        standardised (zero mean, unit std) only when there are at least two
        of them: the standard deviation of a single value is NaN, and a NaN
        return would turn the loss and then the network weights into NaN.
        """
        returns = []
        R = 0.0

        # Accumulate backwards, then restore chronological order
        for r in reversed(self.rewards):
            R = r + self.gamma * R
            returns.append(R)
        returns.reverse()

        returns = torch.tensor(returns, dtype=torch.float32).to(device)

        # Normalize returns
        if returns.numel() > 1:
            returns = (returns - returns.mean()) / (returns.std() + 1e-8)

        return returns

    def update_policy(self):
        """
        Update policy using REINFORCE algorithm

        Returns:
        loss value (float), or 0 when no rewards are stored
        """
        if len(self.rewards) == 0:
            return 0

        # Calculate returns
        returns = self.calculate_returns()

        # Calculate policy loss
        policy_loss = [
            -log_prob * R
            for log_prob, R in zip(self.saved_log_probs, returns)
        ]

        # Update policy network
        self.optimizer.zero_grad()
        loss = torch.stack(policy_loss).sum()
        loss.backward()
        self.optimizer.step()

        # Clear memory
        loss_value = loss.item()
        del self.saved_log_probs[:]
        del self.rewards[:]

        return loss_value

    def train_episode(self, episode_data):
        """
        Train on one complete episode (user journey)

        Parameters:
        episode_data: list of (state, action, reward) tuples

        Returns:
        total reward, loss
        """
        total_reward = 0

        for state, action, reward in episode_data:
            # Re-evaluate the recorded action under the current policy
            state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)
            probs = self.policy_net(state_tensor)
            m = Categorical(probs)

            # Recreate action tensor
            action_tensor = torch.tensor([action]).to(device)
            log_prob = m.log_prob(action_tensor)

            self.saved_log_probs.append(log_prob)
            self.rewards.append(reward)
            total_reward += reward

        # Update policy
        loss = self.update_policy()

        # Track statistics
        self.episode_rewards.append(total_reward)
        self.episode_count += 1

        return total_reward, loss

    def recommend_next_step(self, user_progress, roadmap_steps, completed_steps):
        """
        Recommend next step in roadmap

        Parameters:
        user_progress: dict with user info
        roadmap_steps: list of all steps in roadmap
        completed_steps: list of completed step indices

        Returns:
        recommended step dict, confidence score (the policy's probability for
        that step); (None, 0.0) when no step is available
        """
        # Create state
        state = self.create_state(user_progress)

        # Get action probabilities
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)
        with torch.no_grad():
            probs = self.policy_net(state_tensor).cpu().numpy()[0]

        # Mask completed steps. The policy only scores action_dim steps, so
        # steps beyond that cannot be ranked and are left out.
        completed = set(completed_steps)
        num_candidates = min(len(roadmap_steps), len(probs))
        available_steps = [i for i in range(num_candidates) if i not in completed]

        if not available_steps:
            return None, 0.0

        # Select step with highest probability
        best_step_idx = max(available_steps, key=lambda i: probs[i])
        confidence = probs[best_step_idx]

        # Get step details
        recommended_step = roadmap_steps[best_step_idx]

        return recommended_step, float(confidence)

    def save(self, filepath='policy_gradient_model.pkl'):
        """Save network weights, optimizer state and statistics to filepath"""
        torch.save({
            'policy_net_state': self.policy_net.state_dict(),
            'optimizer_state': self.optimizer.state_dict(),
            'episode_rewards': self.episode_rewards,
            'episode_count': self.episode_count,
            'state_dim': self.state_dim,
            'max_steps': self.max_steps,
            'action_dim': self.action_dim
        }, filepath)
        print(f"Policy Gradient model saved to {filepath}")

    def load(self, filepath='policy_gradient_model.pkl'):
        """
        Load a checkpoint written by save()

        The agent must have been created with the same state_dim and
        max_roadmap_steps as the saved one; the stored dimensions are not
        applied to this instance.
        """
        checkpoint = torch.load(filepath, map_location=device)
        self.policy_net.load_state_dict(checkpoint['policy_net_state'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state'])
        self.episode_rewards = checkpoint['episode_rewards']
        self.episode_count = checkpoint['episode_count']
        print(f"Policy Gradient model loaded from {filepath}")

    def get_statistics(self):
        """
        Get training statistics

        Returns:
        dict with total_episodes, average_reward, recent_rewards (last 10
        episodes) and total_reward; the same keys are present before any
        episode has been trained
        """
        if len(self.episode_rewards) == 0:
            return {
                'total_episodes': 0,
                'average_reward': 0,
                'recent_rewards': [],
                'total_reward': 0
            }

        return {
            'total_episodes': self.episode_count,
            'average_reward': float(np.mean(self.episode_rewards)),
            'recent_rewards': self.episode_rewards[-10:],
            'total_reward': sum(self.episode_rewards)
        }


# ============================================================================
# SYNTHETIC DATA GENERATOR
# ============================================================================

def generate_synthetic_episode(num_steps=6):
    """
    Generate synthetic user journey for training

    Simulates: User follows roadmap -> completes steps -> gets rewards

    Each step is attempted in order and succeeds with probability 0.8
    (reward 1.0); otherwise it counts as skipped/failed (reward -0.5).

    Parameters:
    num_steps: number of roadmap steps to simulate

    Returns:
    list of (state, action, reward) tuples
    """
    episode = []
    completed_steps = []
    skills_acquired = []

    # Only create_state() is needed, and it does not depend on the loop, so a
    # single default agent is built once instead of once per step.
    state_encoder = RoadmapPolicyGradient()

    for step_idx in range(num_steps):
        # Create user progress state
        user_progress = {
            'completed_steps': completed_steps.copy(),
            'current_skills': skills_acquired.copy(),
            'time_spent_weeks': step_idx * 2,  # 2 weeks per step
            'engagement_score': np.random.uniform(0.6, 1.0),
            'difficulty_preference': np.random.uniform(0.3, 0.8),
            'recent_performance': [np.random.uniform(0.6, 1.0) for _ in range(min(step_idx, 5))]
        }

        # Create state vector
        state = state_encoder.create_state(user_progress)

        # Action: recommend next step (sequential for now)
        action = step_idx

        # Reward: higher if user completes step successfully
        if np.random.random() < 0.8:  # 80% completion rate
            reward = 1.0  # Completed
            completed_steps.append(step_idx)
            skills_acquired.append(f"skill_{step_idx}")
        else:
            reward = -0.5  # Skipped/failed

        episode.append((state, action, reward))

    return episode


# ============================================================================
# TESTING
# ============================================================================

if __name__ == "__main__":
    # End-to-end check of the policy-gradient agent: train on synthetic
    # episodes, print statistics, request one recommendation, then save the
    # model and load it back.

    print("="*70)
    print("PHASE 4: POLICY GRADIENT - TEST")
    print("="*70)

    # Initialize agent
    print("\n[Step 1] Initializing Policy Gradient agent...")
    agent = RoadmapPolicyGradient(state_dim=20, max_roadmap_steps=10)
    print("✓ Agent initialized")
    print(f"  Device: {device}")
    print(f"  State dimension: {agent.state_dim}")
    print(f"  Action dimension: {agent.action_dim}")

    # Generate synthetic training data
    print("\n[Step 2] Generating synthetic training data...")
    num_episodes = 100
    synthetic_episodes = [generate_synthetic_episode(num_steps=6) for _ in range(num_episodes)]
    print(f"✓ Generated {num_episodes} synthetic episodes")

    # Train agent
    print("\n[Step 3] Training Policy Gradient agent...")
    print("This may take 1-2 minutes...")

    for episode_idx, episode_data in enumerate(synthetic_episodes):
        total_reward, loss = agent.train_episode(episode_data)

        if (episode_idx + 1) % 20 == 0:
            avg_reward = np.mean(agent.episode_rewards[-20:])
            print(f"  Episode {episode_idx + 1}/{num_episodes} | Avg Reward: {avg_reward:.2f} | Loss: {loss:.4f}")

    print("✓ Training complete")

    # Display statistics
    print("\n" + "="*70)
    print("TRAINING STATISTICS")
    print("="*70)

    stats = agent.get_statistics()
    print(f"\nTotal Episodes: {stats['total_episodes']}")
    print(f"Average Reward: {stats['average_reward']:.2f}")
    print(f"Total Reward: {stats['total_reward']:.2f}")
    print("\nRecent Rewards (last 10 episodes):")
    # Number the episodes from the actual size of the recent window so the
    # labels stay correct when fewer than 10 episodes were trained.
    first_recent = stats['total_episodes'] - len(stats['recent_rewards'])
    for i, reward in enumerate(stats['recent_rewards'], 1):
        print(f"  Episode {first_recent + i}: {reward:.2f}")

    # Test recommendation
    print("\n" + "="*70)
    print("TEST: NEXT STEP RECOMMENDATION")
    print("="*70)

    # Mock roadmap
    mock_roadmap = [
        {'step_number': 1, 'title': 'Python Fundamentals', 'duration_weeks': 3},
        {'step_number': 2, 'title': 'Data Structures', 'duration_weeks': 4},
        {'step_number': 3, 'title': 'Web Frameworks', 'duration_weeks': 3},
        {'step_number': 4, 'title': 'Database Design', 'duration_weeks': 3},
        {'step_number': 5, 'title': 'API Development', 'duration_weeks': 4},
        {'step_number': 6, 'title': 'Deployment', 'duration_weeks': 2},
    ]

    # Test user who completed first 2 steps
    test_user_progress = {
        'completed_steps': [0, 1],  # Completed steps 1 and 2
        'current_skills': ['Python', 'Data Structures', 'Algorithms'],
        'time_spent_weeks': 7,
        'engagement_score': 0.85,
        'difficulty_preference': 0.6,
        'recent_performance': [0.9, 0.8]
    }

    print("\nTest User Profile:")
    print(f"  Completed Steps: {len(test_user_progress['completed_steps'])}/{len(mock_roadmap)}")
    print(f"  Current Skills: {', '.join(test_user_progress['current_skills'])}")
    print(f"  Time Spent: {test_user_progress['time_spent_weeks']} weeks")
    print(f"  Engagement: {test_user_progress['engagement_score']:.2f}")

    # Get recommendation
    recommended_step, confidence = agent.recommend_next_step(
        test_user_progress,
        mock_roadmap,
        test_user_progress['completed_steps']
    )

    if recommended_step:
        print("\n✓ RECOMMENDED NEXT STEP:")
        print(f"  Step {recommended_step['step_number']}: {recommended_step['title']}")
        print(f"  Duration: {recommended_step['duration_weeks']} weeks")
        print(f"  Confidence: {confidence * 100:.1f}%")
    else:
        print("\nNo remaining steps to recommend")

    # Save model
    print("\n[Step 4] Saving model...")
    agent.save('policy_gradient_model.pkl')

    # Test loading
    print("\n[Step 5] Testing model loading...")
    new_agent = RoadmapPolicyGradient()
    new_agent.load('policy_gradient_model.pkl')
    print("✓ Model loaded successfully")

    print("\n" + "="*70)
    print("POLICY GRADIENT - TEST COMPLETE ✓")
    print("="*70)

    print("\nKey Features:")
    print("  ✓ Neural network policy")
    print("  ✓ REINFORCE algorithm")
    print("  ✓ Synthetic data training")
    print("  ✓ Next step recommendations")
    print("  ✓ Confidence scores")
    print("  ✓ Save/Load functionality")

    print("\nSaved artifacts:")
    print("  - policy_gradient_model.pkl")

In [None]:
"""
PathFinder AI - Phase 4: Career Guidance Chatbot
Gemini-powered chatbot for answering career questions
"""

import os
import json
from google import genai
from google.genai import types
from collections import deque

class CareerGuidanceChatbot:
    """Conversational AI for career guidance using Gemini.

    The chatbot keeps a rolling window of the last 10 messages (5 exchanges)
    and sends its persona prompt, plus an optional user profile, as the
    Gemini *system instruction* on every request, so the persona and profile
    are never lost once the conversation has more than one turn.
    """

    def __init__(self, api_key=None):
        """Initialize chatbot with Gemini API.

        Parameters:
        api_key: Gemini API key; falls back to the GEMINI_API_KEY
            environment variable when omitted or empty.

        Raises:
        ValueError: if no API key is supplied and none is in the environment.
        """
        self.api_key = api_key or os.environ.get("GEMINI_API_KEY")

        if not self.api_key:
            raise ValueError("GEMINI_API_KEY not found")

        self.client = genai.Client(api_key=self.api_key)
        self.model = "gemini-flash-latest"

        # Conversation history (last 10 messages) as (role, text) tuples.
        # Messages are always appended in user/model pairs, so the window
        # stays aligned on exchange boundaries when old entries fall off.
        self.conversation_history = deque(maxlen=10)

        # Profile of the current user; None until set_user_context() is called.
        self.user_context = None

        # System context
        self.system_context = """You are PathFinder AI, an expert career guidance counselor and educational advisor.

Your role:
- Help users with career decisions and planning
- Provide advice on skill development and learning paths
- Answer questions about job markets and industries
- Guide users through career transitions
- Offer encouragement and motivation

Your personality:
- Professional but friendly and approachable
- Supportive and encouraging
- Practical and action-oriented
- Honest about challenges while staying positive

Guidelines:
- Keep responses concise (2-4 sentences for simple questions, longer for complex ones)
- Provide specific, actionable advice when possible
- If you don't know something, say so honestly
- Ask clarifying questions when needed
- Reference the user's profile/progress when relevant"""

    def set_user_context(self, user_profile):
        """
        Set context about current user

        Parameters:
        user_profile: dict with:
            - name: user's name
            - target_career: desired career
            - current_skills: list of skills
            - completed_steps: roadmap progress
            - experience_level: beginner/intermediate/advanced
        """
        self.user_context = user_profile

    def _build_system_instruction(self, use_context=True):
        """
        Build the system instruction text for a request

        Parameters:
        use_context: whether to append the stored user profile

        Returns:
        the base system prompt, followed by a "Current User Profile"
        section when use_context is true and a profile has been set
        """
        sections = [self.system_context]

        # getattr keeps this safe for instances created without __init__.
        profile = getattr(self, 'user_context', None)
        if use_context and profile is not None:
            # Tolerate a missing/None skill list and non-string entries.
            skills = list(profile.get('current_skills') or [])[:5]
            lines = [
                "Current User Profile:",
                f"- Name: {profile.get('name', 'User')}",
                f"- Target Career: {profile.get('target_career', 'Not specified')}",
                f"- Current Skills: {', '.join(str(skill) for skill in skills)}",
                f"- Experience Level: {profile.get('experience_level', 'Not specified')}",
            ]

            completed = profile.get('completed_steps')
            if completed:
                # Accept either a collection of finished steps or a plain count.
                count = completed if isinstance(completed, int) else len(completed)
                lines.append(f"- Roadmap Progress: {count} steps completed")

            sections.append("\n".join(lines))

        return "\n\n".join(sections)

    def chat(self, user_message, use_context=True):
        """
        Send message to chatbot and get response

        Parameters:
        user_message: string from user
        use_context: whether to include user profile context

        Returns:
        bot response string; on an API failure or an empty model reply an
        apology string is returned instead and the history is left unchanged
        """
        # Replay the stored exchanges, then add the new user turn.
        messages = [
            types.Content(role=role, parts=[types.Part.from_text(text=content)])
            for role, content in self.conversation_history
        ]
        messages.append(types.Content(
            role="user",
            parts=[types.Part.from_text(text=user_message)]
        ))

        try:
            # The persona/profile goes in system_instruction rather than in
            # the first user message: history only stores the bare user
            # text, so an inlined prompt would vanish after the first turn.
            generate_content_config = types.GenerateContentConfig(
                system_instruction=self._build_system_instruction(use_context),
                temperature=0.7,  # Balanced creativity
                top_p=0.95,
                top_k=40,
            )

            response = self.client.models.generate_content(
                model=self.model,
                contents=messages,
                config=generate_content_config,
            )
        except Exception as e:
            # Top-level boundary: callers expect a string, never an exception.
            return f"I apologize, I'm having trouble processing your request right now. Error: {str(e)}"

        # response.text may be None/empty (e.g. when no text part is returned).
        bot_response = (response.text or "").strip()
        if not bot_response:
            return "I apologize, I couldn't generate a response to that. Could you try rephrasing your question?"

        # Save to conversation history only after a successful exchange so
        # user/model turns always stay paired.
        self.conversation_history.append(("user", user_message))
        self.conversation_history.append(("model", bot_response))

        return bot_response

    def get_career_advice(self, current_situation, target_career):
        """
        Get specific career advice

        Parameters:
        current_situation: string describing current status
        target_career: desired career path

        Returns:
        advice string
        """
        prompt = f"""The user is currently: {current_situation}

They want to become: {target_career}

Provide 3-5 specific, actionable pieces of advice for making this transition. 
Focus on:
1. Most important skills to learn first
2. Practical next steps they can take this week
3. Common pitfalls to avoid
4. Realistic timeline

Keep the advice practical and encouraging."""

        # The situation is given explicitly, so the stored profile is not used.
        return self.chat(prompt, use_context=False)

    def explain_skill(self, skill_name):
        """
        Explain what a skill is and why it's important

        Parameters:
        skill_name: name of the skill

        Returns:
        explanation string
        """
        prompt = f"""Explain the skill "{skill_name}" in simple terms:
1. What it is (1-2 sentences)
2. Why it's valuable in the job market
3. How long it typically takes to learn
4. Best way to start learning it

Keep it concise and practical."""

        return self.chat(prompt, use_context=False)

    def roadmap_encouragement(self, completed_steps, total_steps):
        """
        Provide encouragement based on progress

        Parameters:
        completed_steps: number of completed steps
        total_steps: total steps in roadmap

        Returns:
        encouragement message
        """
        # Guard against an empty roadmap to avoid dividing by zero.
        progress_pct = (completed_steps / total_steps * 100) if total_steps > 0 else 0

        prompt = f"""The user has completed {completed_steps} out of {total_steps} steps in their learning roadmap ({progress_pct:.0f}% complete).

Provide a short, encouraging message (2-3 sentences) that:
1. Acknowledges their progress
2. Motivates them to continue
3. Reminds them of the value of consistency

Be genuine and avoid clichés."""

        return self.chat(prompt, use_context=True)

    def clear_history(self):
        """Clear conversation history"""
        self.conversation_history.clear()

    def export_conversation(self):
        """Export conversation history as list of (role, text) tuples"""
        return list(self.conversation_history)


# ============================================================================
# TESTING
# ============================================================================

if __name__ == "__main__":
    # Manual smoke test: exercises every public chatbot method against the
    # live Gemini API and prints the responses for visual inspection.

    print("="*70)
    print("PHASE 4: CAREER GUIDANCE CHATBOT - TEST")
    print("="*70)

    # Check API key. Credentials must come from the environment; never
    # hard-code them in source, where they end up in version control.
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        print("\n⚠️ GEMINI_API_KEY not found")
        print("Set it with: export GEMINI_API_KEY='your-key'")
        # SystemExit works without relying on the site-provided exit() helper.
        raise SystemExit(1)

    # Initialize chatbot
    print("\n[Step 1] Initializing chatbot...")
    chatbot = CareerGuidanceChatbot(api_key=api_key)
    print("✓ Chatbot initialized")

    # Set user context
    test_user = {
        'name': 'Alex',
        'target_career': 'Data Scientist',
        'current_skills': ['Python', 'SQL', 'Excel', 'Statistics'],
        'completed_steps': [0, 1, 2],  # 3 steps completed
        'experience_level': 'beginner'
    }

    chatbot.set_user_context(test_user)
    print(f"✓ User context set for {test_user['name']}")

    # Test 1: Career advice
    print("\n" + "="*70)
    print("TEST 1: CAREER ADVICE")
    print("="*70)

    print("\nUser: I'm a software developer wanting to transition to Data Science")
    response = chatbot.get_career_advice(
        current_situation="Software Developer with 2 years experience in Python web development",
        target_career="Data Scientist"
    )
    print(f"\nBot: {response}")

    # Test 2: Skill explanation
    print("\n" + "="*70)
    print("TEST 2: SKILL EXPLANATION")
    print("="*70)

    print("\nUser: What is Machine Learning?")
    response = chatbot.explain_skill("Machine Learning")
    print(f"\nBot: {response}")

    # Test 3: General question
    print("\n" + "="*70)
    print("TEST 3: GENERAL QUESTION")
    print("="*70)

    print("\nUser: Should I get a Master's degree or learn through online courses?")
    response = chatbot.chat("Should I get a Master's degree or learn through online courses?")
    print(f"\nBot: {response}")

    # Test 4: Follow-up question (tests conversation history)
    print("\n" + "="*70)
    print("TEST 4: FOLLOW-UP QUESTION")
    print("="*70)

    print("\nUser: What about the cost difference?")
    response = chatbot.chat("What about the cost difference?")
    print(f"\nBot: {response}")

    # Test 5: Encouragement
    print("\n" + "="*70)
    print("TEST 5: PROGRESS ENCOURAGEMENT")
    print("="*70)

    print("\nUser Progress: 3/8 steps completed")
    response = chatbot.roadmap_encouragement(completed_steps=3, total_steps=8)
    print(f"\nBot: {response}")

    # Test 6: Context-aware question
    print("\n" + "="*70)
    print("TEST 6: CONTEXT-AWARE QUESTION")
    print("="*70)

    print("\nUser: What should I learn next based on my current skills?")
    response = chatbot.chat("What should I learn next based on my current skills?")
    print(f"\nBot: {response}")

    # Export conversation
    print("\n" + "="*70)
    print("CONVERSATION HISTORY")
    print("="*70)

    history = chatbot.export_conversation()
    print(f"\nTotal messages: {len(history)}")
    print("\nLast 3 exchanges:")
    # Each exchange is a (user, model) pair, so 3 exchanges == 6 messages.
    for role, message in history[-6:]:
        speaker = "User" if role == "user" else "Bot"
        # Truncate long messages to keep the console summary readable.
        preview = f"{message[:100]}..." if len(message) > 100 else message
        print(f"\n{speaker}: {preview}")

    print("\n" + "="*70)
    print("CHATBOT - TEST COMPLETE ✓")
    print("="*70)

    print("\nKey Features:")
    print("  ✓ Context-aware responses (user profile)")
    print("  ✓ Conversation history (last 10 messages)")
    print("  ✓ Career advice function")
    print("  ✓ Skill explanation function")
    print("  ✓ Progress encouragement")
    print("  ✓ Natural follow-up questions")

    print("\nReady for:")
    print("  - Integration with FastAPI backend")
    print("  - Real-time chat interface")
    print("  - User-specific guidance")