# Install necessary libraries

In [None]:
# Install/upgrade langchain-community and install chromadb (chromadb is imported by the vector-store cell below).
# NOTE(review): versions are unpinned, so a fresh run may pull different releases — consider pinning.
!pip install -U langchain-community
!pip install chromadb

# Data Transformation Functions

In [None]:
import json

def transform_user_data(data):
    """Reduce a raw user profile dict to the compact shape used by the chatbot.

    Only keys that are present in ``data`` are carried over:

    - ``bigFiveResult``: each trait's ``score`` is mapped through ``score_to_category``.
    - ``dateOfBirth``, ``location``, ``name``, ``finalCareer``: copied unchanged.
    - ``educationDetails``: every entry is trimmed to a fixed set of keys.
    - ``hollandResult``: names of the two highest-scoring entries, best first.
    - ``mbtiResult``: the ``prediction`` value, when the entry has one.
    - ``recommendations``: ``Industry``/``Weighted_Score`` pairs; entries lacking
      either key are dropped.

    Returns:
        A new dict; ``data`` itself is not modified.
    """
    result = {}

    # Big Five: raw per-trait score -> category
    if "bigFiveResult" in data:
        big_five = {}
        for trait, details in data["bigFiveResult"].items():
            big_five[trait] = score_to_category(details["score"])
        result["bigFiveResult"] = big_five

    # Plain fields are passed through untouched, in this order
    passthrough = ("dateOfBirth", "location", "name", "finalCareer")
    result.update({key: data[key] for key in passthrough if key in data})

    # Education entries keep only the whitelisted keys
    if "educationDetails" in data:
        allowed_keys = {"degree", "endYear", "fieldOfStudy", "institute", "startYear", "campus", "grade"}
        trimmed = []
        for record in data["educationDetails"]:
            trimmed.append({key: val for key, val in record.items() if key in allowed_keys})
        result["educationDetails"] = trimmed

    # Holland: keep the names of the top two entries by score (ties keep input order)
    if "hollandResult" in data:
        ranked = sorted(data["hollandResult"], key=lambda item: item["score"], reverse=True)
        result["hollandResult"] = [item["name"] for item in ranked[:2]]

    # MBTI: only the predicted type string is kept
    if "mbtiResult" in data:
        mbti = data["mbtiResult"]
        if "prediction" in mbti:
            result["mbtiResult"] = mbti["prediction"]

    # Recommendations: keep industry + weighted score, skip incomplete entries
    if "recommendations" in data:
        kept = []
        for rec in data["recommendations"]:
            if "Industry" not in rec or "Weighted_Score" not in rec:
                continue
            kept.append({"Industry": rec["Industry"], "Weighted_Score": rec["Weighted_Score"]})
        result["recommendations"] = kept

    return result

def score_to_category(score):
    """Bucket a numeric score into a category from 1 to 5.

    Scores of at least 80 map to 5, at least 70 to 4, at least 60 to 3,
    at least 50 to 2, and anything lower to 1.
    """
    # (inclusive lower bound, category), checked from the highest band down
    bands = ((80, 5), (70, 4), (60, 3), (50, 2))
    for floor, category in bands:
        if score >= floor:
            return category
    return 1

# Example usage
# Sample raw profile in the shape transform_user_data reads: Big Five traits carry a
# "score", Holland entries carry "name" and "score", MBTI carries a "prediction".
# Defined here so the cell runs on a fresh kernel (input_data was otherwise undefined).
input_data = {
    "name": "Sample User",
    "dateOfBirth": "01-01-1997",
    "location": "Lahore",
    "bigFiveResult": {
        "A": {"score": 65},
        "C": {"score": 85},
        "E": {"score": 75},
        "N": {"score": 55},
        "O": {"score": 72},
    },
    "educationDetails": [
        {"degree": "Bse", "fieldOfStudy": "Computer Science", "institute": "Comsats",
         "campus": "Lahore", "grade": "A", "startYear": 2021, "endYear": 2025},
    ],
    "hollandResult": [
        {"name": "Artistic", "score": 35},
        {"name": "Investigative", "score": 30},
        {"name": "Social", "score": 12},
    ],
    "mbtiResult": {"prediction": "ESFJ"},
    "recommendations": [
        {"Industry": "it & software development", "Weighted_Score": 0.85},
        {"Industry": "marketing & advertising", "Weighted_Score": 0.56},
    ],
}

output_data = transform_user_data(input_data)
print(json.dumps(output_data, indent=4))


# API Key Configuration

In [None]:
import google.generativeai as genai
import os

# Read the Gemini key from the GEMINI_API_KEY environment variable (empty string when
# it is unset) and hand it to the SDK.
API_KEY = os.environ.get("GEMINI_API_KEY", "")
genai.configure(api_key=API_KEY)


# Initialize Conversation Memory

In [None]:
from langchain.memory import ConversationBufferMemory
from langchain.schema import messages_from_dict, messages_to_dict

# Conversation buffer: turns are stored under the "history" key, and
# return_messages=True asks for message objects rather than one flat string.
memory = ConversationBufferMemory(memory_key="history", return_messages=True)

  memory = ConversationBufferMemory(


# Load and Store Industry Skills in ChromaDB

In [None]:
import json
import chromadb
from sentence_transformers import SentenceTransformer

# Load the per-industry skill profiles (JSON object keyed by industry name)
with open("cleaned_skills_by_industry.json", "r", encoding="utf-8") as f:
    industry_skills = json.load(f)

# Initialize Sentence Transformer Model (the same model encodes queries in search_chroma)
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Initialize ChromaDB client & collection
chroma_client = chromadb.PersistentClient(path="./chroma_db")  # Saves DB persistently
collection = chroma_client.get_or_create_collection(name="industry_skills")

# Build one text document + metadata record per industry
record_ids, documents, metadatas = [], [], []
for idx, (industry, data) in enumerate(industry_skills.items()):
    top_technical_skills = ", ".join(data.get("Top Technical Skills", []))
    top_soft_skills = ", ".join(data.get("Top Soft Skills", []))
    # Traits and scope are kept as JSON strings so they can be stored as metadata values
    top_personality_traits = json.dumps(data.get("Top Personality Traits", []))
    industry_scope = json.dumps(data.get("Industry Scope", []))

    # Create a text representation
    text_representation = (
        f"Industry: {industry}. "
        f"Technical Skills: {top_technical_skills}. "
        f"Soft Skills: {top_soft_skills}. "
        f"Personality Traits: {top_personality_traits}. "
        f"Industry Scope: {industry_scope}."
    )

    record_ids.append(str(idx))  # Unique ID for each record
    documents.append(text_representation)
    metadatas.append({
        "industry": industry,
        "skills": top_technical_skills,
        "soft_skills": top_soft_skills,
        "personality": top_personality_traits,
        "industry_scope": industry_scope,
    })

# Store in ChromaDB.
# - Embeddings come from `model`, so stored vectors and query vectors share one encoder
#   instead of relying on the collection's default embedding function.
# - upsert (not add) keeps the cell idempotent: the collection is persistent and the
#   ids are positional, so re-running the cell overwrites records instead of re-adding them.
if documents:
    embeddings = model.encode(documents).tolist()
    collection.upsert(
        ids=record_ids,
        documents=documents,
        embeddings=embeddings,
        metadatas=metadatas,
    )

print("✅ Data stored successfully in ChromaDB!")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

/root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:01<00:00, 75.1MiB/s]


✅ Data stored successfully in ChromaDB!


# ChromaDB Search Function

In [None]:
def search_chroma(query, k=5):
    """Return metadata for the ``k`` stored industries most similar to ``query``.

    The query is embedded with the module-level ``model`` and matched against the
    "industry_skills" collection of the module-level ``chroma_client``.

    Returns:
        A list of dicts with the keys ``industry``, ``skills``, ``soft_skills``,
        ``personality`` and ``scope``; a metadata field that is absent yields "N/A".
    """
    industry_collection = chroma_client.get_collection(name="industry_skills")

    # Embed the query text and run the nearest-neighbour lookup
    query_vector = model.encode(query).tolist()
    hits = industry_collection.query(query_embeddings=[query_vector], n_results=k)

    # (output key, metadata key) — only "scope" is stored under a different name
    field_map = (
        ("industry", "industry"),
        ("skills", "skills"),
        ("soft_skills", "soft_skills"),
        ("personality", "personality"),
        ("scope", "industry_scope"),
    )

    # A single query was sent, so its matches are the first (only) metadata list
    return [
        {out_key: meta.get(meta_key, "N/A") for out_key, meta_key in field_map}
        for meta in hits["metadatas"][0]
    ]

# Chatbot Logic and Prompt Definitions

In [None]:
import re
import json
import google.generativeai as genai
from langchain.memory import ConversationBufferMemory

# Point the Gemini SDK at API_KEY (set earlier from the GEMINI_API_KEY environment
# variable); keep the key in the environment rather than pasting it into the notebook.
genai.configure(api_key=API_KEY)

# Fresh conversation memory for this cell: re-running the cell rebinds `memory`
# to a new, empty buffer. History is requested as message objects.
memory = ConversationBufferMemory(
    memory_key="history",
    return_messages=True,
)
# user_data = {
#     "bigFiveResult": {"A": 3, "C": 5, "E": 4, "N": 2, "O": 4},
#     "dateOfBirth": "01-01-1997",
#     "educationDetails": [
#         {"degree": "Matriculation", "endYear": 2026, "fieldOfStudy": "Computer Science", "institute": "Allied School", "startYear": 2022},
#         {"campus": "Lahore", "degree": "Bse", "endYear": "2025", "fieldOfStudy": "Computer Science", "grade": "A", "institute": "Comsats", "startYear": 2021}
#     ],
#     "finalCareer": "MARKETING & ADVERTISING",
#     "hollandResult": ["Artistic", "Investigative"],
#     "location": "Lahore",
#     "mbtiResult": "ESFJ",
#     "name": "Huzefa Abbasi",
#     "recommendations": [
#         {"Industry": "it & software development", "Weighted_Score": 0.85},
#         {"Industry": "marketing & advertising", "Weighted_Score": 0.56},
#         {"Industry": "sales & distribution", "Weighted_Score": 0.55},
#         {"Industry": "finance", "Weighted_Score": 0.5},
#         {"Industry": "healthcare", "Weighted_Score": 0.47}
#     ]
# }
# Active sample profile read by the chatbot functions below. It has recommendations
# but no "finalCareer", so the "user_type_has_recommendations" prompt is the one selected.
# Big Five values are plain numbers, Holland is a list of names and MBTI a type string.
user_data = {
    "bigFiveResult": {"A": 3, "C": 5, "E": 4, "N": 2, "O": 4},
    "dateOfBirth": "01-01-1997",
    "educationDetails": [
        {"degree": "Matriculation", "endYear": 2026, "fieldOfStudy": "Computer Science", "institute": "Allied School", "startYear": 2022},
        {"campus": "Lahore", "degree": "Bse", "endYear": "2025", "fieldOfStudy": "Computer Science", "grade": "A", "institute": "Comsats", "startYear": 2021}
    ],
    "hollandResult": ["Artistic", "Investigative"],
    "location": "Lahore",
    "mbtiResult": "ESFJ",
    "name": "Huzefa Abbasi",
    "recommendations": [
        {"Industry": "it & software development", "Weighted_Score": 0.85},
        {"Industry": "marketing & advertising", "Weighted_Score": 0.56},
        {"Industry": "sales & distribution", "Weighted_Score": 0.55},
        {"Industry": "finance", "Weighted_Score": 0.5},
        {"Industry": "healthcare", "Weighted_Score": 0.47}
    ]
}
# user_data = {
#     "bigFiveResult": {"A": 3, "C": 5, "E": 4, "N": 2, "O": 4},
#     "dateOfBirth": "01-01-1997",
#     "educationDetails": [
#         {"degree": "Matriculation", "endYear": 2026,  "institute": "Allied School", "startYear": 2022},
#         {"campus": "Lahore", "degree": "Bba", "endYear": "2025", "fieldOfStudy": "Business", "grade": "A", "institute": "Comsats", "startYear": 2021}
#     ],
#     "location": "Lahore",
#     "mbtiResult": "ESFJ",
#     "name": "Huzefa Abbasi",
# }
# user_data ={
#     "name": "Huzefa Abassi"
# }
# Profile-completeness flags; get_classification_prompt uses them to pick the persona prompt.
has_final_career = bool(user_data.get("finalCareer"))
has_recommendations = bool(user_data.get("recommendations"))
has_personality_tests = any(user_data.get(k) for k in ["bigFiveResult", "hollandResult", "mbtiResult"])
has_education_details = bool(user_data.get("educationDetails"))
# Field of study of the last-listed education entry. .get() is used because an entry
# may omit "fieldOfStudy", which would otherwise raise KeyError here.
education_field = user_data["educationDetails"][-1].get("fieldOfStudy") if has_education_details else None

# Persona/instruction prompts keyed by how complete the user's profile is.
# get_classification_prompt picks exactly one of these (based on the has_* flags)
# and embeds it at the top of the prompt it returns.
career_chatbot_prompt = {
"user_type_final":"""
    You are Honor, an AI-powered career counseling chatbot helping users explore career options for all age groups and education levels.
    ### INSTRUCTIONS (not to be repeated to users):
    **User Type: Has a Final Career**
    - Ask clarifying questions when needed to better tailor advice, limiting to 1-2 per response.
    - Provide personalized career guidance based on user inputs, ensuring relevance.
    - Consider user data, including:
      - **Final Career Choice** → Align advice with the user's selected career path.
      - **Education Details** → Factor in degrees, institutions, and fields of study.
      - **Personality & Career Assessments** → Use Big Five, MBTI, and Holland results to refine recommendations.
      - **Location** → Consider regional job opportunities, industry growth, and relevant institutions.
      - **Career Recommendations & Weighted Scores** → Use provided industry scores to offer additional insights.
    - Include the following details in recommendations:
      - Education requirements (degrees, certifications, alternative paths).
      - Skills needed (technical & soft skills).
      - Job opportunities (salary range, industry demand, career growth).
    - Be conversational, empathetic, and engaging in tone.
    - Maintain context awareness by remembering previous conversations when responding.
""",
"user_type_has_recommendations":"""
    You are Honor, an AI-powered career counseling chatbot helping users explore career options for all age groups and education levels.
    ### INSTRUCTIONS (not to be repeated to users):
    **User Type: Completed Personality Tests and Received Career Recommendations but Has Not Chosen a Final Career**
    - **Encourage the user to explore and reflect on their recommended careers.**
      - Discuss the strengths and weaknesses of each recommended industry based on their personality results.
      - Provide insights into **job roles, growth potential, and required skills** for the top-ranked careers.
    - **Help the user narrow down their options** by asking about:
      - Their personal interests and work preferences.
      - Any concerns or doubts about the recommended fields.
    - Consider user data, including:
      - **Education Details** → Factor in degrees, institutions, and fields of study.
      - **Personality & Career Assessments** → Use Big Five, MBTI, and Holland results to refine advice.
      - **Location** → Consider regional job opportunities and industry trends.
      - **Career Recommendations & Weighted Scores** → Explain why certain careers are ranked higher and how they align with the user’s traits.
    - **Provide additional career insights**, including:
      - Education requirements (degrees, certifications, alternative paths).
      - Skills needed (technical & soft skills).
      - Job market outlook (salary range, industry demand, career growth).
    - Be conversational, supportive, and engaging in tone.
    - Maintain context awareness by remembering previous conversations when responding.
""",
"user_type_atleast_one_personalityTest":"""
    You are Honor, an AI-powered career counseling chatbot helping users explore career options for all age groups and education levels.
    ### INSTRUCTIONS (not to be repeated to users):
    **User Type: Has Taken at Least One Personality Test but No Final Career Recommendation**
    - Guide the user to complete missing career assessments (e.g., Big Five, MBTI, Holland) to provide a more accurate career recommendation.
    - If tests are incomplete, explain their importance and encourage completion.
    - Provide preliminary career insights based on existing test results, education, and location.
    - Consider user data, including:
      - **Education Details** → Factor in degrees, institutions, and fields of study.
      - **Personality & Career Assessments** → Use available test results to suggest career directions.
      - **Location** → Consider regional job opportunities, industry growth, and relevant institutions.
      - **Career Recommendations & Weighted Scores** → Use provided industry scores to highlight potential career paths.
    - Include the following details in recommendations:
      - Education requirements (degrees, certifications, alternative paths).
      - Skills needed (technical & soft skills).
      - Job opportunities (salary range, industry demand, career growth).
    - Be conversational, empathetic, and engaging in tone.
    - Maintain context awareness by remembering previous conversations when responding.
""",
"user_type_education_only":"""
    You are Honor, an AI-powered career counseling chatbot helping users explore career options for all age groups and education levels.
    ### INSTRUCTIONS (not to be repeated to users):
    **User Type: Has Provided Education Details but No Personality Test**
    - Explain the **importance of personality tests** (e.g., Big Five, MBTI, Holland) in career decision-making.
      - Highlight how these tests help match careers with interests, strengths, and work preferences.
      - Encourage the user to take at least one test for a more personalized career recommendation.
    - Provide **general career guidance** based on the user's education level and field of study.
      - Suggest broad career paths that align with their education.
      - Mention alternative career options if they wish to explore other fields.
    - Consider user data, including:
      - **Education Details** → Use degrees, institutions, and fields of study to suggest relevant careers.
      - **Location (if available)** → Factor in regional job opportunities and industries.
    - Include the following details in recommendations:
      - Education requirements (degrees, certifications, alternative paths).
      - Skills needed (technical & soft skills).
      - Job opportunities (salary range, industry demand, career growth).
    - Be conversational, empathetic, and engaging in tone.
    - Maintain context awareness by remembering previous conversations when responding.
""",
"user_type_new_signup":"""
    You are Honor, an AI-powered career counseling chatbot helping users explore career options for all age groups and education levels.
    ### INSTRUCTIONS (not to be repeated to users):
    **User Type: New Signup (Only Provided Name)**
    - **Introduce yourself** and explain how career counseling works.
    - **Guide the user to provide necessary details** to offer personalized career advice:
      - **Education details** (e.g., degrees, institutions, fields of study).
      - **Personality test results** (e.g., Big Five, MBTI, Holland) for better career matching.
      - **Career interests or preferences**, if they have any.
    - Explain the **benefits of career counseling**, including:
      - Matching careers to skills, education, and personality.
      - Identifying strengths and potential career paths.
      - Providing insights on education, required skills, and job opportunities.
    - Be conversational, friendly, and engaging to make the user feel comfortable.
    - Maintain context awareness by remembering previous conversations when responding.
"""
}
# Template asking the model to name the industry a query refers to.
# The {query} placeholder is filled with str.format(query=...) by the caller.
industry_prompt = """
Extract the **industry or field** the user is referring to in this career-related query.

Query: "{query}"

Respond with only the industry name (e.g., 'Software Engineering', 'Healthcare', 'Finance'). If no clear industry is found, respond with 'General Career'.
"""
# Template asking the model to name the city a query refers to; same {query}
# placeholder. The model is told to answer 'None' when no city is mentioned.
city_prompt = """
Extract the **city or location** the user is referring to in this query.

Query: "{query}"

Respond with only the city name. If no clear city is mentioned, respond with 'None'.
"""

def get_classification_prompt(query):
    """Build the prompt that asks the model to classify ``query`` into categories 1-5.

    The persona prompt matching the user's profile stage is placed first. The stage
    is the first of the module-level flags that is set, checked in this order:
    final career, recommendations, personality tests, education details; a profile
    with none of them gets the new-signup prompt.
    """
    # (flag, prompt key) in priority order — the first truthy flag wins
    stages = (
        (has_final_career, "user_type_final"),
        (has_recommendations, "user_type_has_recommendations"),
        (has_personality_tests, "user_type_atleast_one_personalityTest"),
        (has_education_details, "user_type_education_only"),
    )
    prompt_key = "user_type_new_signup"
    for flag_is_set, stage_key in stages:
        if flag_is_set:
            prompt_key = stage_key
            break
    active_prompt = career_chatbot_prompt[prompt_key]

    return f"""
    {active_prompt}
    Now, classify the following user query into one of these categories:
        1. Greeting
        2. Career-related
        3. University-related
        4. Job scope-related
        5. Unrelated

        User Query: "{query}"
        Respond with only the number '1', '2', '3', '4', or '5' without any additional text.
    """

def classify_query_extract_city_and_industry(query):
    """Classify a user query and, when relevant, extract its city and industry.

    Makes up to three Gemini calls: one to classify the query, one to extract a
    city, and (for career/scope queries only) one to extract an industry.

    Args:
        query: The raw user message.

    Returns:
        A dict with the keys:
        - ``category``: "greeting", "career", "university", "scope" or "unrelated".
        - ``city``: the extracted city, or the string "None".
        - ``industry``: the extracted industry, "General Career" when the model gives
          no answer, or the string "None" when no extraction was attempted.
        Greetings and unrelated queries return early with "None" for both fields.
    """
    model = genai.GenerativeModel("gemini-2.0-flash")

    def _ask(prompt, fallback):
        """Send one prompt and return the stripped reply, or ``fallback`` if there is no usable text."""
        response = model.generate_content(prompt)
        try:
            text = response.text if response else ""
        except ValueError:
            # The .text accessor raises ValueError when the response carries no valid
            # text part (for example when it was blocked), so fall back instead of crashing.
            text = ""
        return (text or "").strip() or fallback

    # Query Classification: accept replies such as "2", "2." or "Category: 2" by
    # taking the first standalone digit in the 1-5 range; anything else is unrelated.
    raw_category = _ask(get_classification_prompt(query), "5")
    digit_match = re.search(r"\b[1-5]\b", raw_category)
    category_num = digit_match.group(0) if digit_match else "5"

    category_map = {
        "1": "greeting",
        "2": "career",
        "3": "university",
        "4": "scope",
        "5": "unrelated"
    }
    classified_category = category_map.get(category_num, "unrelated")
    print("debug", "classified category:", classified_category)

    # If it's a greeting or unrelated, return early with "None"
    if classified_category in ["greeting", "unrelated"]:
        return {
            "category": classified_category,
            "city": "None",
            "industry": "None"
        }

    # City Extraction
    extracted_city = _ask(city_prompt.format(query=query), "None")

    # Industry Extraction (only if career-related)
    extracted_industry = "None"
    if classified_category in ["career", "scope"]:
        extracted_industry = _ask(industry_prompt.format(query=query), "General Career")

    return {
        "category": classified_category,
        "city": extracted_city,
        "industry": extracted_industry
    }
def format_user_recommendations():
    """Render the user's career recommendations as a bulleted text block.

    Reads ``recommendations`` from the module-level ``user_data``. When there are
    none, a fixed "not available" sentence is returned; otherwise a header line
    followed by one "- industry (Score: value)" line per recommendation.
    The formatted block is also printed as a debug line.
    """
    recommendations = user_data.get("recommendations")
    if not recommendations:
        return "No career recommendations available."

    bullet_lines = [
        f"- {rec['Industry']} (Score: {rec['Weighted_Score']})\n"
        for rec in recommendations
    ]
    formatted_recs = "User Career Recommendations:\n" + "".join(bullet_lines)
    print("debug",formatted_recs)
    return formatted_recs

# Generate industry information based on detected industry
def get_industry_data(query, city=None):
    """Look up the stored industry profile closest to ``query``.

    Args:
        query: The user message; used for the ChromaDB similarity search.
        city: City to report the local job-market scope for. When omitted (or the
            string "None"), the city is extracted from the query via
            ``classify_query_extract_city_and_industry``; a caller that has already
            classified the query should pass its city to avoid repeating those calls.

    Returns:
        A dict with "Industry", "Top Skills" and "Soft Skills" for the best match,
        plus "Local Market" when a city is known. An empty dict when the search
        returns nothing.
    """
    # Use ChromaDB to search for related industry skills
    chroma_results = search_chroma(query, k=1)  # Fetch most relevant industry
    if not chroma_results:
        industry_data = {}
        print("debug", industry_data)
        return industry_data

    # Only fall back to the (multi-call) LLM extraction when no city was supplied
    if city is None or city == "None":
        detected_city = classify_query_extract_city_and_industry(query)["city"]
        city = detected_city if detected_city != "None" else None

    best_match = chroma_results[0]
    industry_data = {
        "Industry": best_match["industry"],
        "Top Skills": best_match["skills"],
        "Soft Skills": best_match["soft_skills"],
    }

    # If a city is known, extract and add its scope
    if city:
        # Scope is stored as a JSON string; tolerate a missing/invalid value or a
        # non-mapping payload (e.g. "[]") instead of raising.
        try:
            scope_by_city = json.loads(best_match["scope"])
        except (TypeError, ValueError):
            scope_by_city = {}
        if not isinstance(scope_by_city, dict):
            scope_by_city = {}

        city_scope = scope_by_city.get(city)
        if city_scope is None:
            # Exact key missed: retry ignoring case and surrounding whitespace
            wanted = city.strip().casefold()
            city_scope = next(
                (scope for name, scope in scope_by_city.items()
                 if str(name).strip().casefold() == wanted),
                "Not available",
            )
        industry_data["Local Market"] = f"Job market scope for {city}: {city_scope}"

    print("debug", industry_data)
    return industry_data

def get_university_suggestions(query):
    """Ask Gemini for university information relevant to ``query``.

    The city sent to the model is the ``location`` from the module-level
    ``user_data`` ("Unknown" when absent), not a city extracted from the query.

    Returns:
        The model's stripped reply, or a fixed fallback sentence when the model
        returns no usable text.
    """
    model = genai.GenerativeModel("gemini-2.0-flash")
    user_city = user_data.get("location", "Unknown")
    university_prompt = f"""
    You are an AI career advisor. The user wants to know about universities.

    **User City**: {user_city}
    **Query**: "{query}"

    If the city is available, provide information about universities **in that city**.
    If the city is missing, provide **general university recommendations** for their field.
    """
    response = model.generate_content(university_prompt)
    try:
        answer = response.text.strip() if response and response.text else ""
    except ValueError:
        # The .text accessor raises ValueError when the response carries no valid
        # text part (for example when it was blocked); use the fallback instead.
        answer = ""
    return answer or "I couldn't find university details."

# Generate response for user query
def generate_response(query):
    """Produce the chatbot's reply to one user message and record the exchange.

    Flow:
    1. "hi", "hello" or "start over" (ignoring case/whitespace) clears the memory.
    2. The query is classified; greetings, university questions and unrelated
       topics are answered without building the career prompt.
    3. Career/scope queries get a prompt containing the user profile, the
       conversation so far and, when an industry was detected, the matching
       industry data.

    Every exchange is saved to the module-level ``memory``.

    Args:
        query: The raw user message.

    Returns:
        The reply text.
    """
    model = genai.GenerativeModel("gemini-2.0-flash")

    # Reset conversation if the user starts a new chat
    if query.strip().lower() in ["hi", "hello", "start over"]:
        memory.clear()

    # Load conversation history for the current session only (empty right after a reset)
    session_memory = memory.load_memory_variables({}).get("history", "")

    # Classify query and get relevant information
    classification_results = classify_query_extract_city_and_industry(query)
    category = classification_results["category"]
    city = classification_results["city"] if classification_results["city"] != "None" else None
    industry = classification_results["industry"]

    print("Debug - Detected Industry:", industry)

    # If not career-related, return a polite response without extracting industry data
    if category == "greeting":
        response_text = "Hi there! I'm here to help you navigate your career path. How can I assist you today?"
        memory.save_context({"input": query}, {"output": response_text})
        return response_text
    elif category == "university":
        response_text = get_university_suggestions(query)
        memory.save_context({"input": query}, {"output": response_text})
        return response_text
    elif category not in ["career", "scope"]:
        response_text = "I'm a career guidance specialist and can't help with that topic. I'd be happy to discuss your career goals, education paths, or job opportunities instead."
        memory.save_context({"input": query}, {"output": response_text})
        return response_text

    # Get industry data only if a specific industry was detected
    industry_data = get_industry_data(query, city) if industry and industry != "None" and industry!="General Career" else None

    # The history must be part of the prompt, otherwise the model never sees earlier turns
    conversation_history = _format_history(session_memory)

    # Construct prompt
    combined_prompt = f"""
    User Query: {query}
    Detected City: {city or "Unknown"}
    Detected Industry: {industry or "General"}

    User Information:
    - Name: {user_data.get('name', 'Unknown')}
    - Location: {user_data.get('location', 'Unknown')}
    - Education: {education_field or 'Unknown field'}
    - MBTI: {user_data.get('mbtiResult', 'Unknown')}
    - Big Five: {user_data.get('bigFiveResult', {})}
    - Holland: {user_data.get('hollandResult', [])}
    - Career Recommendations: {format_user_recommendations()}
    - Final Career: {user_data.get('finalCareer', 'None')}

    Conversation so far (oldest first):
    {conversation_history or "(no previous messages)"}

    Here's relevant career information based on the query:\n{industry_data or "No specific industry data available."}

    As a career counselor, your goal is to provide expert guidance in a professional yet friendly manner.
    - If this is the first interaction, introduce yourself.
    - Offer practical career advice based on the provided industry data.
    - Ask an open-ended question to continue the conversation.

    Respond conversationally, like a real career counselor would.
    """

    # Generate response from the model
    response = model.generate_content(combined_prompt)
    try:
        response_text = response.text.strip() if response and response.text else ""
    except ValueError:
        # The .text accessor raises ValueError when the response carries no valid
        # text part (for example when it was blocked); use the fallback instead.
        response_text = ""
    response_text = response_text or "I'm not sure how to respond to that."

    # Debugging: Check stored session memory
    print("Debug - Current Session Memory:", session_memory)

    # Save updated conversation history for the session
    memory.save_context({"input": query}, {"output": response_text})

    return response_text


def _format_history(history):
    """Render stored conversation turns as plain "Speaker: text" lines for the prompt."""
    if isinstance(history, str):
        return history
    lines = []
    for message in history or []:
        # assumes LangChain message objects exposing .type ("human"/"ai") and .content
        speaker = "User" if getattr(message, "type", "") == "human" else "Honor"
        lines.append(f"{speaker}: {getattr(message, 'content', message)}")
    return "\n".join(lines)

# Example usage
def main():
    """Run the interactive console chat loop.

    Reads user messages until the user types "exit" (case and surrounding
    whitespace are ignored) or input ends / is interrupted. Blank lines are
    skipped rather than sent to the model. Each other message is passed to
    ``generate_response`` and the reply is printed.
    """
    # Interactive mode
    print("\n--- Career Guidance Chatbot ---")
    print("Type 'exit' to end the conversation.\n")

    farewell = "Chatbot: Thank you for using the career guidance chatbot. Good luck with your career journey!"

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed input stream or an interrupted prompt: leave the loop cleanly
            print()
            print(farewell)
            break

        if not user_input:
            continue  # nothing typed — prompt again instead of calling the model
        if user_input.lower() == "exit":
            print(farewell)
            break

        bot_response = generate_response(user_input)
        print(f"Chatbot: {bot_response}")

# Start the chat loop when this code runs as the main program; executing the
# notebook cell satisfies this check, so the loop starts immediately.
if __name__ == "__main__":
    main()


--- Career Guidance Chatbot ---
Type 'exit' to end the conversation.

You: hi
debug 
    
    You are Honor, an AI-powered career counseling chatbot helping users explore career options for all age groups and education levels.
    ### INSTRUCTIONS (not to be repeated to users):
    **User Type: Completed Personality Tests and Received Career Recommendations but Has Not Chosen a Final Career**
    - **Encourage the user to explore and reflect on their recommended careers.**
      - Discuss the strengths and weaknesses of each recommended industry based on their personality results.
      - Provide insights into **job roles, growth potential, and required skills** for the top-ranked careers.
    - **Help the user narrow down their options** by asking about:
      - Their personal interests and work preferences.
      - Any concerns or doubts about the recommended fields.
    - Consider user data, including:
      - **Education Details** → Factor in degrees, institutions, and fields of s