In [None]:
from google.colab import drive
from IPython.display import display, HTML, Javascript
import random
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Mount Google Drive at /content/drive; later cells read the model checkpoint
# and the occupations CSV from paths under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Install the packages this notebook relies on into the current runtime.
# NOTE(review): versions are unpinned, and this cell sits after the import cell,
# so on a runtime without transformers/torch preinstalled the imports would fail
# first - consider moving this cell to the top and pinning versions.
# NOTE(review): streamlit is installed here but not used in the visible cells.
!pip install transformers torch
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m67.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m89.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[

In [None]:
# Directory on the mounted Drive that holds the saved checkpoint.
model_path = '/content/drive/MyDrive/chatbot_model/checkpoint-1000'

# Tokenizer and causal language model are both restored from the same directory.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# Reuse the end-of-sequence token as the padding token so that calls which
# request padding have a pad token available.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
import pandas as pd
# Load the occupations table and tidy it in one pass (described in the original
# notebook as the same clean-up used by the training code):
#   1. drop the index-like columns whose header starts with "Unnamed",
#   2. normalise headers to stripped lower-case,
#   3. discard rows missing any field the chatbot looks up.
df = (
    pd.read_csv(
        '/content/drive/MyDrive/Occupations.csv',
        encoding='latin-1',
        low_memory=False,
    )
    .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed', case=False)]
    .rename(columns=lambda name: name.strip().lower())
    .dropna(subset=['job', 'detailed work activity', 'interest description', 'task', 'skills'])
)


In [None]:
import random

# Initialize conversation state.
# Module-level dict shared by every call to generate_career_guidance: the job
# currently being discussed is stored under the 'job' key, and the dict is
# cleared when the user quits.
conversation_state = {}

def generate_career_guidance(prompt):
    """Route one user message to a canned answer, a dataset lookup, or the model.

    Decision order:
      1. The word "quit" clears the conversation state and says goodbye.
      2. "recommend" / "suggest" returns career suggestions.
      3. A job title from ``df['job']`` found in the message becomes the active
         job in ``conversation_state`` and the user is asked what to explore.
      4. With an active job, the keywords "task", "interest" and "skill" are
         answered from the dataset; anything else goes to the language model.
      5. Without an active job, the user is asked to name one first.

    Args:
        prompt: The user's message (a string).

    Returns:
        The reply text as a string.
    """
    import re  # local import so this cell does not depend on another cell's imports

    global conversation_state

    # Lower-case once; every keyword test below is case-insensitive.
    text = prompt.lower()

    # Whole-word match: a plain substring test would treat "quite" as "quit"
    # and wipe the conversation state.
    if re.search(r'\bquit\b', text):
        conversation_state.clear()
        return "Goodbye! All the best for your future!"

    # Handle recommendations for careers.
    if 'recommend' in text or 'suggest' in text:
        return recommend_careers()

    # Look for a job title inside the message. The longest matching title wins
    # so that a short title cannot shadow a more specific one that also appears
    # in the message; ties keep dataset order.
    matched_job = None
    for job in df['job'].unique():
        # Skip non-string or blank titles: a blank title is a substring of
        # every message and would always "match".
        if not isinstance(job, str) or not job.strip():
            continue
        if job.lower() in text and (matched_job is None or len(job) > len(matched_job)):
            matched_job = job

    # A newly mentioned job starts a new context.
    if matched_job:
        conversation_state['job'] = matched_job
        return f"Let's explore {matched_job}. What would you like to know? Skills, tasks, interests, or something else?"

    # No job in this message and none remembered from earlier turns.
    if 'job' not in conversation_state:
        return "Please tell me which job you're interested in first."

    job = conversation_state['job']
    if 'task' in text:
        task = get_task_for_job(job)
        return f"The task for {job} is: {task}"
    if 'interest' in text:
        interest = get_interest_for_job(job)
        return f"The type of interest for {job} is: {interest}"
    # Match the singular stem so both "skill" and "skills" are recognised,
    # consistent with the singular 'task' / 'interest' checks above.
    if 'skill' in text:
        skills = get_skills_for_job(job)
        return f"The skills for {job} are: {skills}"

    # Free-form question about the active job: defer to the language model.
    return generate_response_from_context(prompt, conversation_state)

def recommend_careers(k=3):
    """Suggest up to ``k`` randomly chosen distinct job titles from ``df['job']``.

    Args:
        k: Number of suggestions wanted (default 3). It is clamped to the number
            of distinct jobs available, and to a minimum of 1, so a small
            dataset no longer makes ``random.sample`` raise ``ValueError``.

    Returns:
        A sentence listing the suggested jobs, or an apology when the dataset
        contains no jobs at all.
    """
    # str() guards the join below against non-string values in the column.
    jobs = [str(job) for job in df['job'].unique()]
    if not jobs:
        return "Sorry, I don't have any careers to suggest right now."

    sample_size = max(1, min(k, len(jobs)))
    recommended_jobs = random.sample(jobs, k=sample_size)
    return f"Here are some career suggestions for you: {', '.join(recommended_jobs)}. Which one interests you?"

def _lookup_job_field(job, column, fallback):
    """Return ``column`` from the first row of ``df`` whose job equals ``job``.

    The comparison is case-insensitive. ``fallback`` is returned when no row
    matches (previously an ``IndexError`` from ``.iloc[0]``) or when the stored
    value is missing.
    """
    matches = df.loc[df['job'].str.lower() == job.lower(), column]
    if matches.empty:
        return fallback
    value = matches.iloc[0]
    return value if pd.notna(value) else fallback


def get_task_for_job(job):
    """Return the 'task' value of the first dataset row for ``job``.

    Falls back to "Task information not available." when the job is unknown or
    the value is missing.
    """
    return _lookup_job_field(job, 'task', "Task information not available.")


def get_interest_for_job(job):
    """Return the 'interest description' value of the first dataset row for ``job``.

    Falls back to "Interest information not available." when the job is unknown
    or the value is missing.
    """
    return _lookup_job_field(job, 'interest description', "Interest information not available.")


def get_skills_for_job(job):
    """Return the 'skills' value of the first dataset row for ``job``.

    Falls back to "Skills information not available." when the job is unknown
    or the value is missing.
    """
    return _lookup_job_field(job, 'skills', "Skills information not available.")

def generate_response_from_context(prompt, context):
    """Generate a free-text reply to ``prompt`` with the loaded causal LM.

    Args:
        prompt: The user's message; it is the only text fed to the model.
        context: The conversation state. Accepted for interface compatibility
            but not used when building the model input.

    Returns:
        The decoded first generated sequence with special tokens removed.
        NOTE(review): for causal LMs ``generate`` normally returns the prompt
        tokens followed by the continuation, so the reply likely begins with an
        echo of ``prompt`` - confirm whether that is wanted.
    """
    # Let the tokenizer build both the input ids and the matching attention
    # mask (correct integer dtype) instead of hand-crafting a mask of ones.
    encoded = tokenizer(prompt, return_tensors='pt', truncation=True).to(model.device)

    # Take the pad id from the tokenizer: the visible cells assign the
    # tokenizer's pad token but never model.config.pad_token_id, which may
    # therefore be None. Fall back to EOS if no pad token is set.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # NOTE(review): max_length counts prompt tokens as well, so a prompt close
    # to 100 tokens leaves little or no room for new text.
    outputs = model.generate(
        encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        max_length=100,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        pad_token_id=pad_token_id,
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

def chatbot(input_text):
    """Public entry point: pass the user's text to generate_career_guidance and return its reply."""
    reply = generate_career_guidance(input_text)
    return reply

