In [3]:
# Configure ngrok with your token
!pip install pyngrok
!ngrok authtoken 2ntMiQeqSVSBgSqa0J9P8QGiZ4G_2LTQDpNYoUhdkZ92EuPcy

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [5]:
'''
# Asha AI Chatbot Implementation
# This notebook sets up a free AI chatbot for JobsForHer with RAG capabilities
'''

# STEP 1: Install necessary packages
!pip install -q transformers datasets sentence-transformers faiss-cpu nltk gradio langchain llama-index pypdf flask pyngrok huggingface_hub

# STEP 2: Import required libraries
import os
import json
import nltk
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from sentence_transformers import SentenceTransformer
from pyngrok import ngrok
import faiss
import gradio as gr
from huggingface_hub import login

# Fetch the NLTK resources (tokenizer models, then the stop-word lists)
for resource in ('punkt', 'stopwords'):
    nltk.download(resource)

# STEP 3: Setup knowledge base
# Hand-written sample records about JobsForHer (a real deployment would load these from files).

# Open positions: id, title, company, location, description and search tags.
job_listings = [
    {
        "id": 1,
        "title": "Software Engineer",
        "company": "TechCorp",
        "location": "Remote",
        "description": "Software engineering role focused on frontend development.",
        "tags": ["tech", "coding", "frontend"],
    },
    {
        "id": 2,
        "title": "Data Analyst",
        "company": "DataInsights",
        "location": "Bangalore",
        "description": "Analyze business data and create dashboards.",
        "tags": ["data", "analytics", "sql"],
    },
    {
        "id": 3,
        "title": "Product Manager",
        "company": "InnovateCo",
        "location": "Hybrid",
        "description": "Lead product development initiatives for women-focused tech products.",
        "tags": ["product", "leadership"],
    },
    {
        "id": 4,
        "title": "Marketing Specialist",
        "company": "GrowthMedia",
        "location": "Mumbai",
        "description": "Create marketing campaigns for women empowerment initiatives.",
        "tags": ["marketing", "social media"],
    },
]

# Upcoming events: id, title, ISO date, location and description.
events = [
    {
        "id": 1,
        "title": "Women in Tech Conference",
        "date": "2025-06-15",
        "location": "Virtual",
        "description": "Annual conference showcasing women leaders in technology.",
    },
    {
        "id": 2,
        "title": "Resume Building Workshop",
        "date": "2025-06-20",
        "location": "Bangalore",
        "description": "Learn how to create effective resumes for tech industry roles.",
    },
    {
        "id": 3,
        "title": "Networking Mixer",
        "date": "2025-06-25",
        "location": "Delhi",
        "description": "Connect with women professionals across industries.",
    },
]

# Mentorship programs: id, title, duration and description.
mentorship_programs = [
    {
        "id": 1,
        "title": "Tech Leadership Program",
        "duration": "3 months",
        "description": "Mentorship for women aspiring to leadership roles in tech.",
    },
    {
        "id": 2,
        "title": "Career Comeback Program",
        "duration": "2 months",
        "description": "Support for women returning to work after a career break.",
    },
    {
        "id": 3,
        "title": "Entrepreneurship Guidance",
        "duration": "6 months",
        "description": "Mentorship for women starting their own businesses.",
    },
]

# Frequently asked questions: question/answer pairs (no explicit id field).
faqs = [
    {
        "question": "How do I update my profile?",
        "answer": "Log in to your JobsForHer account, click on your profile picture, select 'Edit Profile', and update your information.",
    },
    {
        "question": "How can I apply for jobs?",
        "answer": "Browse job listings, click on a job you're interested in, and click the 'Apply' button. You'll need to complete your profile first.",
    },
    {
        "question": "What is the mentorship program?",
        "answer": "Our mentorship programs connect you with experienced professionals who can guide your career growth in specific areas.",
    },
    {
        "question": "How do I sign up for events?",
        "answer": "Browse our events section, select an event, and click 'Register'. You'll receive a confirmation email with details.",
    },
]

# Free-text description of the organisation.
company_info = """
JobsForHer Foundation is dedicated to empowering women in their professional careers through job opportunities,
mentorship, networking events, and skill development. Our platform connects women with employers committed to
diversity and inclusion, providing resources to help women advance in their careers or return to the workforce
after a break. We offer job listings across various industries, mentorship programs, career advice, and
community events designed specifically for women professionals.
"""

# STEP 4: Create text chunks for embedding
# Every knowledge-base record becomes one text chunk. The two lists are parallel:
# all_metadata[i] records the source type and id of all_texts[i].
all_texts = []
all_metadata = []

# Process job listings
for job in job_listings:
    text = f"Job Title: {job['title']}\nCompany: {job['company']}\nLocation: {job['location']}\nDescription: {job['description']}"
    all_texts.append(text)
    all_metadata.append({"type": "job", "id": job["id"]})

# Process events
for event in events:
    text = f"Event: {event['title']}\nDate: {event['date']}\nLocation: {event['location']}\nDescription: {event['description']}"
    all_texts.append(text)
    all_metadata.append({"type": "event", "id": event["id"]})

# Process mentorship programs
for program in mentorship_programs:
    text = f"Program: {program['title']}\nDuration: {program['duration']}\nDescription: {program['description']}"
    all_texts.append(text)
    all_metadata.append({"type": "mentorship", "id": program["id"]})

# Process FAQs
# FAQ records have no "id" key, so their list position is used. enumerate() avoids the
# per-item linear search of faqs.index(faq) and stays correct if two entries are equal.
for faq_id, faq in enumerate(faqs):
    text = f"Q: {faq['question']}\nA: {faq['answer']}"
    all_texts.append(text)
    all_metadata.append({"type": "faq", "id": faq_id})

# Add company info
all_texts.append(company_info)
all_metadata.append({"type": "info", "id": 0})

# The retrieval step looks both lists up by the same position, so they must stay aligned.
assert len(all_texts) == len(all_metadata), "all_texts and all_metadata are out of sync"

# STEP 5: Build the RAG system
# The same sentence-transformer embeds the knowledge base here and user queries later.
print("Loading embedding model...")
embedding_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Embed every knowledge-base chunk in a single call
print("Creating embeddings...")
embeddings = embedding_model.encode(all_texts)

# FAISS flat L2 index sized to the embedding width; vectors are handed over as float32
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
embeddings_float32 = np.array(embeddings).astype('float32')
index.add(embeddings_float32)

# STEP 6: Load a small, free language model from Hugging Face
print("Loading language model (this may take a few minutes)...")

# Small model that works without API key
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# NOTE(review): the weights are loaded as float16 and no device is selected here, so the
# model presumably stays on CPU, where half precision can be slow or unsupported for some
# ops. Confirm the target runtime before changing the dtype or moving the model.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)

# STEP 7: Create a function to generate responses
def _neutralize_gendered_terms(text):
    """Replace the whole words girl/girls/lady/ladies with woman/women, keeping the casing style."""
    import re  # local import so this block does not depend on the notebook's import cell

    replacements = {"girl": "woman", "girls": "women", "lady": "woman", "ladies": "women"}

    def _swap(match):
        word = match.group(0)
        neutral = replacements[word.lower()]
        if word.isupper():
            return neutral.upper()
        if word[0].isupper():
            return neutral.capitalize()
        return neutral

    # \b keeps the match to whole words, so e.g. "malady" is left untouched.
    return re.sub(r"\b(?:girls?|lady|ladies)\b", _swap, text, flags=re.IGNORECASE)


def generate_response(query, chat_history=None, top_k=3, max_new_tokens=256):
    """Answer a user query as "Asha", using canned replies or retrieval plus the language model.

    Args:
        query: The user's question.
        chat_history: Earlier conversation, either as already-formatted text or as an
            iterable of turns (each turn is rendered with ``str``). ``None`` or empty
            means no history.
        top_k: Number of knowledge-base chunks to retrieve for the prompt context.
        max_new_tokens: Upper bound on the number of tokens generated for the reply.

    Returns:
        The reply text. A fixed apology message is returned when the model produces
        no usable text.
    """
    normalized_query = query.lower()

    # Process common queries directly. These run before retrieval so no embedding or
    # index search is spent on questions that get a canned reply.
    if "update profile" in normalized_query or "edit profile" in normalized_query:
        return "To update your profile, log in to your JobsForHer account, click on your profile picture in the top right, select 'Edit Profile', and update your information as needed."

    if "apply for job" in normalized_query or "job application" in normalized_query:
        return "To apply for jobs, browse our job listings, click on a job you're interested in, and click the 'Apply' button. Make sure your profile is complete with your resume and relevant experience."

    # "sign up" / "register" also appear in event questions ("How do I sign up for
    # events?"); those must go through retrieval instead of the account-creation reply.
    mentions_signup = "sign up" in normalized_query or "register" in normalized_query
    if "create account" in normalized_query or (mentions_signup and "event" not in normalized_query):
        return "To create a JobsForHer account, visit our homepage and click 'Sign Up'. Enter your email address and create a password, then complete your profile with your professional details."

    # Create embedding for the query and search the knowledge base
    query_embedding = embedding_model.encode([query])[0]
    _, indices = index.search(np.array([query_embedding]).astype('float32'), top_k)

    # FAISS pads missing neighbours with -1; skip them instead of indexing all_texts[-1].
    retrieved_texts = [all_texts[idx] for idx in indices[0] if idx >= 0]
    context = "\n\n".join(retrieved_texts)

    # Render the history as plain text so the prompt never shows a Python repr like "[]".
    if not chat_history:
        history_text = ""
    elif isinstance(chat_history, str):
        history_text = chat_history
    else:
        history_text = "\n".join(str(turn) for turn in chat_history)

    # Create a prompt for the model
    prompt = f"""You are Asha, an AI assistant for JobsForHer Foundation, a platform dedicated to empowering women in their professional careers.
Answer the following question based on the context provided. Be helpful, supportive, and encouraging.
If the information is not in the context, use your general knowledge but mention that this is general advice.

Context:
{context}

Chat History:
{history_text}

Question: {query}

Asha:"""

    # Generate response
    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded.input_ids.to(model.device)
    attention_mask = encoded.attention_mask.to(model.device)

    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        # max_new_tokens bounds only the reply; max_length would also count the prompt
        # and leave no room to generate once context plus history grow.
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )

    # Decode only the newly generated tokens, so an "Asha:" inside the prompt or the
    # chat history cannot be mistaken for the start of the reply.
    prompt_length = input_ids.shape[-1]
    response = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Drop any follow-on turns the model invented after its own answer.
    for turn_marker in ("\nQuestion:", "\nUser:", "\nAsha:"):
        response = response.split(turn_marker, 1)[0]
    response = response.strip()

    if not response:
        return "I'm sorry, I couldn't generate a proper response. Please try rephrasing your question."

    # Apply gender bias detection and correction (simplified)
    return _neutralize_gendered_terms(response)

# STEP 8: Create a Gradio interface
def respond(message, history):
    """Chat callback: flatten the conversation history and delegate to generate_response.

    Args:
        message: The latest user message.
        history: Previous turns. Supports pair-style history (``[user, assistant]``
            sequences) as well as message-style history (dicts with ``"role"`` and
            ``"content"`` keys). ``None`` is treated as empty.

    Returns:
        The reply produced by ``generate_response``.
    """
    def _as_text(value):
        # A missing reply should read as empty, not as the literal string "None".
        return "" if value is None else value

    # Format chat history for context
    pieces = []
    for turn in history or []:
        if isinstance(turn, dict):
            content = _as_text(turn.get("content"))
            if turn.get("role") == "user":
                pieces.append(f"User: {content}\n")
            else:
                pieces.append(f"Asha: {content}\n\n")
        else:
            pieces.append(f"User: {_as_text(turn[0])}\nAsha: {_as_text(turn[1])}\n\n")
    formatted_history = "".join(pieces)

    return generate_response(message, formatted_history)

# Starter prompts offered as clickable examples in the chat UI
example_prompts = [
    "Tell me about JobsForHer",
    "What job opportunities are available?",
    "Are there any upcoming events?",
    "How do I update my profile?",
    "Tell me about mentorship programs",
]

# Markdown greeting passed to the chat interface as its description
welcome_message = """
# Welcome to Asha AI 👋

I'm your virtual assistant for JobsForHer Foundation. I can help you with:

* Finding job opportunities
* Learning about mentorship programs
* Discovering upcoming events
* Answering questions about profile setup
* Providing career guidance for women professionals

How can I assist you today?
"""

# Wire the `respond` callback into a Gradio chat interface
demo = gr.ChatInterface(
    respond,
    examples=example_prompts,
    theme="soft",
    description=welcome_message,
    title="Asha AI - JobsForHer Assistant",
)

# STEP 9: Launch with Gradio's built-in sharing
# The public link comes from Gradio itself (share=True), so ngrok is not used here.
print("Starting Gradio app with sharing enabled...")
print("When complete, you'll see a public URL (*.gradio.live) that you can use to access the chatbot")
demo.launch(share=True)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Loading embedding model...
Creating embeddings...
Loading language model (this may take a few minutes)...
Starting Gradio app with sharing enabled...
When complete, you'll see a public URL (*.gradio.live) that you can use to access the chatbot
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://8982dd667a4d98f67d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


