In [1]:
!pip install transformers PyPDF2 gradio

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting gradio
  Downloading gradio-4.40.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.112.0-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.2.0 (from gradio)
  Downloading gradio_client-1.2.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.p

In [2]:
import torch
from transformers import pipeline

# Build the TinyLlama chat text-generation pipeline in bfloat16.
# `pipe` is a module-level name: the later `generate_hr_questions(domain, num_questions=5)`
# cell reads it as a global, so this cell has to run before that function is called.
pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16, device_map="auto")

# Smoke-test the pipeline with a simple system + user conversation.
messages = [
    {
        "role": "system",
        "content": "You are a friendly chatbot who always responds in the style of a pirate",
    },
    {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
]
# Render the conversation with the tokenizer's chat template; tokenize=False keeps
# the result as text so it can be passed straight to `pipe`.
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# Sampling is enabled (do_sample=True) and no seed is set in this cell.
outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
# As the captured output shows, the printed text begins with the rendered prompt
# and is followed by the model's reply.
print(outputs[0]["generated_text"])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

<|system|>
You are a friendly chatbot who always responds in the style of a pirate</s>
<|user|>
How many helicopters can a human eat in one sitting?</s>
<|assistant|>
I don't have access to a physical or internal knowledge of humans, but in general, a human can eat approximately 250-300 calories per hour, depending on the size and activity level. So, it's safe to say that eating a whole helicopter, assuming it contains enough calories, would be challenging for humans.


In [3]:
import PyPDF2

def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page in a PDF.

    Uses the PyPDF2 3.x reader API (``PdfReader`` and ``.pages``). The legacy
    ``PdfFileReader`` / ``numPages`` / ``getPage`` names were removed in
    PyPDF2 3.0, and the install log of this notebook shows PyPDF2 3.0.1.

    Args:
        file_path: Path to the PDF (or a binary file object) handed to
            ``PyPDF2.PdfReader``.

    Returns:
        str: Page texts joined in page order with no separator. A page that
        yields no text contributes an empty string.
    """
    pdf_reader = PyPDF2.PdfReader(file_path)
    # `or ""` keeps the join safe if a page produces no text at all.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)

def analyze_domain(resume_text):
    """Pick the first known engineering domain mentioned in a resume.

    The check is a case-insensitive substring search, tried in the order
    Computer Science, Mechanical Engineering, Electrical Engineering.

    Args:
        resume_text: Full text of the resume.

    Returns:
        str: The first domain whose name occurs in the text, or 'General'
        when none of them occur.
    """
    known_domains = ('Computer Science', 'Mechanical Engineering', 'Electrical Engineering')
    haystack = resume_text.lower()
    hits = (name for name in known_domains if name.lower() in haystack)
    return next(hits, 'General')

In [4]:
def generate_hr_questions(domain, num_questions=5):
    """Generate HR interview questions for a domain using the shared `pipe`.

    Relies on the module-level `pipe` text-generation pipeline created in an
    earlier cell.

    Args:
        domain: Domain name interpolated into the instruction.
        num_questions: How many questions the instruction asks for.

    Returns:
        list[str]: The non-empty lines of the generated reply, stripped of
        surrounding whitespace. The prompt itself is not included.
    """
    prompt = [
        {
            "role": "system",
            "content": f"Generate {num_questions} complex HR interview questions for a candidate specialized in {domain}."
        }
    ]
    formatted_prompt = pipe.tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
    # The pipeline echoes the prompt in `generated_text` by default (visible in
    # the smoke-test output earlier in the notebook); return_full_text=False
    # keeps the <|system|> prompt lines out of the question list.
    responses = pipe(formatted_prompt, max_new_tokens=200, num_return_sequences=1, return_full_text=False)
    generated = responses[0]['generated_text']
    # Drop blank lines so callers receive only actual content lines.
    return [line.strip() for line in generated.split('\n') if line.strip()]

In [7]:
import gradio as gr
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, pipeline
import torch
import PyPDF2

# Define the functions to extract text from PDF, analyze domain, and generate HR questions
def extract_text_from_pdf(file):
    """Read a PDF and return its text, one newline-terminated chunk per page.

    Args:
        file: Path or file object accepted by ``PyPDF2.PdfReader``.

    Returns:
        str: Each page's extracted text followed by a newline, in page order.
    """
    return ''.join(
        pdf_page.extract_text() + '\n'
        for pdf_page in PyPDF2.PdfReader(file).pages
    )

def analyze_domain(resume_text):
    """Classify a resume into one of three domains with a DistilBERT classifier.

    NOTE(review): 'distilbert-base-uncased' looks like the generic base
    checkpoint; its sequence-classification head is presumably freshly
    initialised, so predictions are not meaningful until a checkpoint
    fine-tuned on these domains is substituted — confirm.

    Args:
        resume_text: Resume text; it is truncated to 512 tokens by the tokenizer call.

    Returns:
        str: "Data Science", "Software Engineering" or "Machine Learning";
        "Unknown" if the predicted class index is not in the mapping.
    """
    domain_mapping = {0: "Data Science", 1: "Software Engineering", 2: "Machine Learning"}
    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    # Size the classification head to the label map. Without num_labels the
    # head falls back to the library default (two labels), which would make
    # index 2 ("Machine Learning") unreachable.
    model = DistilBertForSequenceClassification.from_pretrained(
        'distilbert-base-uncased', num_labels=len(domain_mapping)
    )
    inputs = tokenizer(resume_text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    # Inference only: no need to record the autograd graph.
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_class = torch.argmax(logits, dim=1).item()
    return domain_mapping.get(predicted_class, "Unknown")

def generate_hr_questions(domain):
    """Generate HR interview questions for a domain with TinyLlama.

    The text-generation pipeline is built on first use and cached on the
    function object, so later calls reuse the loaded model instead of
    reloading it for every chat request.

    Args:
        domain: Domain name interpolated into the instruction.

    Returns:
        list[str]: Non-empty, stripped lines of the generated reply. The
        prompt itself is not included.
    """
    generator = getattr(generate_hr_questions, "_generator", None)
    if generator is None:
        generator = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16, device_map="auto")
        generate_hr_questions._generator = generator

    # Wrap the instruction in the model's chat template, the same way the
    # smoke-test cell of this notebook prompts the chat checkpoint.
    messages = [
        {
            "role": "user",
            "content": f"Generate 10 high-quality HR interview questions for a candidate specializing in {domain}:",
        }
    ]
    prompt = generator.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # return_full_text=False: the pipeline otherwise echoes the prompt, which
    # would show up as the first "question".
    outputs = generator(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        return_full_text=False,
    )
    questions = outputs[0]["generated_text"]
    return [line.strip() for line in questions.splitlines() if line.strip()]

# Initialize variables
# Conversation state shared by chatbot() and upload_resume() through `global`:
# the extracted resume text, the detected domain, and the chat transcript.
resume_text, domain = "", ""
chat_history = list()

# Define the chatbot function
def chatbot(user_message):
    """Answer one chat message and append the exchange to the shared history.

    Reads the module-level `resume_text` and `domain` and appends to the
    module-level `chat_history`.

    Args:
        user_message: Text typed by the user. None is treated as empty text.

    Returns:
        list: The updated `chat_history`, a list of (user, bot) tuples. Every
        branch returns this list, including the "upload resume" branch, which
        previously returned a bare string and skipped the history.
    """
    global resume_text, domain, chat_history
    normalized = (user_message or "").lower()
    if "upload resume" in normalized:
        response_message = "Please upload a PDF resume."
    elif "generate questions" in normalized:
        if resume_text:
            questions = generate_hr_questions(domain)
            response_message = f"Domain: {domain}\nQuestions:\n" + "\n".join(questions)
        else:
            response_message = "Please upload a PDF resume first."
    else:
        response_message = "How can I assist you further? Type 'generate questions' to get interview questions."

    chat_history.append((user_message, response_message))
    return chat_history

# Define the upload resume function
def upload_resume(file):
    """Extract and classify the uploaded resume, then reset the chat.

    Updates the module-level `resume_text`, `domain` and `chat_history`.

    Args:
        file: Value delivered by the file component: an object exposing the
            path as `.name`, a plain path, or None when nothing is selected.

    Returns:
        tuple: (status message, chat history) for the status box and the
        chatbot component.
    """
    global resume_text, domain, chat_history
    if file is None:
        # Button clicked with nothing selected: keep the current state instead
        # of failing on `None.name`.
        return "Please select a PDF resume before uploading.", chat_history

    # Accept both an object carrying the path in `.name` and a plain path.
    pdf_path = getattr(file, "name", file)
    resume_text = extract_text_from_pdf(pdf_path)
    domain = analyze_domain(resume_text)
    # History rows are (user, bot) pairs; the greeting comes from the bot, so
    # it belongs in the second slot with an empty (None) user slot.
    chat_history = [(None, "Hello, how can I assist you today?")]
    return "Resume uploaded and analyzed. You can now start the chat.", chat_history

# Define the chatbot update function
def update_chatbot(chat_history, user_message):
    """Click handler for the Send button: forward the message to `chatbot`.

    Args:
        chat_history: History currently shown in the chatbot component. It is
            not read here; the value returned by `chatbot` is used instead.
        user_message: Text from the chat input box.

    Returns:
        Whatever `chatbot(user_message)` returns.
    """
    return chatbot(user_message)

# Create the Gradio Blocks
# Layout: an upload row on top, then the chat transcript, input box and Send button.
with gr.Blocks() as demo:
    gr.Markdown("# HR Interview Preparation Chatbot")
    with gr.Row():
        # File extensions must be given with a leading dot ('.pdf'); a bare
        # 'pdf' is treated as a file category rather than an extension.
        file_input = gr.File(label="Upload your resume (PDF)", file_types=['.pdf'])
        upload_button = gr.Button("Upload and Analyze Resume")
        upload_status = gr.Textbox(label="Status")

    chatbot_output = gr.Chatbot(label="Chatbot")
    chat_input = gr.Textbox(label="Chat Input")
    chat_button = gr.Button("Send")

    # upload_resume returns (status, history); update_chatbot returns the history.
    upload_button.click(upload_resume, inputs=file_input, outputs=[upload_status, chatbot_output])
    chat_button.click(update_chatbot, inputs=[chatbot_output, chat_input], outputs=chatbot_output)

demo.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://79ab42eb24b04cc4ee.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [11]:
import gradio as gr
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, pipeline
import torch
import PyPDF2

# Define the functions to extract text from PDF, analyze domain, and generate HR questions
def extract_text_from_pdf(file):
    pdf_reader = PyPDF2.PdfReader(file)
    text = ''
    for page in pdf_reader.pages:
…        chat_button = gr.Button("Send")

    upload_button.click(upload_resume, inputs=file_input, outputs=[upload_status, chatbot_output, detected_domain])
    chat_button.click(update_chatbot, inputs=[chatbot_output, chat_input, domain_dropdown], outputs=chatbot_output)

demo.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://9c8d9bf13388854a56.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [9]:
import gradio as gr
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, pipeline
import torch
import PyPDF2

# Define the functions to extract text from PDF, analyze domain, and generate HR questions
def extract_text_from_pdf(file):
    """Return the text of a PDF with a newline appended after every page.

    Args:
        file: Path or file object accepted by ``PyPDF2.PdfReader``.

    Returns:
        str: The page texts in page order, each terminated by a newline.
    """
    page_chunks = []
    for pdf_page in PyPDF2.PdfReader(file).pages:
        page_chunks.append(pdf_page.extract_text() + '\n')
    return ''.join(page_chunks)

def analyze_domain(resume_text):
    """Classify a resume into one of three domains with a DistilBERT classifier.

    NOTE(review): 'distilbert-base-uncased' looks like the generic base
    checkpoint; its sequence-classification head is presumably freshly
    initialised, so predictions are not meaningful until a checkpoint
    fine-tuned on these domains is substituted — confirm.

    Args:
        resume_text: Resume text; it is truncated to 512 tokens by the tokenizer call.

    Returns:
        str: "Data Science", "Software Engineering" or "Machine Learning";
        "Unknown" if the predicted class index is not in the mapping.
    """
    domain_mapping = {0: "Data Science", 1: "Software Engineering", 2: "Machine Learning"}
    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    # Size the classification head to the label map. Without num_labels the
    # head falls back to the library default (two labels), which would make
    # index 2 ("Machine Learning") unreachable.
    model = DistilBertForSequenceClassification.from_pretrained(
        'distilbert-base-uncased', num_labels=len(domain_mapping)
    )
    inputs = tokenizer(resume_text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    # Inference only: no need to record the autograd graph.
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_class = torch.argmax(logits, dim=1).item()
    return domain_mapping.get(predicted_class, "Unknown")

def generate_hr_questions(domain):
    """Generate HR interview questions for a domain with TinyLlama.

    The text-generation pipeline is built on first use and cached on the
    function object, so later calls reuse the loaded model instead of
    reloading it for every chat request.

    Args:
        domain: Domain name interpolated into the instruction.

    Returns:
        list[str]: Non-empty, stripped lines of the generated reply. The
        prompt itself is not included.
    """
    generator = getattr(generate_hr_questions, "_generator", None)
    if generator is None:
        generator = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16, device_map="auto")
        generate_hr_questions._generator = generator

    # Wrap the instruction in the model's chat template, the same way the
    # smoke-test cell of this notebook prompts the chat checkpoint.
    messages = [
        {
            "role": "user",
            "content": f"Generate 10 high-quality HR interview questions for a candidate specializing in {domain}:",
        }
    ]
    prompt = generator.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # return_full_text=False: the pipeline otherwise echoes the prompt, which
    # would show up as the first "question".
    outputs = generator(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        return_full_text=False,
    )
    questions = outputs[0]["generated_text"]
    return [line.strip() for line in questions.splitlines() if line.strip()]

# Initialize variables
# Conversation state shared by chatbot() and upload_resume() through `global`,
# plus the list of domain choices offered in the dropdown.
resume_text, domain = "", ""
chat_history = list()
domains = [
    "Data Science",
    "Software Engineering",
    "Machine Learning",
]

# Define the chatbot function
def chatbot(user_message, selected_domain):
    """Answer one chat message and append the exchange to the shared history.

    Reads the module-level `resume_text` and `domain` and appends to the
    module-level `chat_history`.

    Args:
        user_message: Text typed by the user. None is treated as empty text.
        selected_domain: Domain chosen in the dropdown. When it is empty or
            None, the domain detected at upload time is used instead.

    Returns:
        list: The updated `chat_history`, a list of (user, bot) tuples. Every
        branch returns this list, including the "upload resume" branch, which
        previously returned a bare string and skipped the history.
    """
    global resume_text, domain, chat_history
    normalized = (user_message or "").lower()
    if "upload resume" in normalized:
        response_message = "Please upload a PDF resume."
    elif "generate questions" in normalized:
        if resume_text:
            # Avoid prompting the model with the literal text "None" when the
            # dropdown was left untouched.
            target_domain = selected_domain or domain
            questions = generate_hr_questions(target_domain)
            response_message = f"Domain: {target_domain}\nQuestions:\n" + "\n".join(questions)
        else:
            response_message = "Please upload a PDF resume first."
    else:
        response_message = "How can I assist you further? Type 'generate questions' to get interview questions."

    chat_history.append((user_message, response_message))
    return chat_history

# Define the upload resume function
def upload_resume(file):
    """Extract and classify the uploaded resume, then reset the chat.

    Updates the module-level `resume_text`, `domain` and `chat_history`.

    Args:
        file: Value delivered by the file component: an object exposing the
            path as `.name`, a plain path, or None when nothing is selected.

    Returns:
        tuple: (status message, chat history, detected domain) for the status
        box, the chatbot component and the detected-domain box.
    """
    global resume_text, domain, chat_history
    if file is None:
        # Button clicked with nothing selected: keep the current state instead
        # of failing on `None.name`.
        return "Please select a PDF resume before uploading.", chat_history, domain

    # Accept both an object carrying the path in `.name` and a plain path.
    pdf_path = getattr(file, "name", file)
    resume_text = extract_text_from_pdf(pdf_path)
    domain = analyze_domain(resume_text)
    # History rows are (user, bot) pairs; the greeting comes from the bot, so
    # it belongs in the second slot with an empty (None) user slot.
    chat_history = [(None, "Hello, how can I assist you today?")]
    return "Resume uploaded and analyzed. You can now start the chat.", chat_history, domain

# Define the chatbot update function
def update_chatbot(chat_history, user_message, selected_domain):
    """Click handler for the Send button: forward the inputs to `chatbot`.

    Args:
        chat_history: History currently shown in the chatbot component. It is
            not read here; the value returned by `chatbot` is used instead.
        user_message: Text from the chat input box.
        selected_domain: Current value of the domain dropdown.

    Returns:
        Whatever `chatbot(user_message, selected_domain)` returns.
    """
    return chatbot(user_message, selected_domain)

# Create the Gradio Blocks
# Layout: an upload row (file, button, status, detected domain) followed by a
# chat row (domain dropdown, transcript, input box, Send button).
with gr.Blocks() as demo:
    gr.Markdown("# HR Interview Preparation Chatbot")
    with gr.Row():
        # File extensions must be given with a leading dot ('.pdf'); a bare
        # 'pdf' is treated as a file category rather than an extension.
        file_input = gr.File(label="Upload your resume (PDF)", file_types=['.pdf'])
        upload_button = gr.Button("Upload and Analyze Resume")
        upload_status = gr.Textbox(label="Status")
        detected_domain = gr.Textbox(label="Detected Domain")

    with gr.Row():
        domain_dropdown = gr.Dropdown(label="Select Domain", choices=domains)
        chatbot_output = gr.Chatbot(label="Chatbot")
        chat_input = gr.Textbox(label="Chat Input")
        chat_button = gr.Button("Send")

    # upload_resume returns (status, history, domain); update_chatbot returns the history.
    upload_button.click(upload_resume, inputs=file_input, outputs=[upload_status, chatbot_output, detected_domain])
    chat_button.click(update_chatbot, inputs=[chatbot_output, chat_input, domain_dropdown], outputs=chatbot_output)

demo.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://25ea6dbf9181b5912a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [10]:
import spacy
from spacy.util import minibatch, compounding
import PyPDF2
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, pipeline

# Load the `en_core_web_sm` spaCy pipeline once at module level; extract_keywords()
# below calls `nlp` to obtain part-of-speech tags for each token.
# NOTE(review): assumes the `en_core_web_sm` model is already installed in the
# runtime — the pip install line shown in this notebook does not list spaCy; confirm.
nlp = spacy.load("en_core_web_sm")

# Define the functions to extract text from PDF, analyze domain, and generate HR questions
def extract_text_from_pdf(file):
    """Extract the text of a PDF, ending each page's text with a newline.

    Args:
        file: Path or file object accepted by ``PyPDF2.PdfReader``.

    Returns:
        str: Page texts in page order, each followed by a newline; an empty
        string when the document has no pages.
    """
    page_texts = [pdf_page.extract_text() for pdf_page in PyPDF2.PdfReader(file).pages]
    if not page_texts:
        return ''
    # Joining with '\n' and appending one more reproduces "text + '\n'" per page.
    return '\n'.join(page_texts) + '\n'

def extract_keywords(text):
    """Collect the text of every token tagged as a noun or proper noun.

    Runs the module-level `nlp` pipeline over the text.

    Args:
        text: Text to analyse.

    Returns:
        list[str]: Token texts, in document order, whose `pos_` is "NOUN" or
        "PROPN". Duplicates are kept.
    """
    wanted_tags = ("NOUN", "PROPN")
    return [tok.text for tok in nlp(text) if tok.pos_ in wanted_tags]

def analyze_domain(keywords):
    """Map resume keywords to the domains they indicate.

    Each keyword is checked on its own and joined with the keyword that
    follows it, so two-word terms such as "machine learning" are found even
    when the keywords arrive as single tokens. Every domain is reported once,
    in order of first appearance, so downstream question generation is not
    repeated for duplicate hits.

    Args:
        keywords: Iterable of keyword strings; None is treated as empty.

    Returns:
        list[str]: Distinct domain names, ordered by first match.
    """
    # Extend this table to support further domains.
    term_to_domain = {
        "machine learning": "Machine Learning",
        "deep learning": "Machine Learning",
        "ai": "Machine Learning",
        "software engineering": "Software Engineering",
        "programming": "Software Engineering",
        "development": "Software Engineering",
        "data science": "Data Science",
        "data analysis": "Data Science",
        "statistics": "Data Science",
    }
    terms = [str(keyword).lower() for keyword in (keywords or [])]

    domains = []
    for index, term in enumerate(terms):
        candidates = [term]
        if index + 1 < len(terms):
            # Adjacent keywords joined by a space cover the two-word terms.
            candidates.append(f"{term} {terms[index + 1]}")
        for candidate in candidates:
            matched = term_to_domain.get(candidate)
            if matched is not None and matched not in domains:
                domains.append(matched)
    return domains

def generate_hr_questions(domains):
    """Generate HR interview questions for each domain with TinyLlama.

    The text-generation pipeline is built on first use and cached on the
    function object, so later calls reuse the loaded model. Nothing is loaded
    when `domains` is empty.

    Args:
        domains: Iterable of domain names; one generation is run per entry.

    Returns:
        list[str]: Non-empty, stripped lines of all generated replies, in
        domain order. The prompts themselves are not included.
    """
    if not domains:
        return []

    generator = getattr(generate_hr_questions, "_generator", None)
    if generator is None:
        generator = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16, device_map="auto")
        generate_hr_questions._generator = generator

    questions = []
    for domain in domains:
        # Wrap the instruction in the model's chat template before generating.
        messages = [
            {
                "role": "user",
                "content": f"Generate 10 high-quality HR interview questions for a candidate specializing in {domain}:",
            }
        ]
        prompt = generator.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        # return_full_text=False: the pipeline otherwise echoes the prompt,
        # which would be returned as if it were a question.
        outputs = generator(
            prompt,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.95,
            return_full_text=False,
        )
        reply = outputs[0]["generated_text"]
        questions.extend(line.strip() for line in reply.splitlines() if line.strip())
    return questions

# Define the chatbot function
def chatbot(user_message, cv_file):
    """Build interview questions from the uploaded CV as one chat exchange.

    Pipeline: extract the PDF text, pull keywords, map them to domains, then
    generate questions for those domains.

    Args:
        user_message: Text typed by the user; shown as the user turn.
        cv_file: Uploaded CV as passed to `extract_text_from_pdf`, or None
            when no file has been provided.

    Returns:
        list[tuple]: A one-row chat history of (user message, bot reply). The
        chatbot output component takes (user, bot) pairs, so the questions are
        joined into a single reply instead of being returned as bare strings.
    """
    if cv_file is None:
        return [(user_message, "Please upload a PDF CV first.")]

    cv_text = extract_text_from_pdf(cv_file)
    keywords = extract_keywords(cv_text)
    domains = analyze_domain(keywords)
    if not domains:
        # Nothing to generate for; say so rather than returning an empty chat.
        return [(user_message, "No supported domain was detected in the uploaded CV.")]

    questions = generate_hr_questions(domains)
    return [(user_message, "\n".join(questions))]

# Create the Gradio Blocks
# Layout: one row with the CV upload, chat input and Send button, with the
# chat transcript underneath.
with gr.Blocks() as demo:
    gr.Markdown("# HR Interview Preparation Chatbot")
    with gr.Row():
        # File extensions must be given with a leading dot ('.pdf'); a bare
        # 'pdf' is treated as a file category rather than an extension.
        cv_file = gr.File(label="Upload your CV (PDF)", file_types=['.pdf'])
        chat_input = gr.Textbox(label="Chat Input")
        chat_button = gr.Button("Send")

    chatbot_output = gr.Chatbot(label="Chatbot")

    # Send runs the full CV -> keywords -> domains -> questions pipeline.
    chat_button.click(chatbot, inputs=[chat_input, cv_file], outputs=chatbot_output)

demo.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://478c2827cda1a97098.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


