# 🧠 Interview Support System: Fine-Tuned Falcon + Gemini

## 🚀 Setup and Installations

In [1]:
!pip install -Uqqq pip --progress-bar off

!pip install -qqq gradio --progress-bar off
!pip install -qqq bitsandbytes==0.42.0 --progress-bar off
!pip install -qqq torch==2.1.2 --progress-bar off
!pip install -qqq -U transformers==4.39.3 --progress-bar off
!pip install -qqq -U peft==0.10.0 --progress-bar off
!pip install -qqq -U accelerate==0.29.3 --progress-bar off
!pip install -qqq loralib==0.1.2 --progress-bar off
!pip install -qqq einops==0.7.0 --progress-bar off
!pip install -qqq google-generativeai

## Import Required Libraries

In [None]:
import os
import gradio as gr
import requests
import bitsandbytes as bnb
import torch
import transformers

from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

## Configuration

In [None]:
# Restrict this process to GPU index 0.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

# Quantization settings handed to the model loader: 4-bit NF4 weights with
# double quantization, using bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

## Load Fine-Tuned Falcon Model (QLoRA)

In [None]:
# Hub id of the fine-tuned adapter; its config records which base checkpoint
# the adapter was trained on.
PEFT_MODEL = "Pranav06/falcon-7b-qlora-interview_qa-support-bot"
config = PeftConfig.from_pretrained(PEFT_MODEL)
base_model_id = config.base_model_name_or_path

# The tokenizer comes from the base checkpoint; EOS is reused as the pad token.
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
tokenizer.pad_token = tokenizer.eos_token

# Load the quantized base model first, then wrap it with the adapter weights.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto",
    # NOTE(review): allows code shipped with the checkpoint repo to run locally.
    trust_remote_code=True,
    return_dict=True,
)
model = PeftModel.from_pretrained(base_model, PEFT_MODEL)

## Generation Settings

In [None]:
# Device that tokenized prompts are moved to before generation.
# NOTE(review): hard-coded to the first CUDA device, while the model is placed
# with device_map="auto" — confirm both end up on the same device.
DEVICE = "cuda:0"

In [16]:
# Generation settings passed to every Falcon `generate()` call in this notebook.
generation_config = model.generation_config
generation_config.max_new_tokens = 200
# temperature / top_p are sampling parameters: they only take effect when
# sampling is enabled. Without do_sample=True, generate() decodes greedily and
# ignores both values.
generation_config.do_sample = True
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
# The tokenizer has no dedicated pad token, so EOS is used for padding as well.
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

## Falcon Prompting Function (Guidance Generation)

In [17]:
def generate_response(question: str) -> str:
    """Generate one line of interview guidance from the fine-tuned Falcon model.

    The question is wrapped in the ``<human>: ... <assistant>:`` prompt format
    and only the first line of the model's continuation is returned.

    Args:
        question: The interview question to send to the model.

    Returns:
        The first line of the generated continuation with surrounding
        whitespace removed (an empty string if nothing was generated).
    """
    prompt = f"""
<human>: {question}
<assistant>:
""".strip()
    encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_config=generation_config,
        )

    # The generated sequence starts with the prompt tokens. Decode only the
    # tokens after the prompt instead of searching the decoded text for the
    # "<assistant>:" marker: str.find() returns -1 when the marker is missing
    # (which would slice at a meaningless offset) and matches the wrong
    # occurrence when the question itself contains the marker.
    prompt_length = encoding.input_ids.shape[-1]
    completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # Keep only the first line of the answer.
    return completion.strip().split("\n", 1)[0].strip()

In [18]:
# Smoke test: run one sample question through the Falcon guidance function
# and print the returned guidance line.
prompt = "Why should we hire you?"
print(generate_response(prompt))



Talk about your relevant experience, skills, and qualifications.
Talk about your relevant experience, skills, and qualifications.


## Setup Gemini API (for Final Interview Answer)

In [None]:
import google.generativeai as genai
from google.colab import userdata

# The API key is read from Colab's user secrets instead of being hard-coded.
GEMINI_API_KEY = userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=GEMINI_API_KEY)

# Generation settings attached to the Gemini model created below.
gemini_generation_config = dict(
    temperature=1,
    top_p=0.95,
    top_k=40,
    max_output_tokens=8192,
)

gen_model = genai.GenerativeModel(
    generation_config=gemini_generation_config,
    model_name="gemini-2.0-flash-exp",
)

# Gemini Prompting Function
def generate_gemini_response(guidance_text):
    """Turn guidance text into a first-person example interview answer via Gemini.

    Args:
        guidance_text: Guidance string that is interpolated into the prompt.

    Returns:
        The text of Gemini's response, or a short explanatory message when the
        response contains no text.
    """
    prompt = f"""
    You are a professional job candidate preparing for an important interview.

    Based on the following guidance: '{guidance_text}', write a realistic and professional interview answer:
    - Use only first-person tone ("I", "my", "me").
    - Do NOT include casual words like "okay", "sure", or "here's".
    - Directly start the answer without any introduction.
    - Keep it concise, confident, and include a real-world example or achievement if possible.
    - Maintain a positive, proactive, and professional tone throughout.
    """

    response = gen_model.generate_content(prompt)
    try:
        return response.text
    except ValueError:
        # `response.text` raises ValueError when the response holds no text part
        # (for example when the reply was blocked). Return a readable message so
        # the caller gets a string instead of an unhandled exception.
        return "No example answer could be generated for this guidance. Please try rephrasing the question."


## Full Interview Assistant Pipeline

In [26]:
def interview_assistant(interview_question):
    """Run the two-stage pipeline: Falcon guidance, then a Gemini example answer.

    Args:
        interview_question: The interview question entered by the user.

    Returns:
        A ``(guidance, example_answer)`` tuple of strings. For a blank question
        neither model is called: a short hint is returned in place of the
        guidance and the example answer is an empty string.
    """
    # A blank submission (e.g. clicking the button with an empty textbox) would
    # otherwise spend a Falcon generation and a Gemini API call on no input.
    if not interview_question or not interview_question.strip():
        return "Please enter an interview question.", ""

    # Step 1: the fine-tuned Falcon model produces guidance for the question.
    guidance = generate_response(interview_question)

    # Step 2: Gemini turns that guidance into a full example answer.
    example_answer = generate_gemini_response(guidance)

    return guidance, example_answer

## Gradio Web App

In [None]:
# Build the UI: a question box, a submit button, and two read-only output boxes.
with gr.Blocks() as demo:
    gr.Markdown("<h1 style='text-align: center;'>🧠 Interview Support System</h1>")

    with gr.Row():
        question_input = gr.Textbox(
            label="Interview Question",
            placeholder="Enter an interview question...",
        )

    with gr.Row():
        submit_btn = gr.Button("Generate Answer")

    with gr.Row():
        guidance_output = gr.Textbox(interactive=False, label="Model Guidance")
        example_output = gr.Textbox(interactive=False, label="Example Interview Answer")

    # Clicking the button runs the full pipeline and fills both output boxes.
    submit_btn.click(
        fn=interview_assistant,
        inputs=[question_input],
        outputs=[guidance_output, example_output],
    )

# debug=True keeps the cell running so errors and logs stay visible.
demo.launch(debug=True)

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://e34403c024b344b0cb.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


