In [1]:
!pip install -q sentence-transformers spacy textblob
!python -m spacy download en_core_web_sm
!python -m textblob.download_corpora


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m90.7 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up

In [2]:
import spacy
from sentence_transformers import SentenceTransformer, util
import random

# Names of the pretrained resources, kept in one place so they are easy to swap.
SPACY_PIPELINE_NAME = "en_core_web_sm"
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# Load both models once; later cells reuse `nlp` and `model`.
nlp = spacy.load(SPACY_PIPELINE_NAME)
model = SentenceTransformer(EMBEDDING_MODEL_NAME)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [3]:
def preprocess(text):
    """Reduce free text to a space-separated string of lemmas.

    The text is lower-cased and passed through the module-level spaCy
    pipeline ``nlp``. Stop words and tokens that are not purely alphabetic
    are discarded; the lemma of every remaining token is kept, in order.

    Args:
        text: The raw string to normalize.

    Returns:
        The kept lemmas joined by single spaces (an empty string if no token
        survives the filter).
    """
    kept_lemmas = []
    for tok in nlp(text.lower()):
        # Skip anything that is a stop word or contains non-letter characters.
        if tok.is_stop or not tok.is_alpha:
            continue
        kept_lemmas.append(tok.lemma_)
    return " ".join(kept_lemmas)


In [4]:
# Question bank: maps a lower-case role name to a list of question records.
# Each record holds an "id", the "question" text shown to the candidate, and a
# reference "ideal_answer". Ids are unique only within a role — the same
# "q1".."q3" ids recur under every role.
questions = {
    "data analyst": [
        {
            "id": "q1",
            "question": "How do you approach data cleaning?",
            "ideal_answer": "I use pandas to handle missing values, remove duplicates, and standardize formats."
        },
        {
            "id": "q2",
            "question": "What is the difference between variance and standard deviation?",
            "ideal_answer": "Variance measures data spread; standard deviation is its square root."
        },
        {
            "id": "q3",
            "question": "Describe a project where you used SQL.",
            "ideal_answer": "I wrote SQL queries to extract insights from customer data and created summary dashboards."
        }
    ],

    "hr": [
        {
            "id": "q1",
            "question": "Describe a time you handled conflict in a team.",
            "ideal_answer": "I encouraged open communication, mediated differences, and aligned everyone on shared goals."
        },
        {
            "id": "q2",
            "question": "What steps do you take in recruitment?",
            "ideal_answer": "I handle sourcing, screening, interviewing, and onboarding candidates efficiently."
        },
        {
            "id": "q3",
            "question": "How do you ensure diversity and inclusion in hiring?",
            "ideal_answer": "By using blind resumes, structured interviews, and broadening sourcing strategies."
        }
    ],

    "software engineer": [
        {
            "id": "q1",
            "question": "Explain the difference between multithreading and multiprocessing.",
            "ideal_answer": "Multithreading shares the same memory space; multiprocessing runs processes in separate memory spaces for parallelism."
        },
        {
            "id": "q2",
            "question": "How do you handle version control?",
            "ideal_answer": "I use Git for branching, merging, and managing code across teams."
        },
        {
            "id": "q3",
            "question": "Describe your experience with REST APIs.",
            "ideal_answer": "I have designed and consumed REST APIs using Flask and FastAPI, ensuring stateless communication."
        }
    ],

    "product manager": [
        {
            "id": "q1",
            "question": "How do you prioritize product features?",
            "ideal_answer": "I use frameworks like RICE and MoSCoW based on customer impact, effort, and alignment with strategy."
        },
        {
            "id": "q2",
            "question": "Describe a time you handled conflicting stakeholder requirements.",
            "ideal_answer": "I conducted meetings to understand concerns, aligned on goals, and proposed a roadmap balancing priorities."
        },
        {
            "id": "q3",
            "question": "What’s your approach to user research?",
            "ideal_answer": "I gather insights through surveys, user interviews, A/B testing, and analytics tools."
        }
    ],

    "marketing analyst": [
        {
            "id": "q1",
            "question": "How do you measure the effectiveness of a marketing campaign?",
            "ideal_answer": "By analyzing KPIs like ROI, CTR, conversion rate, and customer acquisition cost using tools like Google Analytics."
        },
        {
            "id": "q2",
            "question": "What tools do you use for marketing data analysis?",
            "ideal_answer": "Google Analytics, HubSpot, Excel, and Tableau are my go-to tools for analyzing campaign performance."
        },
        {
            "id": "q3",
            "question": "How do you segment a market?",
            "ideal_answer": "I segment based on demographics, psychographics, behavior, and customer needs using collected data."
        }
    ]
}


In [5]:
# Offer every role in the question bank as a numbered menu (1-based).
available_roles = list(questions.keys())

print("👋 Welcome to the AI Interview Bot!")
print("Please choose a role to apply for:")

for idx, role in enumerate(available_roles, start=1):
    print(f"{idx}. {role.title()}")

# Keep asking until the reply is a whole number inside the menu range.
# Without this check, non-numeric input raises ValueError, an over-range
# number raises IndexError, and 0 or a negative number silently picks a role
# through Python's negative indexing.
while True:
    raw_choice = input("Enter your choice (number): ").strip()
    try:
        role_choice = int(raw_choice)
    except ValueError:
        role_choice = None
    if role_choice is not None and 1 <= role_choice <= len(available_roles):
        break
    print(f"Please enter a number between 1 and {len(available_roles)}.")

# Convert the 1-based menu number back to a list index.
user_role = available_roles[role_choice - 1]


👋 Welcome to the AI Interview Bot!
Please choose a role to apply for:
1. Data Analyst
2. Hr
3. Software Engineer
4. Product Manager
5. Marketing Analyst
Enter your choice (number): 1


In [6]:
# Draw up to two distinct questions at random from the chosen role's pool.
role_pool = questions[user_role]
role_questions = random.sample(role_pool, k=min(2, len(role_pool)))

# Ask each drawn question and store the reply next to its reference answer,
# keyed by the question id.
user_answers = {}
for item in role_questions:
    print(f"\n💬 {item['question']}")
    reply = input("Your Answer: ")
    user_answers[item['id']] = dict(
        user_answer=reply,
        ideal_answer=item["ideal_answer"],
    )



💬 Describe a project where you used SQL.
Your Answer: i used sql to analyze sales data in a retail database.

💬 How do you approach data cleaning?
Your Answer: I start by inspecting the dataset for missing values, duplicates, and inconsistent formats.   I handle missing values using imputation or removal based on context.   Then I normalize text fields, encode categorical variables, and scale numerical features if needed.   I also remove outliers and ensure the data types are consistent across columns.


In [7]:
def get_similarity(text1, text2):
    """Return the cosine similarity of two texts' embeddings as a Python float.

    Each text is encoded on its own with the module-level ``model`` (tensor
    output), and the two embeddings are compared with ``util.cos_sim``.

    Args:
        text1: First text to embed.
        text2: Second text to embed.

    Returns:
        The single entry of the similarity matrix, converted to ``float``.
    """
    first_vec, second_vec = (
        model.encode(txt, convert_to_tensor=True) for txt in (text1, text2)
    )
    score_matrix = util.cos_sim(first_vec, second_vec)
    # cos_sim yields a matrix; for one embedding per side only [0][0] exists.
    return float(score_matrix[0][0])


In [8]:
def get_feedback(score):
    """Translate a similarity score into a short feedback message.

    Args:
        score: Numeric similarity score.

    Returns:
        The "excellent" message for scores of at least 0.8, the "decent"
        message for scores of at least 0.5, and the "needs improvement"
        message for everything else.
    """
    # Tiers are checked from the highest floor down; the first match wins.
    tiers = (
        (0.8, "✅ Excellent answer — very relevant and complete."),
        (0.5, "🟡 Decent answer — try to elaborate more or add examples."),
    )
    for floor, message in tiers:
        if score >= floor:
            return message
    return "🔴 Needs improvement — try to be more specific or relevant."


In [9]:
# Score every collected answer against its reference answer.
results = {}

for qid, pair in user_answers.items():
    cleaned_user = preprocess(pair["user_answer"])
    cleaned_ideal = preprocess(pair["ideal_answer"])

    if cleaned_user:
        sim_score = get_similarity(cleaned_user, cleaned_ideal)
    else:
        # A blank answer, or one made only of stop words, preprocesses to an
        # empty string. Embedding "" would still give an arbitrary non-zero
        # similarity, so score it as no match instead.
        sim_score = 0.0

    # Cosine similarity can be negative; clamp before scaling so the
    # 5-point score always stays within 0..5.
    score_5 = round(min(max(sim_score, 0.0), 1.0) * 5, 2)
    feedback = get_feedback(sim_score)

    results[qid] = {
        "user_answer": pair["user_answer"],
        # The raw (unclamped) similarity is reported for transparency.
        "similarity_score": round(sim_score, 3),
        "score_out_of_5": score_5,
        "feedback": feedback
    }


In [10]:
import pandas as pd
# Print a heading, then render the results as a table. `results` is keyed by
# question id, so transposing puts one question on each row.
print("\n📊 Final Interview Results:\n")
pd.DataFrame(results).transpose()



📊 Final Interview Results:



Unnamed: 0,user_answer,similarity_score,score_out_of_5,feedback
q3,i used sql to analyze sales data in a retail d...,0.458,2.29,🔴 Needs improvement — try to be more specific ...
q1,I start by inspecting the dataset for missing ...,0.566,2.83,🟡 Decent answer — try to elaborate more or add...


In [11]:
import json
# Persist the scored results as indented JSON in the working directory.
with open("interview_results.json", "w") as out_file:
    out_file.write(json.dumps(results, indent=2))
