<a href="https://colab.research.google.com/github/saksham1965/interview-bot/blob/main/app.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import os
import json
import random
import argparse
import datetime
from typing import List, Dict, Any, Optional

# Optional import for LLM features. The user may run without OpenAI installed.
try:
    import openai
    OPENAI_AVAILABLE = True
except Exception:
    OPENAI_AVAILABLE = False

# ---------- Config / Templates ----------
QUESTION_TEMPLATES = {
    "software_engineer": {
        "backend": [
            "Explain how you would design a scalable REST API for a social media feed.",
            "Describe how you'd debug a memory leak in a Python service.",
            "Walk me through how you'd design a database schema for an online store's orders."
        ],
        "algorithms": [
            "What's your approach to solving an array partitioning problem? Give step-by-step reasoning.",
            "Describe how a hash table works and its typical use-cases.",
            "Explain dynamic programming with an example."
        ]
    },
    "data_scientist": {
        "ml": [
            "How do you decide whether to use a linear model vs a tree-based model?",
            "Explain cross-validation and why it's important.",
            "Describe a time you improved model performance by feature engineering."
        ],
        "statistics": [
            "Explain p-values and why they can be misinterpreted.",
            "When would you use a Bayesian approach vs frequentist?",
            "Describe how to handle imbalanced classes in classification."
        ]
    },
    "product_manager": {
        "strategy": [
            "How would you prioritize features for a new mobile app?",
            "Describe metrics you would track for a launch and why.",
            "Tell me about a product tradeoff you made and the outcome."
        ]
    }
}

DEFAULT_RUBRIC = {
    "clarity": {
        "description": "How clear and structured the answer is.",
        "weights": {"excellent": 5, "good": 3, "fair": 1, "poor": 0}
    },
    "technical_accuracy": {
        "description": "Correctness and depth of technical content.",
        "weights": {"excellent": 5, "good": 3, "fair": 1, "poor": 0}
    },
    "problem_solving": {
        "description": "Approach to solving problems, tradeoffs, and reasoning.",
        "weights": {"excellent": 5, "good": 3, "fair": 1, "poor": 0}
    },
    "communication": {
        "description": "Ability to communicate technical ideas to different audiences.",
        "weights": {"excellent": 5, "good": 3, "fair": 1, "poor": 0}
    }
}

SESSIONS_DIR = "interview_sessions"
RUBRICS_FILE = "rubrics.json"

# ---------- Utilities ----------

def ensure_dirs():
    os.makedirs(SESSIONS_DIR, exist_ok=True)


def load_or_create_rubrics():
    if not os.path.exists(RUBRICS_FILE):
        with open(RUBRICS_FILE, "w") as f:
            json.dump({"default": DEFAULT_RUBRIC}, f, indent=2)
        return {"default": DEFAULT_RUBRIC}
    else:
        with open(RUBRICS_FILE, "r") as f:
            return json.load(f)


def save_session(session: Dict[str, Any]):
    ensure_dirs()
    ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
    fname = f"session_{session['candidate_name']}_{ts}.json"
    path = os.path.join(SESSIONS_DIR, fname)
    with open(path, "w") as f:
        json.dump(session, f, indent=2)
    return path

# ---------- Question Generation ----------
class QuestionGenerator:
    def __init__(self, llm_enabled: bool = False):
        self.llm_enabled = llm_enabled and OPENAI_AVAILABLE and os.getenv("OPENAI_API_KEY")

    def sample_questions(self, role: str, skills: List[str], n: int = 5) -> List[str]:
        templates = QUESTION_TEMPLATES.get(role, {})
        pool = []
        for s in skills:
            pool.extend(templates.get(s, []))
        if not pool:
            # fallback: combine generic prompts
            pool = [
                "Tell me about a challenging problem you solved recently.",
                "How do you stay up-to-date in your field?",
                "Explain a technical decision you made and why."
            ]

        questions = random.sample(pool, min(n, len(pool)))

        # Optionally augment with LLM (if enabled)
        if self.llm_enabled:
            try:
                augmented = self._augment_with_llm(role, skills, questions)
                if augmented:
                    questions = augmented
            except Exception:
                pass  # keep the sampled ones if LLM fails

        return questions

    def _augment_with_llm(self, role: str, skills: List[str], base_questions: List[str]) -> Optional[List[str]]:
        # Example: call OpenAI to rewrite/expand questions to be more varied
        prompt = (
            f"You are a helpful interview question generator. Role: {role}. Skills: {', '.join(skills)}.\n"
            f"Given these base prompts: {base_questions}\n"
            "Return 1-to-1 rewritten versions that are clear, distinct, and targeted. Return as JSON array."
        )
        resp = openai.Completion.create(
            engine="text-davinci-003",
            prompt=prompt,
            max_tokens=500,
            temperature=0.8,
            n=1
        )
        text = resp.choices[0].text.strip()
        try:
            out = json.loads(text)
            if isinstance(out, list):
                return out
        except Exception:
            return None
        return None

# ---------- Evaluator / Rubric ----------
class Evaluator:
    def __init__(self, rubric: Dict[str, Any], llm_enabled: bool = False):
        self.rubric = rubric
        self.llm_enabled = llm_enabled and OPENAI_AVAILABLE and os.getenv("OPENAI_API_KEY")

    def evaluate_manual(self, answer: str, scores: Dict[str, str]) -> Dict[str, Any]:
        """Use manual rubric ratings (excellent/good/fair/poor) provided in scores dict."""
        feedback = {}
        total = 0
        max_total = 0
        for criterion, cfg in self.rubric.items():
            weight_map = cfg.get("weights", {})
            rating = scores.get(criterion, "fair")
            pts = weight_map.get(rating, 0)
            max_pts = max(weight_map.values()) if weight_map else 1
            feedback[criterion] = {
                "rating": rating,
                "points": pts,
                "description": cfg.get("description", "")
            }
            total += pts
            max_total += max_pts
        normalized_score = round((total / (max_total or 1)) * 100, 1)
        return {"feedback": feedback, "score_percent": normalized_score}

    def evaluate_auto(self, question: str, answer: str) -> Dict[str, Any]:
        """Use an LLM to automatically rate the answer per rubric. Falls back if LLM not available."""
        if not self.llm_enabled:
            return {"error": "LLM not enabled or available"}

        # Build instruction
        rubric_text = json.dumps(self.rubric)
        prompt = (
            f"Evaluate the candidate's answer for the following question. Return a JSON object with each rubric criterion mapped to one of [excellent, good, fair, poor].\n"
            f"Question: {question}\nAnswer: {answer}\nRubric: {rubric_text}\nReturn only valid JSON."
        )
        resp = openai.Completion.create(
            engine="text-davinci-003",
            prompt=prompt,
            max_tokens=400,
            temperature=0.0,
            n=1
        )
        text = resp.choices[0].text.strip()
        try:
            out = json.loads(text)
            # convert to manual evaluate form
            return self.evaluate_manual(answer, out)
        except Exception:
            return {"error": "Failed to parse LLM output", "raw": text}

# ---------- Interview Session Flow ----------
class InterviewSession:
    def __init__(self, candidate_name: str, role: str, skills: List[str], rubric_name: str = "default", llm_enabled: bool = False):
        self.candidate_name = candidate_name
        self.role = role
        self.skills = skills
        self.started_at = datetime.datetime.utcnow().isoformat() + "Z"
        self.questions: List[Dict[str, Any]] = []
        self.rubrics = load_or_create_rubrics()
        self.rubric = self.rubrics.get(rubric_name, self.rubrics.get("default", DEFAULT_RUBRIC))
        self.generator = QuestionGenerator(llm_enabled=llm_enabled)
        self.evaluator = Evaluator(self.rubric, llm_enabled=llm_enabled)

    def prepare_questions(self, n: int = 5):
        qs = self.generator.sample_questions(self.role, self.skills, n=n)
        self.questions = [{"q": q, "answer": None, "eval": None} for q in qs]

    def run_cli(self, auto_eval: bool = False):
        print(f"Starting mock interview for {self.candidate_name} — role: {self.role}")
        for i, item in enumerate(self.questions, 1):
            print(f"\nQ{i}: {item['q']}")
            ans = input("Your answer (type and press enter):\n")
            item['answer'] = ans
            if auto_eval and self.evaluator.llm_enabled:
                ev = self.evaluator.evaluate_auto(item['q'], ans)
                item['eval'] = ev
                print("Auto-evaluation:")
                print(json.dumps(ev, indent=2))
            else:
                # allow manual scoring
                scores = {}
                print("Rate the following criteria: excellent / good / fair / poor (press enter to use 'fair')")
                for crit in self.rubric.keys():
                    r = input(f" - {crit}: ") or "fair"
                    scores[crit] = r
                ev = self.evaluator.evaluate_manual(ans, scores)
                item['eval'] = ev
                print("Evaluation result:")
                print(json.dumps(ev, indent=2))

    def summary(self) -> Dict[str, Any]:
        # Aggregate
        items = []
        scores = []
        for item in self.questions:
            items.append({"question": item['q'], "answer": item['answer'], "evaluation": item['eval']})
            if item['eval'] and 'score_percent' in item['eval']:
                scores.append(item['eval']['score_percent'])
        avg_score = round(sum(scores) / len(scores), 1) if scores else None
        summary = {
            "candidate": self.candidate_name,
            "role": self.role,
            "started_at": self.started_at,
            "average_score": avg_score,
            "questions": items
        }
        return summary

# ---------- CLI Entrypoint ----------
def main():
    parser = argparse.ArgumentParser(description="AI Interview Bot - Mock Interview CLI")
    parser.add_argument("--start", action="store_true", help="Start an interactive mock interview")
    parser.add_argument("--candidate", type=str, help="Candidate name")
    parser.add_argument("--role", type=str, choices=list(QUESTION_TEMPLATES.keys()), help="Role to interview for")
    parser.add_argument("--skills", type=str, help="Comma-separated skills (e.g. backend,algorithms)")
    parser.add_argument("--questions", type=int, default=5, help="Number of questions to ask")
    parser.add_argument("--auto-eval", action="store_true", help="Attempt automated LLM evaluation (requires OpenAI key)")
    parser.add_argument("--llm", action="store_true", help="Allow LLM for question generation and evaluation (requires OpenAI key)")

    # Use parse_known_args to ignore arguments passed by Jupyter/Colab kernel
    # and only process arguments relevant to the script.
    args = parser.parse_known_args()[0]

    if args.start:
        candidate = args.candidate or input("Candidate name: ")
        role = args.role or input(f"Role ({', '.join(QUESTION_TEMPLATES.keys())}): ")
        skills_str = args.skills or input("Skills (comma separated): ")
        skills = [s.strip() for s in skills_str.split(",") if s.strip()]
        session = InterviewSession(candidate, role, skills, llm_enabled=args.llm)
        session.prepare_questions(n=args.questions)
        session.run_cli(auto_eval=args.auto_eval)
        summ = session.summary()
        path = save_session(summ)
        print(f"\nSession saved to: {path}")
        print("Summary:")
        print(json.dumps(summ, indent=2))
    else:
        parser.print_help()

if __name__ == "__main__":
    main()


usage: colab_kernel_launcher.py [-h] [--start] [--candidate CANDIDATE]
                                [--role {software_engineer,data_scientist,product_manager}]
                                [--skills SKILLS] [--questions QUESTIONS]
                                [--auto-eval] [--llm]

AI Interview Bot - Mock Interview CLI

options:
  -h, --help            show this help message and exit
  --start               Start an interactive mock interview
  --candidate CANDIDATE
                        Candidate name
  --role {software_engineer,data_scientist,product_manager}
                        Role to interview for
  --skills SKILLS       Comma-separated skills (e.g. backend,algorithms)
  --questions QUESTIONS
                        Number of questions to ask
  --auto-eval           Attempt automated LLM evaluation (requires OpenAI key)
  --llm                 Allow LLM for question generation and evaluation
                        (requires OpenAI key)


In [None]:
import sys

# Backup original sys.argv
original_argv = sys.argv

# Simulate command line arguments
sys.argv = [
    'colab_kernel_launcher.py',
    '--start',
    '--candidate', 'Alice Smith',
    '--role', 'software_engineer',
    '--skills', 'backend,algorithms',
    '--questions', '2' # Limiting to 2 questions for demonstration
]

# Call the main function
try:
    main()
finally:
    # Restore original sys.argv
    sys.argv = original_argv


  self.started_at = datetime.datetime.utcnow().isoformat() + "Z"


Starting mock interview for Alice Smith — role: software_engineer

Q1: Explain how you would design a scalable REST API for a social media feed.
