In [2]:
import os
import json
from openai import OpenAI

# The Groq API key is read from the GROQ_API_KEY environment variable so that no
# secret is stored in the notebook (get a free key at https://console.groq.com).
# SECURITY: a key used to be hardcoded here; treat it as leaked and revoke it.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it in your environment before running this cell."
    )

# Groq exposes an OpenAI-compatible endpoint, so the OpenAI client is pointed at it.
client = OpenAI(
    api_key=_groq_api_key,
    base_url="https://api.groq.com/openai/v1"
)

def generate_skill_mcqs(
    skill: str,
    years: str = "3-5 years",
    industry: str = None,
    num_questions: int = 10
):
    """Request multiple-choice interview questions for a skill from the chat model.

    Builds a system prompt and a user prompt from the arguments, sends them
    through the module-level ``client`` and returns the text of the first
    completion choice.

    Args:
        skill: Skill name interpolated into the prompt.
        years: Experience band interpolated into the prompt.
        industry: Optional industry/context; a falsy value is rendered as "Any".
        num_questions: How many questions to request; must be at least 1.

    Returns:
        The reply text with surrounding whitespace stripped, or an empty
        string when the reply carries no text content.

    Raises:
        ValueError: If ``num_questions`` is smaller than 1.
    """
    # Reject nonsensical requests before spending an API call on them.
    if num_questions < 1:
        raise ValueError(f"num_questions must be >= 1, got {num_questions!r}")

    system_prompt = """You are an expert technical interviewer with 15+ years of industry experience.
You create extremely high-quality multiple-choice questions that perfectly discriminate candidates with real-world experience."""

    user_prompt = f"""Generate exactly {num_questions} multiple-choice questions for the following profile:

Skill                : {skill}
Required Experience  : {years}
Industry/Context     : {industry or "Any"}

RULES (follow exactly):
- Each question must have exactly 4 options (A, B, C, D)
- Exactly ONE option is the best/correct answer
- Exactly ONE option is plausible/acceptable but not the best (common mistake, outdated practice, or junior-level thinking)
- The other two options must be clearly wrong but believable distractors
- After the options, write on separate lines:
  Correct Answer: [LETTER]
  Explanation: [2-4 sentence detailed explanation why this is correct and why the acceptable one is not the best]
- Questions must require real {years} experience to answer confidently
- Never repeat questions
- Cover different sub-topics of the skill

Output only the questions (no intro, no numbering at the beginning)."""

    response = client.chat.completions.create(
        model="moonshotai/kimi-k2-instruct-0905",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=0.7,
        max_tokens=4096,
        top_p=0.95
    )

    # The message content may be None; fall back to "" so .strip() cannot raise.
    content = response.choices[0].message.content
    return (content or "").strip()

# Example usage
if __name__ == "__main__":
    # Each entry is (skill, experience band, industry/context).
    skills = [
        ("Python", "3-5 years", "General Backend / Data Engineering"),
        ("Python Automation", "4-7 years", "DevOps / Test Automation"),
        ("Deep Learning (PyTorch/TensorFlow)", "3-5 years", "Computer Vision / NLP"),
    ]

    heavy_rule = "=" * 80
    light_rule = "-" * 80

    for skill, years, industry in skills:
        # Banner, then the generated questions, then a closing separator.
        print(f"\n{heavy_rule}")
        print(f"GENERATING 10 MCQs → {skill} ({years}) – {industry}")
        print(heavy_rule)
        mcqs = generate_skill_mcqs(skill, years, industry)
        print(mcqs)
        print(f"\n{light_rule}\n")

        # Optional: save to file (spaces and slashes in the skill become underscores)
        safe_name = skill.translate(str.maketrans({" ": "_", "/": "_"}))
        header = f"Skill: {skill}\nExperience: {years}\nIndustry: {industry}\n\n"
        with open(f"mcqs_{safe_name}_{years}.txt", "w", encoding="utf-8") as f:
            f.writelines((header, mcqs))


GENERATING 10 MCQs → Python (3-5 years) – General Backend / Data Engineering
**Question 1**  
You receive a `MemoryError` when using `pandas.read_csv('huge.tsv', sep='\t')` on a 50 GB file on a 32 GB RAM machine. Which single-line change keeps the code pure-pandas and most memory-efficient while still allowing grouped aggregations later?

A. `df = pd.read_csv('huge.tsv', sep='\t', dtype={'id': 'int32', 'value': 'float32'})`  
B. `df = pd.read_csv('huge.tsv', sep='\t', chunksize=1_000_000)`  
C. `df = pd.read_csv('huge.tsv', sep='\t', usecols=['id','value'])`  
D. `df = pd.read_csv('huge.tsv', sep='\t', low_memory=False)`  

Correct Answer: A  
Explanation: Downsizing dtypes converts the file to ~¼ RAM footprint without sacrificing in-memory operations, letting you keep the full dataset for later groupby. Chunksize (B) streams but forces out-of-core work, while usecols (C) only helps if you can drop columns; low_memory (D) is a legacy flag that never reduces footprint.

---

**Question

## Improve Prompt Style

In [4]:
import os
from openai import OpenAI

# The Groq API key is read from the GROQ_API_KEY environment variable so that no
# secret is stored in the notebook (get a free key at https://console.groq.com).
# SECURITY: a key used to be hardcoded here; treat it as leaked and revoke it.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it in your environment before running this cell."
    )

# Groq exposes an OpenAI-compatible endpoint, so the OpenAI client is pointed at it.
client = OpenAI(
    api_key=_groq_api_key,
    base_url="https://api.groq.com/openai/v1"
)

def generate_mcqs_with_labels(
    skill: str,
    years: str = "3-5 years",
    industry: str = None,
    num_questions: int = 10
):
    """Request labelled multiple-choice questions for a skill from the chat model.

    The prompt asks the model to label every option of each question as
    Correct, Acceptable or Distractor and to follow a fixed output layout.
    The request goes through the module-level ``client``.

    Args:
        skill: Skill name interpolated into the prompt.
        years: Experience band interpolated into the prompt.
        industry: Optional industry/context; a falsy value is rendered as "General".
        num_questions: How many questions to request; must be at least 1.

    Returns:
        The reply text with surrounding whitespace stripped, or an empty
        string when the reply carries no text content.

    Raises:
        ValueError: If ``num_questions`` is smaller than 1.
    """
    # Reject nonsensical requests before spending an API call on them.
    if num_questions < 1:
        raise ValueError(f"num_questions must be >= 1, got {num_questions!r}")

    system_prompt = """You are an expert technical interviewer with 15+ years of real-world experience.
You create extremely high-quality MCQs that perfectly separate junior, mid-level and senior candidates."""

    user_prompt = f"""Generate exactly {num_questions} multiple-choice questions for this profile:

            Skill               : {skill}
            Experience required : {years}
            Industry/Context    : {industry or "General"}

            STRICT OUTPUT FORMAT (never deviate):
            Q1. <question text>

            A) <option A>
            B) <option B>
            C) <option C>
            D) <option D>

            Correct Answer       : <LETTER>
            Acceptable Answer    : <LETTER> (plausible but not the best — common mistake, outdated practice or junior-level solution)
            Distractor           : <LETTER>
            Distractor           : <LETTER>

            Explanation: <2–4 sentences explaining why Correct is best, why Acceptable is not ideal, and why the two distractors are wrong>

            <blank line between questions>

            Rules:
            - Exactly 4 options (A–D)
            - Exactly 1 Correct, exactly 1 Acceptable, exactly 2 Distractors
            - Questions must need real {years} experience to answer confidently
            - Cover different sub-topics
            - Output ONLY the questions (no intro text, no markdown fences, no extra numbering)"""

    response = client.chat.completions.create(
        model="moonshotai/kimi-k2-instruct-0905",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user",   "content": user_prompt}
        ],
        temperature=0.72,      # slight creativity but very consistent format
        max_tokens=8192,
        top_p=0.95
    )

    # The message content may be None; fall back to "" so .strip() cannot raise.
    content = response.choices[0].message.content
    return (content or "").strip()

# ──────── EXAMPLE USAGE ────────
if __name__ == "__main__":
    # Each entry is (skill, experience band, industry/context).
    topics = [
        ("Python", "3-5 years", "Backend / Data Engineering"),
        ("Python Automation", "4-7 years", "DevOps / QA Automation"),
        ("Deep Learning with PyTorch/TensorFlow", "3-6 years", "Computer Vision or NLP"),
    ]

    rule = "=" * 100
    # Maps the characters replaced when building the file name from the skill.
    slug_table = str.maketrans({" ": "_", "/": "-"})

    for skill, years, industry in topics:
        print(f"\n{rule}")
        print(f"10 MCQs → {skill} | {years} | {industry or 'General'}")
        print(rule)
        result = generate_mcqs_with_labels(skill, years, industry)
        print(result)

        # Save to file: one header line, a blank line, then the generated text
        filename = f"MCQs_{skill.translate(slug_table)}_{years}.txt"
        header = f"Skill: {skill} | Experience: {years} | Context: {industry}\n\n"
        with open(filename, "w", encoding="utf-8") as f:
            f.writelines((header, result))
        print(f"Saved to {filename}\n")


10 MCQs → Python | 3-5 years | Backend / Data Engineering
Q1. A nightly batch job loads 50 GB of JSON Lines from S3 into an Amazon Aurora PostgreSQL cluster using `psycopg2`.  
Which combination of techniques will give the best end-to-end throughput while keeping CPU below 40 % on the writer instance?

A) Use `COPY ... FROM STDIN` with `psycopg2.extras.copy_from()` on a single connection and compress the files with gzip  
B) Split the file into 8 MB chunks, spawn 12 worker processes that each open a fresh connection and call `COPY ... FROM STDIN` with `psycopg2.extras.copy_from()`  
C) Read the file in 8 MB chunks, wrap them in `io.StringIO` and use `executemany()` with `ON CONFLICT DO NOTHING`  
D) Convert the JSON Lines to CSV, upload to S3, then run `aws s3 cp` into an Aurora table using the Aurora S3 integration

Correct Answer       : B  
Acceptable Answer    : A  
Distractor           : C  
Distractor           : D  

Explanation: `COPY` is the fastest bulk-load path in Postgres