In [6]:
import openai
import os


In [None]:
import openai

# Read the API key from the environment instead of embedding it in the notebook,
# so the credential never ends up in version control or shared copies.
# A missing variable fails right here with a KeyError.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Send one fixed prompt and print the model's reply.
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "user", "content": "Generate 3 interview questions for a backend Python developer skilled in APIs and SQL."}
    ],
    temperature=0.7,
    max_tokens=300
)

print(response['choices'][0]['message']['content'])

1. Can you walk us through a recent project where you utilized APIs in Python to interact with external systems or services?
2. How do you approach designing database schemas and writing efficient SQL queries to optimize performance in your backend applications?
3. Can you discuss a challenging issue you encountered while working with APIs and SQL in a project, and how you resolved it?


In [13]:
import pandas as pd

# Read the three input tables in one pass, in this order:
#   1. cleaned resumes
#   2. job descriptions
#   3. top matched resume–JD pairs (from Phase 2)
resumes_df, jd_df, matches_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/resumes_cleaned.csv",
        "../data/job_descriptions.csv",
        "../data/top_jd_matches_per_resume.csv",
    )
)

# Merge to bring full JD and Resume text
def extract_jd_index(jd_label: str) -> int:
    """Return the integer that follows the last underscore in ``jd_label``.

    If the label contains no underscore, the whole label is parsed as the integer.

    Raises:
        ValueError: if the trailing segment is not a valid integer literal.
    """
    # rpartition yields ('', '', label) when there is no underscore, so the
    # final element is always the segment after the last "_".
    _, _, trailing = jd_label.rpartition("_")
    return int(trailing)

# Numeric JD row label parsed from each resume's top-ranked JD label.
jd_indices = matches_df['Top1_JD'].apply(extract_jd_index)

matches_df = matches_df.assign(
    JD_Index=jd_indices,
    # Look up the full JD text row by row, in the same order as the matches.
    JD_Text=[jd_df.loc[jd_idx, 'JD_Text'] for jd_idx in jd_indices],
    # Assigned as a Series, so values are aligned to matches_df by index label.
    Resume_Text=resumes_df['Cleaned_Resume'].head(len(matches_df)),
)[['Resume_Text', 'JD_Text']]  # keep only the two text columns

matches_df.head()


Unnamed: 0,Resume_Text,JD_Text
0,skill programming language python panda numpy ...,**Job Title**: Senior Data Science\n**Location...
1,education detail uit rgpv data sci...,**Job Title**: Machine Learning Python Develop...
2,area interest deep learning control system des...,**Job Title**: Junior Python Developer\n**Loca...
3,skill python sap hana tabl...,**Job Title**: Senior Data Science\n**Location...
4,education detail mca ymcaust faridabad har...,**Job Title**: Data Science Analyst\n**Locatio...


In [14]:
def build_prompt(resume, jd):
    """Assemble the recruiter prompt sent to the chat model.

    Args:
        resume: Candidate resume text, interpolated as-is.
        jd: Job description text, interpolated as-is.

    Returns:
        A prompt string with the job description first, then the resume, each
        fenced by ``---`` lines, followed by the instruction to return five
        tailored technical questions as a numbered list.
    """
    intro = "\nYou are an AI recruiter.\n"
    jd_section = f"Here is a job description:\n---\n{jd}\n---\n"
    resume_section = f"Here is a candidate's resume:\n---\n{resume}\n---\n"
    instructions = (
        "Based on the resume and job description above, generate 5 specific technical interview questions that the recruiter can ask this candidate to evaluate their suitability for the role. Avoid generic questions. Tailor each question to the skills or experience mentioned.\n"
        "\n"
        "Return only the questions in numbered list format.\n"
    )
    # Each section already ends with a newline; joining on "\n" inserts the
    # blank line that separates consecutive sections.
    return "\n".join([intro, jd_section, resume_section, instructions])


In [None]:
import openai

# Read the API key from the environment instead of embedding it in the notebook,
# so the credential never ends up in version control or shared copies.
# A missing variable fails right here with a KeyError.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Pick one Resume–JD pair
sample_resume = matches_df.loc[0, 'Resume_Text']
sample_jd = matches_df.loc[0, 'JD_Text']

prompt = build_prompt(sample_resume, sample_jd)

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "user", "content": prompt}
    ],
    temperature=0.7,
    max_tokens=500
)

first_choice = response['choices'][0]
questions = first_choice['message']['content']
print(questions)

# The API reports finish_reason == "length" when the reply was cut off by the
# max_tokens cap; surface that so an incomplete question list is not mistaken
# for a full one.
if first_choice.get('finish_reason') == 'length':
    print("⚠️  Response was truncated at max_tokens; consider raising the limit.")


1. Can you walk us through a specific data science project you worked on where you implemented machine learning techniques such as regression, SVM, Bayesian methods, or random forest? What challenges did you face and how did you overcome them?
   
2. In your experience with natural language processing, can you explain how you approached sentiment analysis and topic modeling in a project? What tools and techniques did you use, and what were the key insights or outcomes from your analysis?

3. You mentioned working on developing a chatbot using Python, natural language processing libraries, and other technologies. Can you describe the architecture of the chatbot system you built, including how it handles user queries, generates responses, and improves over time based on user interactions?

4. How have you utilized visualization tools like Tableau, Matplotlib, and Plotly in your data science projects? Can you provide an example of a visualization you created to communicate complex data in

In [None]:
import time
import openai

# Read the API key from the environment instead of embedding it in the notebook,
# so the credential never ends up in version control or shared copies.
# A missing variable fails right here with a KeyError.
openai.api_key = os.environ["OPENAI_API_KEY"]
all_questions = []
failed_indices = []  # row labels whose request raised, kept so they can be retried

# Number of candidates to generate for (change to 100 later if needed)
limit = 10
# Never iterate past the rows that actually exist; the .loc lookups below sit
# outside the try block and would otherwise raise KeyError.
# NOTE(review): assumes matches_df has the default 0..n-1 index — confirm.
n_candidates = min(limit, len(matches_df))

for idx in range(n_candidates):
    resume = matches_df.loc[idx, 'Resume_Text']
    jd = matches_df.loc[idx, 'JD_Text']
    prompt = build_prompt(resume, jd)

    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=500
        )

        first_choice = response['choices'][0]
        questions = first_choice['message']['content']
        all_questions.append({
            'Resume_ID': idx,
            'Interview_Questions': questions,
            'Resume_Preview': resume[:200],
            'JD_Preview': jd[:200]
        })

        # finish_reason == "length" means the reply hit the max_tokens cap.
        if first_choice.get('finish_reason') == 'length':
            print(f"⚠️  Response for index {idx} was truncated at max_tokens.")

        print(f"✅ {idx+1} / {n_candidates} completed.")

    except Exception as e:
        # Best-effort batch: record the failure and keep going with the next row.
        failed_indices.append(idx)
        print(f"⚠️  Error at index {idx}: {e}")

    # Polite delay between requests; applied after failures too so an error
    # (e.g. rate limiting) is not followed by an immediate next request.
    time.sleep(1.5)

if failed_indices:
    print(f"⚠️  {len(failed_indices)} request(s) failed; indices: {failed_indices}")



✅ 1 / 10 completed.
✅ 2 / 10 completed.
✅ 3 / 10 completed.
✅ 4 / 10 completed.
✅ 5 / 10 completed.
✅ 6 / 10 completed.
✅ 7 / 10 completed.
✅ 8 / 10 completed.
✅ 9 / 10 completed.
✅ 10 / 10 completed.


In [22]:
# Turn the collected records into a table and write it out without the index column.
output_path = "../data/interview_questions_output.csv"
questions_df = pd.DataFrame(all_questions)
questions_df.to_csv(output_path, index=False)
print("✅ Interview questions saved to interview_questions_output.csv")


✅ Interview questions saved to interview_questions_output.csv
