# **Generated Resumes**

In [None]:
!pip install faker

In [None]:
import os
import time

import pandas as pd
import requests
from faker import Faker

# SECURITY: the OpenRouter key must never be committed to source control.
# It is read from the OPENROUTER_API_KEY environment variable instead.
# Any key that was previously hard-coded here should be treated as leaked
# and revoked in the OpenRouter dashboard.
API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
if not API_KEY:
    # Do not abort on import; warn so the cause of later auth failures is obvious.
    print("⚠️ OPENROUTER_API_KEY is not set; OpenRouter requests will be rejected.")

# Model identifier and chat-completions endpoint used for every generation request.
MODEL = "mistralai/mistral-7b-instruct"
ENDPOINT = "https://openrouter.ai/api/v1/chat/completions"

# Headers sent with every POST: bearer authentication plus a JSON body.
HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json",
}

# Shared Faker instance; the generation loop uses it to produce a fresh
# name, email address and city for every resume.
fake = Faker()
# Job titles to generate resumes for; one batch of resumes is produced per title.
# Every entry needs its own trailing comma: adjacent string literals are
# concatenated silently, which previously merged "ML engineer" and
# "full-stack developer" into a single bogus title.
job_titles = [
    "frontend developer",
    "backend developer",
    "data analyst",
    "DevOps engineer",
    "ML engineer",
    "full-stack developer",
    "QA engineer",
    "mobile developer",
    "UI/UX designer",
    "product analyst",
]

# For each true experience level, the three ways a resume may present it.
# Keys are the values iterated as `source_level` in the generation loop.
portrayals = dict(
    junior=["real", "pretended to be mid", "pretended to be senior"],
    mid=["underplayed", "real", "overhyped"],
    senior=["pretended to be junior", "pretended to be mid", "real"],
)

# Collects one record (dict) per successfully generated resume.
resumes = list()

# Upper bound on generated tokens per resume. The prompt asks for a complete
# one-page resume with four sections; the previous limit of 300 tokens cut
# most answers off mid-section.
MAX_TOKENS = 1024
# Seconds to wait for a single API response before giving up, so one stalled
# connection cannot hang the whole batch.
REQUEST_TIMEOUT = 60

# One request per (job title, true level, portrayal) combination.
for job_title in job_titles:
    for source_level in ["junior", "mid", "senior"]:
        for portrayal in portrayals[source_level]:

            # Fresh fake identity for every resume.
            name = fake.name()
            email = fake.email()
            location = fake.city()

            # Extra tone guidance is only defined for two cases; all other
            # combinations leave this section of the prompt empty.
            tone_instructions = ""

            if source_level == "senior" and "pretended" in portrayal:
                tone_instructions = """Avoid strong leadership language like 'led', 'architected', 'managed'. Use supporting roles and technical focus. No mentoring or strategic decisions."""
            elif source_level == "junior" and portrayal == "real":
                tone_instructions = """Use humble tone, mention internships, coursework, or personal projects. Avoid advanced achievements, leadership, or scaling systems."""

            prompt = f"""
Write a professional and realistic resume in English for a candidate applying as a {job_title}.

The candidate's actual experience level is {source_level}, but the resume should be written to **appear as if the candidate is {portrayal}**, using only real background and abilities.

Important:
- Do NOT mention or imply years of experience (e.g., "3 years", "since 2021", or date ranges like "June 2020 - Jan 2021")
- Do NOT use terms like junior, mid-level, senior, or expert
- Do NOT leave any placeholder fields (like Name, Email, or City) blank

You must **not fabricate or exaggerate any experience**, only subtly change the **tone, phrasing, and structure** to match the intended impression.

Candidate details:
- Full name: {name}
- Email: {email}
- City: {location}

Include the following sections:
1. Professional Summary
2. Work Experience (1–2 entries, based on actual ability)
3. Education
4. Skills

{tone_instructions}

Additional notes:
- The resume should read as if written by a human.
- Use varied sentence structure and vocabulary.
- Keep it concise, flowing, and limited to a realistic one-page format.

Return only the resume text – no extra comments, markdown, or section labels like "Resume".
Ensure the resume is complete and ends naturally with a final sentence. Avoid abrupt cutoff or partial sections.
""".strip()

            data = {
                "model": MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": MAX_TOKENS,
            }

            try:
                response = requests.post(
                    ENDPOINT, headers=HEADERS, json=data, timeout=REQUEST_TIMEOUT
                )
                response.raise_for_status()
                resp_json = response.json()

                if "choices" not in resp_json or not resp_json["choices"]:
                    print(f"⚠️ API error: {job_title} | {source_level} → {portrayal}")
                else:
                    choice = resp_json["choices"][0]
                    content = choice["message"]["content"].strip()

                    # Flag resumes that still hit the token limit so they can
                    # be filtered out of the dataset or regenerated later.
                    if choice.get("finish_reason") == "length":
                        print(f"⚠️ Truncated at {MAX_TOKENS} tokens: {job_title} | {source_level} → {portrayal}")

                    resumes.append({
                        "job_title": job_title,
                        "source_level": source_level,
                        "portrayal": portrayal,
                        "name": name,
                        "email": email,
                        "location": location,
                        "resume_text": content
                    })

                    print(f"✅ Done: {job_title} | {source_level} → {portrayal}")

            except Exception as e:
                # Batch boundary: report the failure and move on to the next
                # combination rather than losing everything generated so far.
                print(f"❌ Exception for {job_title} | {source_level} → {portrayal}: {e}")

            # Pace every request, failed ones included, so errors such as
            # rate limiting are not followed by an immediate retry burst.
            time.sleep(2)

# Write every collected resume record to a single CSV file (no index column).
resumes_df = pd.DataFrame(resumes)
resumes_df.to_csv("resumes.csv", index=False)
print("🎉 All resumes saved to resumes.csv")


# **Scraping Resumes**

In [None]:
import requests
import re
import json
from bs4 import BeautifulSoup
import pandas as pd


# Browser-like request headers sent with the search request in scrape_resumes.
headers = {
    "accept": "*/*",
    "accept-language": "en-US,en;q=0.9",
    "referer": "https://www.hireitpeople.com/",
    # Client-hint headers.
    "sec-ch-ua": '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    # Fetch-metadata headers.
    "sec-fetch-dest": "script",
    "sec-fetch-mode": "no-cors",
    "sec-fetch-site": "cross-site",
    "sec-fetch-storage-access": "active",
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"
    ),
    "x-client-data": "CKO1yQEIkrbJAQiktskBCKmdygEI0aDKAQi72coBCJKhywEIhaDNAQjd7s4B",
}


def build_params(start, seniority):
    """Build the query-string parameters for one page of search results.

    Args:
        start: Result offset for the page; converted to a string. The caller
            advances it in steps of 10, matching the ``num`` page size below.
        seniority: Search text sent as ``q`` (e.g. "junior", "mid", "senior").

    Returns:
        A dict of string parameters for the search request.
    """
    from urllib.parse import urlencode

    return {
        'rsz': 'filtered_cse',
        'num': '10',
        'hl': 'en',
        'start': str(start),
        'source': 'gcsc',
        'cselibv': '75c56d121cde450a',
        'cx': '003506502865988516570:i3u7tkb8dcq',
        'q': seniority,
        'safe': 'off',
        # NOTE(review): this token looks copied from a browser session and is
        # presumably short-lived; confirm and refresh it before running.
        'cse_tok': 'AB-tC_5rZOulMEYioXWIItaBrOvh:1746550407596',
        'sort': '',
        'exp': 'cc,apo',
        # NOTE(review): '...' is a placeholder value, not a real response.
        'g-recaptcha-response': '...',
        'callback': 'google.search.cse.api17176',
        # Keep the referring page URL in sync with the actual query instead
        # of always claiming the search came from the "senior" page.
        'rurl': 'https://www.hireitpeople.com/resume-database?' + urlencode({'q': seniority}),
    }


def extract_urls_from_response(response_text):
    """Extract result URLs from a JSONP search response.

    The response is expected to contain a call of the form
    ``google.search.cse.<callback>(<json>);``. The JSON argument is parsed and
    the ``url`` of every entry in its ``results`` list is returned.

    Args:
        response_text: Raw response body as text.

    Returns:
        List of URL strings; empty when the payload has no results.

    Raises:
        ValueError: If no JSONP call is found, or the payload is not valid
            JSON (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    # Strip first: the pattern is anchored at the end of the text, so trailing
    # whitespace or blank lines after ");" would otherwise prevent a match.
    match = re.search(r'google\.search\.cse\.\w+\((.*)\);?$', response_text.strip(), re.S)
    if not match:
        raise ValueError("JSON body not found in response")
    data = json.loads(match.group(1))
    # Tolerate a null "results" value, and skip entries without a "url" so one
    # malformed result does not discard the whole page.
    results = data.get("results") or []
    return [item["url"] for item in results if "url" in item]


def fetch_resume_data(url):
    """Download one resume page and extract its text and job title.

    Args:
        url: Address of the resume page.

    Returns:
        ``{'resume': <text>, 'job title': <title>}`` on success, or ``None``
        when the request fails, the status is not 200, or the expected page
        elements are missing.
    """
    try:
        # A timeout keeps one unresponsive page from stalling the whole scrape.
        response = requests.get(url, timeout=30)
        if response.status_code != 200:
            return None
        soup = BeautifulSoup(response.content, 'html.parser')
        resume_text = soup.find(class_="single-post-body").text
        # The heading reads "<job title> Resume ..."; keep only the title part.
        job_title = soup.find(class_='media-body').find('h3').text.split(' Resume')[0]
        return {'resume': resume_text, 'job title': job_title}
    except Exception as e:
        # Best effort: a missing element or network error skips this page only.
        print(f"Failed to process {url}: {e}")
        return None


def scrape_resumes(seniority, pages=10):
    """Collect resumes for one search query across several result pages.

    Args:
        seniority: Search text passed to ``build_params``.
        pages: Maximum number of result pages to request (10 results each).

    Returns:
        List of dicts as returned by ``fetch_resume_data``; pages or resumes
        that fail are reported on stdout and skipped.
    """
    data_list = []
    # Offsets advance by 10 to match the page size requested in build_params.
    for start in range(0, pages * 10, 10):
        params = build_params(start, seniority)
        try:
            response = requests.get(
                'https://cse.google.com/cse/element/v1',
                params=params,
                headers=headers,
                timeout=30,  # never hang indefinitely on the search endpoint
            )
            # Surface HTTP errors with their status instead of a parse failure.
            response.raise_for_status()
            urls = extract_urls_from_response(response.text)
        except Exception as e:
            print(f"Error during scraping: {e}")
            continue

        print(f"Found URLs: {urls}")
        if not urls:
            # An empty page means the results are exhausted; further offsets
            # would only issue pointless requests.
            break

        for url in urls:
            data = fetch_resume_data(url)
            if data:
                print(f"Fetched: {data['job title']}")
                data_list.append(data)
    return data_list


def save_to_excel(data, filename="senior.xlsx"):
    """Write ``data`` to an Excel file without an index column and report the path.

    Args:
        data: Records accepted by ``pd.DataFrame`` (here, a list of dicts).
        filename: Destination workbook path.
    """
    pd.DataFrame(data).to_excel(filename, index=False)
    print(f"File saved successfully: {filename}")


if __name__ == "__main__":
    # Scrape every seniority query and store the combined result in one workbook
    # (save_to_excel's default filename is "senior.xlsx", though all levels are included).
    all_data = []
    for seniority_level in ['junior', 'mid', 'senior']:
        for record in scrape_resumes(seniority_level, pages=10):
            # Record which query produced the row; without this the merged
            # sheet cannot tell junior, mid and senior results apart.
            record['seniority'] = seniority_level
            all_data.append(record)
    save_to_excel(all_data)
