In [4]:
import os
import random

import numpy as np
import pandas as pd
from faker import Faker


# Seed every random source this cell touches: NumPy's global generator,
# the stdlib `random` module, and Faker's class-level seed.
fake = Faker()
np.random.seed(42)
random.seed(42)
Faker.seed(42)

# Number of synthetic recruiter rows to generate.
n = 100_000

# Column definitions. The dict literal is evaluated top to bottom, so the
# NumPy draws and the Faker calls happen in the order written here; keep that
# order if the seeded output needs to stay the same.
data = {
    "recruiter_id": [f"R{recruiter_number}" for recruiter_number in range(100000, 100000 + n)],
    "company": [fake.company() for _ in range(n)],
    "logins_last_14d": np.random.poisson(lam=3, size=n),
    "job_posts_last_14d": np.random.poisson(lam=1.5, size=n),
    "avg_applicant_quality": np.random.normal(loc=70, scale=10, size=n).round(2),
    "support_tickets_last_30d": np.random.poisson(lam=0.5, size=n),
    "ai_tool_usage_count": np.random.poisson(lam=0.8, size=n),
    "last_active_date": [
        fake.date_between(start_date='-30d', end_date='today') for _ in range(n)
    ],
}

# Build the frame, then clamp three columns to fixed ranges:
# quality to [0, 100], support tickets to [0, 5], AI tool usage to [0, 10].
df = pd.DataFrame(data).assign(
    avg_applicant_quality=lambda frame: frame["avg_applicant_quality"].clip(lower=0, upper=100),
    support_tickets_last_30d=lambda frame: frame["support_tickets_last_30d"].clip(lower=0, upper=5),
    ai_tool_usage_count=lambda frame: frame["ai_tool_usage_count"].clip(lower=0, upper=10),
)

# Save the generated frame to CSV in the current working directory and report
# where it landed.
#
# The destination is resolved once and that same path is used for the write,
# the printed location, and the existence/size checks, so they cannot drift
# apart.
current_dir = os.getcwd()
file_path = os.path.join(current_dir, "recruiter_engagement_data.csv")

try:
    df.to_csv(file_path, index=False)

    print(f"Successfully created CSV with {len(df)} records")
    print(f"File saved at: {file_path}")
    print(f"File exists: {os.path.exists(file_path)}")
    print(f"File size: {os.path.getsize(file_path) / 1024:.2f} KB")
except OSError as e:
    # Only filesystem-level failures are reported as a save error. Any other
    # exception propagates with its traceback instead of being mislabelled.
    print(f"Error saving CSV: {e}")
else:
    # Preview runs only after a successful save and sits outside the try, so a
    # failure here is never reported as a CSV error.
    print("\nFirst few rows:")
    print(df.head())


Successfully created CSV with 100000 records
File saved at: C:\Users\user\recruiter_engagement_data.csv
File exists: True
File size: 5088.12 KB

First few rows:
  recruiter_id                          company  logins_last_14d  \
0      R100000  Rodriguez, Figueroa and Sanchez                4   
1      R100001                        Doyle Ltd                1   
2      R100002    Mcclain, Miller and Henderson                3   
3      R100003                   Davis and Sons                3   
4      R100004      Guzman, Hoffman and Baldwin                2   

   job_posts_last_14d  avg_applicant_quality  support_tickets_last_30d  \
0                   3                  72.49                         1   
1                   1                  68.59                         0   
2                   5                  58.40                         1   
3                   5                  77.88                         0   
4                   2                  78.92                