In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
np.random.seed(42)  # Seed NumPy's global RNG so every np.random.* draw below is repeatable

# Parameters
SIMULATION_DAYS = 200  # Number of days the simulation loop steps through (days 1..SIMULATION_DAYS)
MEDIAN_STATUS_CHANGE_DAYS = 5  # Median of the log-normal delay between applying and the status change
SIGMA_TIME = 1.0  # Sigma (spread in log space) of that log-normal delay

# Interview probability parameters
P_PEAK = 0.3  # Peak probability of getting an interview
P_MIN = 0.02  # Minimum probability
PEAK_TIME = 10  # Time at which the probability peaks
SIGMA_PROB = 5  # Controls the spread of the peak

# Recruiter-call stage parameters (day bounds are inclusive on both ends)
P_FURTHER_INTERVIEW = 0.5  # Probability of getting further interviews after recruiter call
RECRUITER_CALL_MIN_DAYS = 7  # Shortest delay between status change and recruiter call
RECRUITER_CALL_MAX_DAYS = 21  # Longest delay between status change and recruiter call
POST_CALL_MIN_DAYS = 1  # Shortest delay between recruiter call and its outcome
POST_CALL_MAX_DAYS = 4  # Longest delay between recruiter call and its outcome

def simulate_daily_applications(min_applications=10, max_applications=20):
    """Draw the number of applications submitted on a single day.

    Args:
        min_applications: Smallest possible daily count (inclusive).
        max_applications: Largest possible daily count (inclusive).

    Returns:
        An integer drawn uniformly from [min_applications, max_applications]
        using NumPy's global random state. With the defaults this is a value
        between 10 and 20.
    """
    # np.random.randint excludes its upper bound, hence the + 1.
    return np.random.randint(min_applications, max_applications + 1)

def generate_time_until_status_change(median=5, sigma=1.0):
    """Sample the number of days until an application's status changes.

    The delay is drawn from a log-normal distribution whose underlying normal
    has mean log(median) and standard deviation sigma, then rounded to a whole
    number of days.

    Args:
        median: Median of the log-normal delay, in days.
        sigma: Standard deviation of the underlying normal distribution.

    Returns:
        The rounded delay as an int, never less than 1.
    """
    raw_delay = np.random.lognormal(mean=np.log(median), sigma=sigma)
    whole_days = int(round(raw_delay))
    # A status change cannot land on the application day itself.
    return whole_days if whole_days >= 1 else 1

def determine_application_outcome(T_i, p_peak=P_PEAK, p_min=P_MIN, peak_time=PEAK_TIME, sigma=SIGMA_PROB):
    """Randomly decide whether an application leads to an interview.

    The interview probability follows a Gaussian-shaped curve in T_i: it equals
    p_peak when T_i == peak_time and falls towards p_min as T_i moves away.

    Args:
        T_i: Time until the status change, in days.
        p_peak: Probability at the top of the curve.
        p_min: Floor the probability decays towards.
        peak_time: Value of T_i at which the probability is highest.
        sigma: Width of the Gaussian curve.

    Returns:
        A tuple (outcome, interview_probability), where outcome is
        'interview' or 'rejection' as drawn by np.random.choice and
        interview_probability is the probability that was used.
    """
    # Unnormalised Gaussian: 1 at the peak, approaching 0 far from it.
    distance_from_peak = T_i - peak_time
    bump = np.exp(-(distance_from_peak ** 2) / (2 * sigma ** 2))

    # Scale the bump into [p_min, p_peak]; the clip keeps it inside that range.
    interview_probability = np.clip(p_min + (p_peak - p_min) * bump, p_min, p_peak)

    labels = ['interview', 'rejection']
    weights = [interview_probability, 1 - interview_probability]
    outcome = np.random.choice(labels, p=weights)
    return outcome, interview_probability

def generate_recruiter_call_time():
    """Draw the delay, in days, between a status change and the recruiter call.

    Returns:
        An integer sampled uniformly from RECRUITER_CALL_MIN_DAYS to
        RECRUITER_CALL_MAX_DAYS, both inclusive.
    """
    earliest = RECRUITER_CALL_MIN_DAYS
    latest_exclusive = RECRUITER_CALL_MAX_DAYS + 1  # randint's upper bound is exclusive
    return np.random.randint(low=earliest, high=latest_exclusive)

def generate_post_call_time():
    """Draw the delay, in days, between a recruiter call and its outcome.

    Returns:
        An integer sampled uniformly from POST_CALL_MIN_DAYS to
        POST_CALL_MAX_DAYS, both inclusive.
    """
    earliest = POST_CALL_MIN_DAYS
    latest_exclusive = POST_CALL_MAX_DAYS + 1  # randint's upper bound is exclusive
    return np.random.randint(low=earliest, high=latest_exclusive)

def determine_post_call_outcome(p_further=P_FURTHER_INTERVIEW):
    """Randomly decide what happens after the recruiter call.

    Args:
        p_further: Probability that the call leads to further interviews.

    Returns:
        'further interviews' with probability p_further, otherwise
        'rejection', as drawn by np.random.choice.
    """
    return np.random.choice(
        ['further interviews', 'rejection'],
        p=[p_further, 1 - p_further],
    )

# Simulate the application pipeline day by day.
#
# Rows are collected as plain dicts and turned into the `applications`
# DataFrame once, after the loop. DataFrame.append was removed in pandas 2.0,
# and growing a frame one row at a time copies the whole table on every insert.
application_records = []

# Per-stage schedules: day -> positions in `application_records` that are due
# on that day. Looking a day up here replaces scanning every row each day.
status_changes_due = {}
recruiter_calls_due = {}
post_call_outcomes_due = {}

# Simulation loop
for day in range(1, SIMULATION_DAYS + 1):
    # Simulate daily applications
    num_applications = simulate_daily_applications()
    for _ in range(num_applications):
        # Application date is the current day
        application_date = day
        # Generate time until status change
        T_i = generate_time_until_status_change(median=MEDIAN_STATUS_CHANGE_DAYS, sigma=SIGMA_TIME)
        status_change_date = application_date + T_i
        application_records.append({
            'application_date': application_date,
            'status_change_date': status_change_date,
            'T_i': T_i,
            'outcome': None,  # Will be determined later
            'interview_probability': None,  # Will be calculated later
            'recruiter_call_date': None,  # Will be determined if applicable
            'post_call_outcome_date': None,  # Will be determined if applicable
            'post_call_outcome': None  # Will be determined if applicable
        })
        status_changes_due.setdefault(status_change_date, []).append(len(application_records) - 1)

    # Each stage below visits its due rows in ascending row order, so random
    # numbers are consumed in the same order as a top-to-bottom scan of the table.

    # Process applications where the status changes on the current day
    for idx in sorted(status_changes_due.pop(day, [])):
        record = application_records[idx]
        outcome, interview_probability = determine_application_outcome(record['T_i'])
        record['outcome'] = outcome
        record['interview_probability'] = interview_probability
        # If the outcome is 'interview', schedule recruiter call
        if outcome == 'interview':
            R_i = generate_recruiter_call_time()
            recruiter_call_date = record['status_change_date'] + R_i
            record['recruiter_call_date'] = recruiter_call_date
            recruiter_calls_due.setdefault(recruiter_call_date, []).append(idx)

    # Process recruiter calls scheduled on the current day
    for idx in sorted(recruiter_calls_due.pop(day, [])):
        record = application_records[idx]
        # Generate time until post-call outcome
        S_i = generate_post_call_time()
        post_call_outcome_date = record['recruiter_call_date'] + S_i
        record['post_call_outcome_date'] = post_call_outcome_date
        post_call_outcomes_due.setdefault(post_call_outcome_date, []).append(idx)

    # Process post-call outcomes scheduled on the current day
    for idx in sorted(post_call_outcomes_due.pop(day, [])):
        # Determine the post-call outcome
        application_records[idx]['post_call_outcome'] = determine_post_call_outcome()

# Build the table in one step. Passing `columns` fixes the column order and
# keeps the columns present even if no application was generated. Stages an
# application never reached remain missing; test them with isnull()/notnull().
applications = pd.DataFrame(application_records, columns=[
    'application_date', 'status_change_date', 'T_i',
    'outcome', 'interview_probability',
    'recruiter_call_date', 'post_call_outcome_date',
    'post_call_outcome'
])

# Post-simulation analysis
# Tally each stage's outcomes once, then read the individual labels from the tallies.
status_change_counts = applications['outcome'].value_counts()
post_call_counts = applications['post_call_outcome'].value_counts()

total_applications = len(applications)
total_status_change_interviews = status_change_counts.get('interview', 0)
total_status_change_rejections = status_change_counts.get('rejection', 0)
# A recruiter call date is only filled in for applications that got an interview.
total_recruiter_calls = applications['recruiter_call_date'].notnull().sum()
total_post_call_further_interviews = post_call_counts.get('further interviews', 0)
total_post_call_rejections = post_call_counts.get('rejection', 0)
days_to_status_change = applications['status_change_date'] - applications['application_date']
average_time_to_status_change = days_to_status_change.mean()

# Print the summary, one metric per line.
summary_lines = [
    f"Total Applications: {total_applications}",
    f"Total Interviews at Status Change: {total_status_change_interviews}",
    f"Total Rejections at Status Change: {total_status_change_rejections}",
    f"Total Recruiter Calls Scheduled: {total_recruiter_calls}",
    f"Total Further Interviews After Recruiter Call: {total_post_call_further_interviews}",
    f"Total Rejections After Recruiter Call: {total_post_call_rejections}",
    f"Average Time to Status Change: {average_time_to_status_change:.2f} days",
]
for summary_line in summary_lines:
    print(summary_line)

# Visualize the interview probability vs. time until status change
# Evaluate the probability curve on 100 evenly spaced points between 0 and the
# largest simulated T_i, using the same formula and constants as the simulation.
Ti_values = np.linspace(0, applications['T_i'].max(), 100)
gaussian_bump = np.exp(-((Ti_values - PEAK_TIME) ** 2) / (2 * SIGMA_PROB ** 2))
interview_probabilities = P_MIN + (P_PEAK - P_MIN) * gaussian_bump

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(Ti_values, interview_probabilities)
ax.set_title('Interview Probability vs. Time Until Status Change')
ax.set_xlabel('Time Until Status Change (Days)')
ax.set_ylabel('Interview Probability')
ax.grid(True)
plt.show()

# Histogram of Time Until Status Change for Different Outcomes
interviews = applications[applications['outcome'] == 'interview']
rejections = applications[applications['outcome'] == 'rejection']

# T_i takes whole-day values, so give every day its own bin with edges on the
# half-days (0.5, 1.5, ..., max + 0.5). Integer edges that stop at the maximum
# would make the final bin [max - 1, max] closed on both sides and merge the
# two largest T_i values into one bar.
max_T_i = int(applications['T_i'].max())
T_i_bin_edges = np.arange(1, max_T_i + 2) - 0.5

plt.figure(figsize=(8, 6))
plt.hist(interviews['T_i'], bins=T_i_bin_edges, alpha=0.5, label='Interviews', edgecolor='black')
plt.hist(rejections['T_i'], bins=T_i_bin_edges, alpha=0.5, label='Rejections', edgecolor='black')
plt.title('Distribution of Time Until Status Change by Outcome')
plt.xlabel('Time Until Status Change (Days)')
plt.ylabel('Number of Applications')
plt.legend()
plt.show()

# Histogram of Recruiter Call Times
# Only applications that reached the interview stage have a recruiter call
# date; drop the rows where the delay is missing before plotting.
recruiter_call_delays = (
    applications['recruiter_call_date'] - applications['status_change_date']
).dropna().astype(float)

# One bin per whole day from RECRUITER_CALL_MIN_DAYS to RECRUITER_CALL_MAX_DAYS
# inclusive, with edges on the half-days. Integer edges that stop at the
# maximum would merge the two longest delays into a single closed final bin.
recruiter_call_bin_edges = np.arange(RECRUITER_CALL_MIN_DAYS, RECRUITER_CALL_MAX_DAYS + 2) - 0.5

plt.figure(figsize=(8, 6))
plt.hist(recruiter_call_delays, bins=recruiter_call_bin_edges, edgecolor='black')
plt.title('Distribution of Time Between Status Change and Recruiter Call')
plt.xlabel('Days')
plt.ylabel('Number of Recruiter Calls')
plt.show()

# Histogram of Post-Call Outcome Times
# Rows without a recruiter call (or whose call outcome was never scheduled)
# have no delay to plot, so they are dropped.
post_call_times = applications['post_call_outcome_date'] - applications['recruiter_call_date']
post_call_times = post_call_times.dropna().astype(float)

# One bin per whole day from POST_CALL_MIN_DAYS to POST_CALL_MAX_DAYS
# inclusive, with edges on the half-days. Integer edges that stop at the
# maximum would merge the two longest delays into a single closed final bin.
post_call_bin_edges = np.arange(POST_CALL_MIN_DAYS, POST_CALL_MAX_DAYS + 2) - 0.5

plt.figure(figsize=(8, 6))
plt.hist(post_call_times, bins=post_call_bin_edges, edgecolor='black')
plt.title('Distribution of Time Between Recruiter Call and Post-Call Outcome')
plt.xlabel('Days')
plt.ylabel('Number of Outcomes')
plt.show()


In [1]:
import numpy as np

# Mean (rate) parameters of the Poisson draws made by get_applications_count
lambda_weekday = 15  # Poisson mean used when day_type == 'weekday'
lambda_weekend = 5   # Poisson mean used when day_type == 'weekend'

# Function to draw number of applications based on day type
def get_applications_count(day_type):
    """Sample how many applications are submitted on one day of the given type.

    Args:
        day_type: Either 'weekday' or 'weekend'.

    Returns:
        A Poisson-distributed count whose mean is lambda_weekday for
        'weekday' and lambda_weekend for 'weekend'.

    Raises:
        ValueError: If day_type is neither 'weekday' nor 'weekend'.
    """
    # Reject unknown day types before touching the random generator.
    if day_type not in ('weekday', 'weekend'):
        raise ValueError("day_type should be either 'weekday' or 'weekend'")
    rate = lambda_weekday if day_type == 'weekday' else lambda_weekend
    return np.random.poisson(rate)

# Example usage: draw one day's application count for the chosen day type
day_type = 'weekday'  # Either 'weekday' or 'weekend'; any other value makes get_applications_count raise ValueError
A_t = get_applications_count(day_type)

print(f"Number of applications submitted on {day_type}: {A_t}")

Number of applications submitted on weekday: 18


In [None]:
# Draw ten independent daily application counts and keep them in a list.
# get_applications_count requires a day type; calling it with no argument
# raises TypeError, so the day type is passed explicitly here.
A_list = [get_applications_count('weekday') for _ in range(10)]

print(A_list)