In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

In [2]:
# Pin the state of NumPy's global (legacy) random generator so that the
# np.random.* draws made after this cell are repeatable from run to run.
SEED = 42
np.random.seed(SEED)

# CONVERSION ABC

In [3]:
# Build a synthetic A/B/C conversion dataset and save it to conversionABC.csv.
#
# Re-seed inside this cell so that re-running it on its own regenerates the
# same dataset, no matter what was drawn from the global RNG beforehand.
np.random.seed(42)

# Number of rows generated PER GROUP (the final frame holds 3 * num_rows rows)
num_rows = 1000

# Inclusive calendar window from which activation dates are sampled
start_date = datetime(2022, 1, 1)
end_date = datetime(2022, 12, 31)

# One normally distributed score per row and group. Despite the variable name
# these are not probabilities: the draws are unbounded and can fall outside
# [0, 1]. They act as latent scores that are thresholded at 0.5 below, so the
# expected conversion rates are P(N(loc, 0.2) > 0.5), i.e. roughly
# 50% (Group_A), 69% (Group_B) and 84% (Group_C).
is_conversion_probabilities = {
    'Group_A': np.random.normal(loc=0.5, scale=0.2, size=num_rows),
    'Group_B': np.random.normal(loc=0.6, scale=0.2, size=num_rows),
    'Group_C': np.random.normal(loc=0.7, scale=0.2, size=num_rows)
}

# 1 where the score exceeds 0.5, else 0; blocks are concatenated in dict order
is_conversion = np.concatenate([
    (probabilities > 0.5).astype(int) for probabilities in is_conversion_probabilities.values()
])

# Build the calendar once rather than once per sampled row, then draw each
# group's activation dates (uniformly, with replacement) in a single call.
# The draws are made group by group, in the same order as before.
candidate_dates = pd.date_range(start_date, end_date)
activation_dates = np.concatenate([
    np.random.choice(candidate_dates, size=num_rows)
    for _ in is_conversion_probabilities
])

# Group labels are deterministic: each key repeated num_rows times, in dict
# order, so they line up with is_conversion and activation_dates.
groups = np.repeat(list(is_conversion_probabilities), num_rows)

# Create DataFrame
df = pd.DataFrame({
    'IsConversion': is_conversion,
    'ActivationDate': activation_dates,
    'Group': groups
})

# Sort by Group first, then by ActivationDate within each group
df = df.sort_values(by=['Group', 'ActivationDate']).reset_index(drop=True)

# Conversion date = activation date + a random lag of 1..29 days
# (np.random.randint excludes the upper bound 30). The lags are independent of
# the group, so one vectorised draw over the already group-sorted frame
# replaces the former groupby/transform.
# NOTE(review): every row receives a ConversionDate, including rows with
# IsConversion == 0 -- confirm whether non-converters should carry NaT instead.
day_offsets = np.random.randint(1, 30, size=len(df))
df['ConversionDate'] = df['ActivationDate'] + pd.to_timedelta(day_offsets, unit='D')

# Write DataFrame to CSV (relative to the current working directory)
df.to_csv('conversionABC.csv', index=False)

# Display the first rows as plain text
print(df.head())


   IsConversion ActivationDate    Group ConversionDate
0             0     2022-01-01  Group_A     2022-01-10
1             0     2022-01-01  Group_A     2022-01-11
2             1     2022-01-01  Group_A     2022-01-22
3             1     2022-01-01  Group_A     2022-01-24
4             0     2022-01-03  Group_A     2022-01-30
