In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

In [2]:
# Seed NumPy's global random state so every np.random.* draw below is
# repeatable from a fresh kernel.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [3]:
# Build a synthetic conversion dataset (IsConversion, ActivationDate,
# ConversionDate, Group), sort it by activation date and save it to
# 'conversionABC.csv'. All randomness comes from NumPy's global np.random state.

# Number of rows in the dataset
num_rows = 1000

# IsConversion: draw from a normal distribution centred on 0.5 and threshold at
# that same value, so each row is flagged 1 with probability 0.5.
is_conversion_probabilities = np.random.normal(loc=0.5, scale=0.2, size=num_rows)
is_conversion = (is_conversion_probabilities > 0.5).astype(int)

# Activation dates: sampled with replacement from every calendar day between
# start_date and end_date (both inclusive). The candidate range is built once
# and sampled in a single vectorised call instead of being rebuilt per row.
start_date = datetime(2022, 1, 1)
end_date = datetime(2022, 12, 31)
candidate_dates = pd.date_range(start_date, end_date)
activation_dates = pd.to_datetime(np.random.choice(candidate_dates, size=num_rows))

# Conversion dates: activation date plus a random whole-day delay.
# np.random.randint excludes the upper bound, so the delay is 1..29 days.
# NOTE(review): a ConversionDate is generated for every row, including rows
# with IsConversion == 0 -- confirm this is what downstream analysis expects.
conversion_delay_days = np.random.randint(1, 30, size=num_rows)
conversion_dates = activation_dates + pd.to_timedelta(conversion_delay_days, unit='D')

# Group labels: one of three groups, chosen uniformly at random per row
groups = np.random.choice(['Group_A', 'Group_B', 'Group_C'], size=num_rows)

# Assemble the DataFrame and order it chronologically by ActivationDate,
# renumbering the index after the sort.
df = (
    pd.DataFrame({
        'IsConversion': is_conversion,
        'ActivationDate': activation_dates,
        'ConversionDate': conversion_dates,
        'Group': groups
    })
    .sort_values(by='ActivationDate')
    .reset_index(drop=True)
)

# Write DataFrame to CSV (index omitted)
df.to_csv('conversionABC.csv', index=False)

# Display the first rows of the DataFrame
print(df.head())


   IsConversion ActivationDate ConversionDate    Group
0             1     2022-01-01     2022-01-22  Group_B
1             0     2022-01-01     2022-01-13  Group_A
2             1     2022-01-01     2022-01-03  Group_C
3             0     2022-01-02     2022-01-31  Group_C
4             1     2022-01-02     2022-01-27  Group_B
