In [None]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Pick a random date between two datetimes
def random_date(start, end):
    """Return ``start`` shifted forward by a random whole number of days.

    The offset is drawn uniformly from ``0`` to ``(end - start).days``
    (both inclusive), so the time-of-day of ``start`` is carried over
    unchanged and any sub-day part of the span is ignored.

    Raises:
        ValueError: propagated from ``random.randint`` when ``end`` is
            earlier than ``start`` (the day span is then negative).
    """
    span_days = (end - start).days
    offset = timedelta(days=random.randint(0, span_days))
    return start + offset

# Draw a random 12-digit customer ID
def generate_customer_id():
    """Return a random integer with exactly 12 decimal digits.

    The value is drawn uniformly from 100000000000 to 999999999999
    inclusive. Uniqueness is not enforced here: two calls may return the
    same ID, although that is very unlikely given the size of the range.
    """
    smallest = 10 ** 11
    largest = 10 ** 12 - 1
    return random.randint(smallest, largest)

# Draw a random 6-letter PNR code
def generate_pnr():
    """Return a string of six uppercase ASCII letters chosen at random.

    Letters are sampled with replacement, so repeats within a code are
    possible. Uniqueness across calls is not enforced by this function;
    callers that need distinct codes must check for collisions themselves.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    letters = random.choices(alphabet, k=6)
    return ''.join(letters)

# Draw a frequent-flyer tier
def generate_ffp_program():
    """Return a randomly chosen frequent-flyer tier label.

    A single uniform draw is compared against cumulative upper bounds, so
    the tiers come out with roughly these shares: 'No' 55%, 'Silver' 25%,
    'Gold' 10%, 'Platinum' 8%, 'Diamond' 1.5% and 'Black' the remaining
    0.5%.
    """
    draw = random.random()

    # (cumulative upper bound, tier) pairs, checked in ascending order.
    cumulative_tiers = (
        (0.55, 'No'),
        (0.80, 'Silver'),
        (0.90, 'Gold'),
        (0.98, 'Platinum'),
        (0.995, 'Diamond'),
    )
    for upper_bound, tier in cumulative_tiers:
        if draw <= upper_bound:
            return tier

    # Anything above the last bound falls into the rarest tier.
    return 'Black'

# Draw the disruption flags for one booking
def generate_disruptions():
    """Return a dict of disruption flags with exactly one flag set to 1.

    The keys are 'escala', 'delayed', 'anticipated', 'canceled',
    'rescheduled' and 'baggage', in that order. One key is picked
    uniformly at random and mapped to 1; every other key maps to 0.
    """
    kinds = ['escala', 'delayed', 'anticipated', 'canceled', 'rescheduled', 'baggage']
    flagged = random.choice(kinds)
    return {kind: (1 if kind == flagged else 0) for kind in kinds}

# Draw the lawsuit flag
def generate_lawsuit():
    """Return 1 with probability of about 0.5%, otherwise 0."""
    lawsuit_rate = 0.005
    return int(random.random() <= lawsuit_rate)


In [None]:
# Simulation parameters
n_rows = 10000
start_date = datetime(2015, 1, 1)
end_date = datetime(2024, 7, 1)
random_seed = 42  # set to None to draw a different dataset on every run

# Seed the generator so that Restart & Run All reproduces the same dataset.
random.seed(random_seed)

# A booking has to precede its departure by at least one day, so the earliest
# departure that still leaves a valid booking date is start_date + 1 day.
# Allowing dep_datetime == start_date made the booking-date range empty and
# random.randint (inside random_date) raised ValueError.
one_day = timedelta(days=1)
earliest_departure = start_date + one_day

data = []
seen_pnrs = set()  # PNR codes already handed out, used to keep them distinct

for _ in range(n_rows):
    customer_id = generate_customer_id()
    dep_datetime = random_date(earliest_departure, end_date)

    # The upper bound is the day before departure, so pnr_date is strictly
    # earlier than dep_datetime by construction and no retry is needed.
    pnr_date = random_date(start_date, dep_datetime - one_day)

    # generate_pnr() does not guarantee uniqueness by itself; re-draw on the
    # occasional collision so every row carries a distinct PNR.
    pnr = generate_pnr()
    while pnr in seen_pnrs:
        pnr = generate_pnr()
    seen_pnrs.add(pnr)

    ffp_program = generate_ffp_program()
    disruptions = generate_disruptions()
    lawsuit = generate_lawsuit()

    row = {
        'customer_id': customer_id,
        'dep_datetime': dep_datetime.strftime('%Y-%m-%dT%H:%M:%S'),
        'pnr': pnr,
        'pnr_date': pnr_date.strftime('%Y-%m-%dT%H:%M:%S'),
        'ffp_program': ffp_program,
        'lawsuit': lawsuit
    }
    # Append the disruption flags as additional columns of the same row.
    row.update(disruptions)

    data.append(row)

# Build the frame once from the collected records.
df = pd.DataFrame(data)
print(df.head())