# In [None]:
import random
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output
import os
from faker import Faker

# Shared Faker instance; generate_dataset calls fake.address() to fabricate
# a delivery address for reservation groups whose claim option is "Delivery".
fake = Faker()


# Configuration

# Upper limit on the number of bags generated for one flight, keyed by the
# flight's passenger capacity.
total_bags_per_flight = {128: 300, 250: 650, 300: 780, 350: 870}

# Phone-number country prefix, keyed by the airline code used in bag IDs.
# In order: UAE, USA, UK, Qatar, Japan.
airline_country_codes = {"176": "+971", "001": "+1", "125": "+44", "157": "+974", "131": "+81"}
# Output area that captures everything the generate callback prints/displays
output = widgets.Output()

# CSV destination: <home>/Desktop/A/luggage_data.csv (folder created if missing)
desktop_path = os.path.expanduser("~/Desktop/A")
file_path = os.path.join(desktop_path, "luggage_data.csv")
os.makedirs(desktop_path, exist_ok=True)

def generate_dataset(passenger_capacity, luggage_dist, slhs_percent, small_rng, large_rng, equal_rng):
    """Generate a synthetic luggage dataset for three randomly numbered flights.

    For every flight, 10% of the passengers get no bags, 20% get one bag and
    the rest get two; passenger creation stops early once the flight's bag
    limit from ``total_bags_per_flight`` would be exceeded. Bags are then
    packed, in passenger order, into reservation groups whose sizes are drawn
    from the selected distribution. Each group is filled to exactly its drawn
    size, so one passenger's bags can end up in two consecutive groups; only
    the last group of a flight may be smaller than its drawn size.

    Args:
        passenger_capacity: Passengers per flight. Must be a key of
            ``total_bags_per_flight``.
        luggage_dist: ``"small"``, ``"large"`` or ``"equal"``.
        slhs_percent: Probability (0..1) that a reservation group is handled
            by SLHS.
        small_rng: ``(min, max)`` group size used for ``"small"``.
        large_rng: ``(min, max)`` group size used for ``"large"``.
        equal_rng: ``(min, max)`` used for ``"equal"``. Sizes are drawn from
            ``min(small_rng[0], equal_rng[0])`` up to
            ``max(large_rng[1], small_rng[1])``; when ``equal_rng[1]`` lies
            above that upper bound, 10% of the draws come from the sizes
            between the upper bound (exclusive) and ``equal_rng[1]``.

    Returns:
        A ``pandas.DataFrame`` with one row per bag plus one row for every
        passenger travelling without luggage.

    Raises:
        KeyError: If ``passenger_capacity`` or ``luggage_dist`` is not
            supported.
        ValueError: If the selected size bounds can never produce a group of
            at least one bag, or if ``random.randint`` is given an empty
            range.
    """
    # Looked up once, so an unsupported capacity fails before any work is done.
    max_bags = total_bags_per_flight[passenger_capacity]

    # Resolve the group-size bounds once instead of on every draw.
    if luggage_dist == "equal":
        base_min = min(small_rng[0], equal_rng[0])
        base_max = max(large_rng[1], small_rng[1])
        ext_max = equal_rng[1]
        largest_size = max(base_max, ext_max)
    else:
        dist_range = {"small": small_rng, "large": large_rng}[luggage_dist]
        largest_size = dist_range[1]

    # A drawn size below 1 is discarded and redrawn by the grouping loop, so
    # bounds that can never reach 1 would keep that loop spinning forever.
    if largest_size < 1:
        raise ValueError(
            f"group size bounds for {luggage_dist!r} must allow a size of at least 1"
        )

    data = []
    bag_counter = 1
    reservation_counter = 1

    def pick_group_size():
        """Draw the size of the next reservation group."""
        if luggage_dist == "equal":
            # 10% chance for extended sizes
            if ext_max > base_max and random.random() < 0.1:
                return random.randint(base_max + 1, ext_max)
            return random.randint(base_min, base_max)
        return random.randint(*dist_range)

    def finalize_group(group):
        """Stamp shared reservation details on ``group``, emit it and empty it."""
        nonlocal reservation_counter
        if not group:
            return

        handled = random.random() < slhs_percent
        if handled:
            claim = "SLHS Electronic Gates"
        else:
            claim = random.choice(["Delivery", "Pickup Outside the Terminal"])
        address = fake.address() if claim == "Delivery" else None

        # Reservation ID = zero-padded running number followed by the group size.
        res_id = f"RES-{reservation_counter:05d}{len(group)}"
        reservation_counter += 1

        for bag in group:
            bag.update({
                "Reservation_ID": res_id,
                "Handled_by_SLHS": handled,
                "Claim_Option": claim,
                "Delivery_Address": address,
                "Bag_Assignment_Status": "Assigned"
            })
            data.append(bag)
        group.clear()

    def generate_phone(country_code):
        """Generate phone number with country code and 10 digits"""
        return f"{country_code}{random.randint(1000000000, 9999999999):010d}"

    # Airline setup
    airlines = ["176", "001", "125", "157", "131"]
    iata_codes = {"176": "EK", "001": "AA", "125": "BA", "157": "QR", "131": "JL"}
    # Only the first three airlines get a flight.
    flight_airlines = airlines[:3]
    flight_numbers = [
        f"{iata_codes[airline]}{random.randint(1000,9999)}" for airline in flight_airlines
    ]

    # Process each flight; zip keeps every flight paired with its own airline.
    for airline_code, flight_number in zip(flight_airlines, flight_numbers):
        country_code = airline_country_codes[airline_code]
        grouped_bags = []
        current_target = None
        total_bags = 0

        # Passenger allocation: 10% no bags, 20% one bag, remainder two bags
        num_0 = int(passenger_capacity * 0.10)
        num_1 = int(passenger_capacity * 0.20)
        num_2 = passenger_capacity - num_0 - num_1

        passengers = [(generate_phone(country_code), 0) for _ in range(num_0)]
        for bags_each, count in ((1, num_1), (2, num_2)):
            for _ in range(count):
                # Stop adding this kind of passenger once the cap would be exceeded.
                if total_bags + bags_each > max_bags:
                    break
                passengers.append((generate_phone(country_code), bags_each))
                total_bags += bags_each

        # Pack bags into groups of exactly the drawn size
        for phone, num_bags in passengers:
            if num_bags == 0:
                data.append({
                    "Flight_Number": flight_number,
                    "Bag_ID": None,
                    "Phone_Number": phone,
                    "Reservation_ID": None,
                    "Number_of_Bags": 0,
                    "Handled_by_SLHS": None,
                    "Claim_Option": None,
                    "Delivery_Address": None,
                    "Bag_Assignment_Status": "No Luggage"
                })
                continue

            # Create all bags for this passenger first
            passenger_bags = []
            for _ in range(num_bags):
                bag_id = f"{airline_code}-{flight_number}-{bag_counter:06d}"
                bag_counter += 1
                passenger_bags.append({
                    "Flight_Number": flight_number,
                    "Bag_ID": bag_id,
                    "Phone_Number": phone,
                    "Number_of_Bags": num_bags,
                    "Handled_by_SLHS": None,
                    "Claim_Option": None,
                    "Delivery_Address": None,
                })

            # Move the bags into the open group; whatever does not fit spills
            # over into the next group.
            while passenger_bags:
                if current_target is None:
                    current_target = pick_group_size()

                available_space = current_target - len(grouped_bags)
                if available_space <= 0:
                    # Only reachable when a non-positive size was drawn:
                    # discard it and draw again.
                    finalize_group(grouped_bags)
                    current_target = None
                    continue

                grouped_bags.extend(passenger_bags[:available_space])
                passenger_bags = passenger_bags[available_space:]

                # Close the group as soon as it reaches its drawn size
                if len(grouped_bags) >= current_target:
                    finalize_group(grouped_bags)
                    current_target = None

        # Finalize remaining bags (a last, possibly undersized, group)
        finalize_group(grouped_bags)

    return pd.DataFrame(data)

# Widget setup
def _group_size_slider(label, low, high):
    """Return a 1-20 integer range slider labelled ``label``, preset to [low, high]."""
    return widgets.IntRangeSlider(
        description=label,
        value=[low, high],
        min=1,
        max=20,
        continuous_update=False,
    )


# Which group-size distribution the generator uses
luggage_distribution = widgets.Dropdown(
    description="Distribution:",
    options=["small", "large", "equal"],
    value="equal",
)

# Group-size bounds, one slider per distribution
small_range = _group_size_slider("Small:", 2, 3)
large_range = _group_size_slider("Large:", 4, 5)
equal_range = _group_size_slider("Equal:", 1, 7)

# Passengers per flight; the options match the keys of total_bags_per_flight
capacity_dropdown = widgets.Dropdown(
    description="Passenger Capacity:",
    options=[128, 250, 300, 350],
    value=128,
)

# Probability that a reservation group is handled by SLHS
slhs_percent = widgets.Dropdown(
    description="SLHS %:",
    options=[0.25, 0.5, 0.75],
    value=0.5,
)

generate_btn = widgets.Button(description="Generate Dataset")

def on_generate(b):
    """Button callback: build the dataset from the widget values, save and show it.

    ``b`` is the argument supplied by the button's click handler and is not
    used. All printed and displayed output is captured by the ``output``
    widget, which is cleared first. The CSV is written to ``file_path``.
    """
    with output:
        clear_output(wait=True)
        print("🔄 Generating dataset...")

        # Current UI selections, keyed by generate_dataset's parameter names
        settings = {
            "passenger_capacity": capacity_dropdown.value,
            "luggage_dist": luggage_distribution.value,
            "slhs_percent": slhs_percent.value,
            "small_rng": small_range.value,
            "large_rng": large_range.value,
            "equal_rng": equal_range.value,
        }
        df = generate_dataset(**settings)

        df.to_csv(file_path, index=False)
        print(f"✅ Saved to: {file_path}")
        display(df)

# Run on_generate whenever the button is clicked
generate_btn.on_click(on_generate)

# Render the controls top to bottom, with the output area last
display(
    luggage_distribution,
    small_range,
    large_range,
    equal_range,
    capacity_dropdown,
    slhs_percent,
    generate_btn,
    output,
)