In [None]:
import pandas as pd
import random
import os
import time
import numpy as np

In [None]:
list(range(1, 4 + 1))

In [None]:


def gen_data2(
    num_people,
    num_choices,
    alternatives,
    numerical_specs,  
    category_specs,
    availability_probs=None,
):
    random.seed(time.time())
    np.random.seed(int(time.time()))

    #checking numerical_specs is provided
    if numerical_specs is None:
        raise ValueError("You must provide 'numerical_specs' for this version.")
    num_numeric_features = len(numerical_specs)
    num_categorical_features = len(category_specs)

    
    #alternatives = list(range(1, num_alternatives + 1))
    num_alternatives = len(alternatives)

    #checking availability_probs
    if availability_probs[1] is None:
        availability_probs[1] = [0.8] * num_alternatives
    elif len(availability_probs[1]) != num_alternatives:
        raise ValueError("Length of availability_probs must match num_alternatives")

    #Numerical features naming
    if any('Name' in spec for spec in numerical_specs) == True:
        numeric_features = [spec['Name'] for spec in numerical_specs]
    else:
        numeric_features = [f"Feature{i+1}" for i in range(num_numeric_features)]


    #Creating Dictionary for categorical features (name : levels)
    if any('Name' in spec for spec in category_specs) == True:
        categorical_features = {spec['Name']: spec['level'] for spec in category_specs}
    else:
        categorical_features = {}
        for spec in category_specs:
            i = category_specs.index(spec)
            categorical_features [f"Category{i+1}"] = spec['level']

    data = []

    for i in range(1, num_people + 1):
        for j in range(1, num_choices + 1):
            person_id = i
            choice_id = j

            ### Determining availability ###
            # Always available
            if availability_probs[0] == 1:
                availability = {f"Avail.{alt}": 1 for alt in alternatives}
            else:
                #randomly generate availability for each alternative
                availability = {
                    f"Avail.{alt}": int(random.random() < availability_probs[1][idx])
                    for idx, alt in enumerate(alternatives)
                    }
                #at least one alternative must be available
                if sum(availability.values()) == 0:
                    forced_alt = random.choice(alternatives)
                    availability[f"Avail.{forced_alt}"] = 1

            available_alts = [alt for alt in alternatives if availability[f"Avail.{alt}"] == 1]
            chosen_alt = random.choice(available_alts)

            entry = {
                'P.ID': person_id,
                'C.ID': choice_id,
            }
            entry.update(availability)

            # Generate independent features based on min, max, mean
            for idx, name in enumerate(numeric_features):
                f_range = numerical_specs[idx]
                min_val = f_range['min']
                max_val = f_range['max']
                mean_val = f_range['mean']
                value = np.random.triangular(min_val, mean_val, max_val)
                entry[name] = round(value, 2)

            for cat_feature, options in categorical_features.items():
                entry[cat_feature] = random.choice(options)

            entry['Choice'] = chosen_alt
            data.append(entry)
    
    df = pd.DataFrame(data)
    #filename = get_next_filename()
    #df.to_csv(filename, index=False)
    #print(f"Biogeme-ready CSV saved as {filename}")
    return df

In [158]:
'To-Do: continue this'
'1. update the function s.t. it can take all of attributes of each alternatives'

car_attribute = ["Car",
        {'Name': "TravelCost", 'min': 20, 'max': 120, 'mean': 80},
        {'Name': "TravelTime",'min': 10, 'max': 90, 'mean': 40},
        {'Name': "ServiceCost",'min': 5, 'max': 20, 'mean': 10}
    ]

bus_attribute = ["Bus",
        {'Name': "TravelCost", 'min': 10, 'max': 60, 'mean': 30},
        {'Name': "TravelTime",'min': 20, 'max': 90, 'mean': 50},
        {'Name': "AccessTime",'min': 4, 'max': 18, 'mean': 12}
    ]

def attribute_gen(attribute_specs):
    attributes_name = [attr['Name'] + "_" + attribute_specs[0] for attr in attribute_specs[1:]]
    #print(attributes_name)
    for idx, name in enumerate(attributes_name):
        f_range = attribute_specs[idx + 1]
        min_val = f_range['min']
        max_val = f_range['max']
        mean_val = f_range['mean']
        value = np.random.triangular(min_val, mean_val, max_val)
        entry[name] = round(value, 2)
    print (entry)
entry = {}
attribute_gen(car_attribute)
attribute_gen(bus_attribute)


print(entry)

{'TravelCost_Car': 63.85, 'TravelTime_Car': 67.67, 'ServiceCost_Car': 11.04}
{'TravelCost_Car': 63.85, 'TravelTime_Car': 67.67, 'ServiceCost_Car': 11.04, 'TravelCost_Bus': 33.71, 'TravelTime_Bus': 34.9, 'AccessTime_Bus': 11.37}
{'TravelCost_Car': 63.85, 'TravelTime_Car': 67.67, 'ServiceCost_Car': 11.04, 'TravelCost_Bus': 33.71, 'TravelTime_Bus': 34.9, 'AccessTime_Bus': 11.37}


In [None]:
## Define alternatives
alternatives = ["Car", "Bus", "Train"]
car_attribute = [alternatives[0],
        {'Name': "TravelCost", 'min': 20, 'max': 120, 'mean': 80},
        {'Name': "TravelTime",'min': 10, 'max': 90, 'mean': 40},
        {'Name': "ServiceCost",'min': 5, 'max': 20, 'mean': 10}
    ]

bus_attribute = [alternatives[1],
        {'Name': "TravelCost", 'min': 10, 'max': 60, 'mean': 30},
        {'Name': "TravelTime",'min': 20, 'max': 90, 'mean': 50},
        {'Name': "AccessTime",'min': 4, 'max': 18, 'mean': 12}
    ]

numerical_specs = [
        {'Name': "TravelCost", 'min': 15, 'max': 80, 'mean': 40},
        {'Name': "TravelTime",'min': 15, 'max': 70, 'mean': 35},
        {'Name': "ServiceCost",'min': 3, 'max': 15, 'mean': 7}
    ]



entry = {}
for idx, name in enumerate(numeric_features):
    print(numerical_specs[idx], name)
    f_range = numerical_specs[idx]
    min_val = f_range['min']
    max_val = f_range['max']
    mean_val = f_range['mean']
    value = np.random.triangular(min_val, mean_val, max_val)
    entry[name] = round(value, 2)

print(entry)

KeyError: 1

In [None]:
## Define alternatives
alternatives = ["Car", "Bus", "Train"]

## define alternatives availability
# Always available: (1, None)
# Randomly available: (0, [0.9, 0.6, 0.3])  # Example probabilities for 3 alternatives
alt_probs = (1, [0.9, 0.6, 0.3])

feature_specs = [
        {'min': 20, 'max': 120, 'mean': 80},
        {'min': 10, 'max': 90, 'mean': 40},
        {'min': 30, 'max': 120, 'mean': 60}
    ]


car_attribute = [alternatives[0],
        {'Name': "TravelCost", 'min': 20, 'max': 120, 'mean': 80},
        {'Name': "TravelTime",'min': 10, 'max': 90, 'mean': 40},
        {'Name': "ServiceCost",'min': 5, 'max': 20, 'mean': 10}
    ]

bus_attribute = [alternatives[1],
        {'Name': "TravelCost", 'min': 10, 'max': 60, 'mean': 30},
        {'Name': "TravelTime",'min': 20, 'max': 90, 'mean': 50},
        {'Name': "AccessTime",'min': 4, 'max': 18, 'mean': 12}
    ]

train_attribute = [alternatives[2],
        {'Name': "TravelCost", 'min': 15, 'max': 80, 'mean': 40},
        {'Name': "TravelTime",'min': 15, 'max': 70, 'mean': 35},
        {'Name': "ServiceCost",'min': 3, 'max': 15, 'mean': 7}
    ]

## Define attribute for each alternatives (make sure each alternatives has attributes)



#types:
# 1: ordinal (e.g., High, Medium, Low)
# 2: nominal (e.g., Car, Bus, Train)
# 3: binary (e.g., Yes/No)
# Level: for ordinal (Ascending order), number of categories for nominal/binary 
category_specs = [
        {'Name': "ServiceLevel", 'type': 1, 'level': ("1","2","3")},
        {'Name': "ServiceType" , 'type': 2, 'level': ("Wi-fi","AC","Free Snacks")},
        {'Name': "Subscription", 'type': 3, 'level': ("Yes","No")} 
    ]

category_specs2 = [
        {'type': 1, 'level': ("1","2","3")},
        {'type': 2, 'level': ("Wi-fi","AC","Free Snacks")},
        {'type': 3, 'level': ("Yes","No")} 
    ]

'To-Do: Update the data generation function'
'1. Feature for each alternatives!'


dataset3 = gen_data2(
        num_people=5,
        num_choices=2,
        alternatives=alternatives,
        numerical_specs= numerical_specs,
        category_specs=category_specs,
        availability_probs=alt_probs
    )
dataset3