# Setup

In [85]:
import pandas as pd
import random

# Classes

In [86]:
class Car:
    def __init__(self, model, make, year, number_seats, weight_in_kg, price_in_euro, fuel_level_in_percent):
        self.model: str = model
        self.make: str = make
        self.year: int = year
        self.number_seats: int = number_seats
        self.weight_in_kg: int = weight_in_kg
        self.price_in_euro: int = price_in_euro
        self.fuel_level_in_percent: float = fuel_level_in_percent  # 0 - 1

        self.speed: int = 0
        self.acceleration: float = 0


        self.cruise_control: bool = False
        self.cruise_control_speed: int | None = None

        self.engine_temperature: float = 20
        self.engine_rpm: int = 0
        self.engine_oil_pressure: float = 2

        self.tire_pressure: float = 2.5

In [87]:
class Environment:
    def __init__(self, ambient_light_level_in_lux):
        self.ambient_light_level_in_lux: float = ambient_light_level_in_lux  # look at whatsapp
        self.current_timestamp = 0  # seconds

    def step(self, car: Car, next_acceleration: float):
        self.current_timestamp += 1
        car.acceleration = next_acceleration

        # update speed
        car.speed = int(next_acceleration + car.speed)

# Preconfigured cars

In [88]:
# Sample cars
audi_a7 = Car(model="A7", make="Audi", year=2020, number_seats=5, weight_in_kg=1645, price_in_euro=72400, fuel_level_in_percent=0.7)
bmw_3_series = Car(
    model="3 Series", make="BMW", year=2021, number_seats=5, weight_in_kg=1570, price_in_euro=50200, fuel_level_in_percent=0.8
)
mercedes_e_class = Car(
    model="E-Class", make="Mercedes-Benz", year=2019, number_seats=5, weight_in_kg=1780, price_in_euro=64500, fuel_level_in_percent=0.6
)
ford_mustang = Car(
    model="Mustang", make="Ford", year=2022, number_seats=4, weight_in_kg=1685, price_in_euro=56500, fuel_level_in_percent=0.7
)
toyota_camry = Car(
    model="Camry", make="Toyota", year=2020, number_seats=5, weight_in_kg=1525, price_in_euro=38900, fuel_level_in_percent=0.9
)
volkswagen_golf = Car(
    model="Golf", make="Volkswagen", year=2021, number_seats=5, weight_in_kg=1265, price_in_euro=27800, fuel_level_in_percent=0.8
)
tesla_model_s = Car(
    model="Model S", make="Tesla", year=2022, number_seats=5, weight_in_kg=2100, price_in_euro=79900, fuel_level_in_percent=0.85
)
honda_civic = Car(
    model="Civic", make="Honda", year=2019, number_seats=5, weight_in_kg=1295, price_in_euro=25500, fuel_level_in_percent=0.75
)
jaguar_f_type = Car(
    model="F-Type", make="Jaguar", year=2023, number_seats=2, weight_in_kg=1665, price_in_euro=81200, fuel_level_in_percent=0.7
)
# Append cars to a list
car_list = [audi_a7, bmw_3_series, mercedes_e_class, ford_mustang, toyota_camry, volkswagen_golf, tesla_model_s, honda_civic, jaguar_f_type]


def get_car() -> Car:
    return random.choice(car_list)


# Generators

In [89]:
def generate_car() -> Car:
    car: Car = get_car()

    car.year = random.randint(1990, 2024)

    price: int = car.price_in_euro
    for i in range(2024 - car.year):
        change_of_value = random.randint(-700, 100)
        price += change_of_value
    car.price_in_euro = price

    car.fuel_level_in_percent = random.random()

    return car


def generate_env() -> Environment:
    # TODO:
    return Environment(ambient_light_level_in_lux=10_000)


def create_row(car: Car, env: Environment) -> pd.DataFrame:
    row = pd.DataFrame(
        [
            {
                "model": car.model,
                "make": car.make,
                "year": car.year,
                "number_seats": car.number_seats,
                "weight_in_kg": car.weight_in_kg,
                "price_in_euro": car.price_in_euro,
                "fuel_level_in_percent": car.fuel_level_in_percent,
                "ambient_light_level_in_lux": env.ambient_light_level_in_lux,
                "speed": car.speed,
                "acceleration": car.acceleration,
            }
        ]
    )

    return row

# Dataset generator

In [90]:
def generate_dataset(
    number_of_rows: int, number_of_custom_outliers: int, number_of_custom_nulls: int, use_time_inaccuracy: bool, chance_of_drive_end: float
) -> pd.DataFrame:
    output_df = pd.DataFrame()
    # generate car
    car: Car = generate_car()
    env: Environment = generate_env()
    # while not enough rows
    for i in range(number_of_rows):
        # has ride ended?
        if False:  # TODO:
            # generate car
            car = generate_car()
            env = generate_env()

        # get row and add
        # env.getrow TODO:
        row: pd.DataFrame = create_row(car=car, env=env)
        output_df: pd.DataFrame = pd.concat([output_df, row])

        env.step(car=car, next_acceleration=0.5)

    # manipulate data
    # apply outliers and nulls

    # use time inaccuracy

    return output_df

In [91]:
generate_dataset(number_of_rows=100,
                 number_of_custom_outliers=2,
                 number_of_custom_nulls=3,
                 use_time_inaccuracy=False,
                 chance_of_drive_end=0.1)

Unnamed: 0,model,make,year,number_seats,weight_in_kg,price_in_euro,fuel_level_in_percent,ambient_light_level_in_lux,speed,acceleration
0,Camry,Toyota,2021,5,1525,38353,0.864351,10000,0,0.0
0,Camry,Toyota,2021,5,1525,38353,0.864351,10000,0,0.5
0,Camry,Toyota,2021,5,1525,38353,0.864351,10000,0,0.5
0,Camry,Toyota,2021,5,1525,38353,0.864351,10000,0,0.5
0,Camry,Toyota,2021,5,1525,38353,0.864351,10000,0,0.5
...,...,...,...,...,...,...,...,...,...,...
0,Camry,Toyota,2021,5,1525,38353,0.864351,10000,0,0.5
0,Camry,Toyota,2021,5,1525,38353,0.864351,10000,0,0.5
0,Camry,Toyota,2021,5,1525,38353,0.864351,10000,0,0.5
0,Camry,Toyota,2021,5,1525,38353,0.864351,10000,0,0.5
