In [85]:
import uuid
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

In [86]:
# Load the seed dataset; the cells below attach synthetic columns to it.
csv_file_path = 'fake_data.csv'
df = pd.read_csv(filepath_or_buffer=csv_file_path)

In [87]:
# One random UUID4 string per row. The count follows the frame's length so the
# assignment cannot fail with a length mismatch when the CSV row count changes.
df['uuid'] = [str(uuid.uuid4()) for _ in range(len(df))]

In [88]:
# Random six-digit identifier per row; `high` is exclusive, so values fall in
# 100000..999998.
df['data_id'] = np.random.randint(low=100000, high=999999, size=df.shape[0])

In [89]:
# Every row gets the same constant state value.
df['state'] = 'uploaded'

In [90]:
def generate_start_end_times(num_times):
    """Build consecutive (start, end) timestamp strings for `num_times` runs.

    The first run starts at 2024-03-20 06:00:00. Each run lasts a random
    100-120 minutes and is followed by a random 90-120 minute gap before the
    next run starts. Before every 4th run (index 4, 8, ...) an additional full
    day is added to the running clock; the clock is NOT reset to 06:00, so the
    time of day keeps drifting from block to block.

    Args:
        num_times: number of runs to generate.

    Returns:
        A tuple ``(start_times, end_times)`` of two equally long lists of
        strings formatted as ``YYYY-MM-DDTHH:MM:SS+00:00``.
    """
    stamp_format = '%Y-%m-%dT%H:%M:%S+00:00'
    starts, ends = [], []
    clock = datetime(2024, 3, 20, 6, 0, 0)

    for run_idx in range(num_times):
        # Every fourth run (except the very first) is pushed one day further.
        if run_idx and run_idx % 4 == 0:
            clock += timedelta(days=1)

        # Draw order (gap first, then duration) is kept so that a seeded
        # NumPy RNG produces the same sequence as before.
        gap_minutes = np.random.randint(90, 121)
        duration_minutes = np.random.randint(100, 121)

        finished = clock + timedelta(minutes=duration_minutes)
        starts.append(clock.strftime(stamp_format))
        ends.append(finished.strftime(stamp_format))

        # The next run begins after the idle gap that follows this one.
        clock = finished + timedelta(minutes=gap_minutes)

    return starts, ends

In [91]:
# Generate exactly one (start, end) pair per row so the column assignment
# that follows cannot fail when the CSV does not hold exactly 9999 rows.
start_times, end_times = generate_start_end_times(len(df))

In [92]:
# Attach the generated timestamp strings as two new columns.
df['start_time'], df['end_time'] = start_times, end_times

In [93]:
num_rows = len(df)

# One vehicle label per row: "knight_" followed by a random number in 50..100.
vehicles = [f'knight_{np.random.randint(50, 101)}' for _ in range(num_rows)]

df['vehicle'] = vehicles

In [94]:
# Pool of first names from which crew members are drawn at random.
names = (
    "Alice Bob Charlie David Emily Frank Grace Henry Ivy Jack "
    "Kate Liam Mia Noah Olivia Peter Quinn Rachel Sam Tracy "
    "Uma Victor Mendy Xavier Yvonne Zack "
    "Adam Benjamin Catherine Daniel Ella Fiona George Hannah Isaac Jane "
    "Kevin Lucy Michael Nancy Oscar Pamela Quentin Rose Steven Tina "
    "Vincent Walter Xander Yasmine Zara"
).split()


In [95]:
# One independent, initially empty name list per crew role.
positions = {role: [] for role in ('driver', 'passenger', 'operator', 'instructor')}

In [96]:
# Draw one name per role for every row. Roles are drawn independently, so the
# same name may fill several roles in one row. The role order within a row is
# fixed so a seeded `random` yields the same sequence of draws.
for _ in range(len(df)):
    for role in ('driver', 'passenger', 'operator', 'instructor'):
        positions[role].append(random.choice(names))

In [97]:
# Copy each role's drawn names into a column of the same name.
for role in ('driver', 'passenger', 'operator', 'instructor'):
    df[role] = positions[role]


In [98]:
# Same fixed 64-character hex string for every row.
df['git_sha'] = 'a7e5f5f6c48bfb037de10b99105605a8f071434700a5a64c5dd99073b33d8449'

In [99]:
# Same fixed placeholder value for every row.
df['git_branch'] = 'github.com/sncrsenyurt'

In [100]:
# Pick a run type uniformly at random for every row. The values are generated
# directly (one draw per row) instead of mapping over an existing 'run_type'
# column, so this cell no longer requires the CSV to already contain that
# column, and no fixed row count is assumed.
options = ['normal_ar', 'dot_to_dot']
df['run_type'] = [random.choice(options) for _ in range(len(df))]

In [101]:
data = {'autonomus_mode': [''] * 9999}

options = ['autonomus', 'manual']
weights = [0.95, 0.05]

df['run_type'] = df['autonomous_mode'].apply(lambda x: random.choices(options, weights)[0])

In [102]:
df['public_roads'] = 'True'

In [103]:
def format_start_time(start_time):
    formatted_time = start_time[:10].replace('-', '') + start_time[11:19].replace(':', '')
    return formatted_time

df['meta_id'] = df['start_time'].apply(lambda x: format_start_time(x)) +' - '+ df['vehicle'].astype(str)

In [104]:
start_number = random.randint(100000, 999999)

missions = [f"TASK-{start_number + i * random.randint(10, 15)}" for i in range(len(df))]

df['mission'] = missions

In [105]:
last_end_km = {}


for index, row in df.iterrows():
    vehicle = row['vehicle']
    start_km = row['total_start_km']
    end_km = row['total_end_km']
    
    
    if vehicle in last_end_km and not np.isnan(last_end_km[vehicle]):
        last_end = last_end_km[vehicle]
        
        
        start_km = random.randint(last_end, last_end + 50)
        
        
        end_km = start_km + random.randint(50, 200)
    else:
        
        start_km = random.randint(1000, 2000)
        end_km = start_km + random.randint(50, 200)
    
    
    last_end_km[vehicle] = end_km
    
    
    total_delta_km = end_km - start_km
    
    
    df.at[index, 'total_start_km'] = start_km
    df.at[index, 'total_end_km'] = end_km
    df.at[index, 'total_delta_km'] = total_delta_km

In [106]:

for index, row in df.iterrows():
    
    total_start_km = row['total_start_km']
    total_end_km = row['total_end_km']
    
    
    run_type = str(row['run_type']).lower()
    if run_type == 'autonomus':
        
        autonom_start_km = random.uniform(total_start_km, total_end_km)
        
        
        autonom_end_km = random.uniform(autonom_start_km, total_end_km)
        
        
        autonomous_delta_km = autonom_end_km - autonom_start_km
        
        df.at[index, 'autonomous_start_km'] = autonom_start_km
        df.at[index, 'autonomous_end_km'] = autonom_end_km
        df.at[index, 'autonomous_delta_km'] = autonomous_delta_km
    else:
        
        df.at[index, 'autonomous_start_km'] = 0
        df.at[index, 'autonomous_end_km'] = 0
        df.at[index, 'autonomous_delta_km'] = 0
        
    
    extra_distance = random.uniform(0, 20)
    total_end_km += extra_distance
    
    
    df.at[index, 'total_end_km'] = total_end_km

In [107]:
df['interventions'] = np.random.randint(0, 31, size=len(df))
df.loc[df['run_type'] == 'manual', 'interventions'] = 0

In [108]:
# Rows without interventions or without autonomous distance get a ratio of 0.
condition1 = df['interventions'].eq(0)
condition2 = df['autonomous_delta_km'].eq(0)

# Replace zero divisors by 1 so the division never hits 0; those rows are
# overwritten with 0 by the outer np.where anyway.
safe_interventions = df['interventions'].where(~condition1, 1)
df['km_per_interventions'] = np.where(
    condition1 | condition2,
    0,
    df['autonomous_delta_km'] / safe_interventions,
)

In [109]:
autonomous_delta_km = df['autonomous_delta_km']

# For each run: truncate the autonomous distance to whole km, draw a
# multiplier in 100..300, then draw the duration in seconds uniformly between
# km and km * multiplier (0 km therefore always yields 0 seconds).
total_autonomous_seconds = []
for whole_km in map(int, autonomous_delta_km):
    multiplier = random.randint(100, 300)
    total_autonomous_seconds.append(random.randint(whole_km, whole_km * multiplier))

df['total_autonomous_seconds'] = total_autonomous_seconds

In [110]:
# Convert the autonomous duration from seconds to hours (3600 s per hour).
df['total_autonomous_hours'] = df['total_autonomous_seconds'].div(3600)

In [111]:
# Weighted score per row: each criterion column is scaled by the weight at the
# same position, then the scaled values are summed across the row.
criteria = ['autonomous_delta_km', 'total_autonomous_seconds', 'interventions', 'total_delta_km']
weights = [0.25, 0.6, 0.05, 0.1]

weighted_criteria = df[criteria].mul(weights, axis='columns')
df['mission_total_point'] = weighted_criteria.sum(axis='columns')

In [112]:
# Persist the enriched frame; the index is not written as a column.
df.to_csv(path_or_buf='updated_fake_data.csv', index=False)