# Dummy Dataset For Used Car Website

In [3]:
!pip install faker



In [4]:
!pip install tabulate



In [5]:
from faker import Faker
from tabulate import tabulate
import random
from datetime import datetime, timedelta
import csv

In [12]:
# Shared Faker instance used by every generator below; 'id_ID' selects the Indonesian locale
FAKER = Faker('id_ID')

### Function to extract a csv file into a dictionary

In [19]:
def csv_to_dict(filename):
    """
    Read a CSV file into a column-oriented dictionary.

    arg:
        - filename (str) : path of the CSV file to open

    return:
        - data (dict) : maps each header name to the list of that column's
          values (all values are strings, in file order). A file that only
          contains a header row yields every column mapped to an empty list.
    """

    # utf-8-sig strips a leading BOM if present; newline='' lets the csv
    # module handle line endings itself, as its documentation requires
    with open(filename, mode='r', encoding='utf-8-sig', newline='') as file:
        csv_reader = csv.DictReader(file)

        # Seed every header up front so the columns exist even with no rows
        data = {field: [] for field in (csv_reader.fieldnames or [])}

        for row in csv_reader:
            for key, value in row.items():
                # setdefault() also covers keys DictReader adds on its own
                # (e.g. the restkey used for rows with surplus fields)
                data.setdefault(key, []).append(value)

    return data

### Function to show table

In [20]:
def show_data(table):
    """
    Print a table to standard output using tabulate.

    arg:
        - table (dict) : data to display; its keys are used as the
          column headers

    return:
        None
    """

    # "psql" table format with numeric columns centred
    rendered = tabulate(table,
                        headers=table.keys(),
                        tablefmt="psql",
                        numalign="center")
    print(rendered)

## Dummy dataset 
The following 6 tables are created, in this order:
1. cities
2. users
3. cars
4. advertisements
5. bids
6. bid_details

### 1. Dummy data tabel cities


In [21]:
# Load the city data from cities.csv and display it
city_table = csv_to_dict('cities.csv')
show_data(city_table)

+-----------+----------------------+---------------------------+
|  city_id  | city_name            | location                  |
|-----------+----------------------+---------------------------|
|   3171    | Kota Jakarta Pusat   | (106.834091, -6.186486)   |
|   3172    | Kota Jakarta Utara   | (106.774124, -6.121435)   |
|   3173    | Kota Jakarta Barat   | (106.813301, -6.1352)     |
|   3174    | Kota Jakarta Selatan | (106.814095, -6.300641)   |
|   3175    | Kota Jakarta Timur   | (106.895859, -6.264451)   |
|   3573    | Kota Malang          | (112.626503, -7.981894)   |
|   3578    | Kota Surabaya        | (112.734398, -7.289166)   |
|   3471    | Kota Yogyakarta      | (110.368797, -7.797224)   |
|   3273    | Kota Bandung         | (107.6098111, -6.9147444) |
|   1371    | Kota Padang          | (100.3530556, -0.95)      |
|   1375    | Kota Bukittinggi     | (100.3691667, -0.3055556) |
|   6471    | Kota Balikpapan      | (116.8278833, -1.2635389) |
|   6472    | Kota Samari

### 2. Dummy data tabel users

In [22]:
def generate_name(n_name):
    """
    Generate unique dummy full names.

    args:
    - n_name (int): number of names to generate

    returns:
    names (list): unique "<first name> <last name>" strings, in the order
        they were generated. The loop runs until n_name distinct names have
        been produced.
    """

    names = []
    # Set mirror of `names` so the uniqueness check is O(1) instead of a
    # linear scan of the list on every draw
    seen = set()

    while len(names) < n_name:
        full_name = f'{FAKER.first_name()} {FAKER.last_name()}'
        if full_name not in seen:
            seen.add(full_name)
            names.append(full_name)

    return names

In [31]:
def users_generator(n_user, is_print):
    """
    Build the dummy dataset for the users table.
    header:
        - user_id
        - city_id
        - name
        - email
        - phone_number
        - address

    args:
        - n_user (int) : number of users to create
        - is_print (bool) : if True, the resulting table is printed

    return:
        - table (dict) : maps each header to a list of n_user values

    Note: city_id values are drawn from the module-level `city_table`.
    """

    # Names are generated once so `name` and `email` describe the same person
    names = generate_name(n_user)

    # Create table
    table = {}
    table["user_id"] = [i + 1 for i in range(n_user)]
    table['city_id'] = [random.choice(city_table['city_id']) for _ in range(n_user)]
    table['name'] = [name.title() for name in names]
    # E-mail local part is the lower-cased name with spaces removed
    table['email'] = [f"{name.lower().replace(' ', '')}@{FAKER.free_email_domain()}"
                      for name in names]
    table['phone_number'] = [FAKER.phone_number() for _ in range(n_user)]
    table['address'] = [FAKER.street_address() for _ in range(n_user)]

    # Print table
    if is_print:
        show_data(table)

    return table

In [56]:
# Bound as users_table: the bids cell and the CSV export cell read that name
users_table = users_generator(40, True)

+-----------+-----------+-------------------+------------------------------+---------------------+-------------------------------+
|  user_id  |  city_id  | name              | email                        | phone_number        | address                       |
|-----------+-----------+-------------------+------------------------------+---------------------+-------------------------------|
|     1     |   3171    | Cagak Januar      | cagakjanuar@gmail.com        | +62 (062) 796 0747  | Gg. Pelajar Pejuang No. 0     |
|     2     |   5171    | Sakti Halim       | saktihalim@gmail.com         | 087 314 2045        | Gg. BKR No. 02                |
|     3     |   3173    | Amelia Kusumo     | ameliakusumo@gmail.com       | +62-91-056-4392     | Gg. Laswi No. 493             |
|     4     |   3173    | Kasusra Wibisono  | kasusrawibisono@hotmail.com  | +62-80-199-7548     | Jalan Dr. Djunjunan No. 6     |
|     5     |   3171    | Salman Hutapea    | salmanhutapea@yahoo.com      | +62-04

### 3. Dummy data tabel cars

In [57]:
# Load the provided car data from cars.csv and display it
car_csv = csv_to_dict('cars.csv')
show_data(car_csv)

+----------+----------+-----------------+-------------+--------+-----------+
|  car_id  | brand    | model           | body_type   |  year  |   price   |
|----------+----------+-----------------+-------------+--------+-----------|
|    1     | Toyota   | Toyota Yaris    | Hatchback   |  2016  | 175000000 |
|    2     | Toyota   | Toyota Yaris    | Hatchback   |  2018  | 215000000 |
|    3     | Toyota   | Toyota Yaris    | Hatchback   |  2014  | 162000000 |
|    4     | Toyota   | Toyota Yaris    | Hatchback   |  2020  | 220000000 |
|    5     | Toyota   | Toyota Yaris    | Hatchback   |  2012  | 124000000 |
|    6     | Toyota   | Toyota Agya     | Hatchback   |  2019  | 114000000 |
|    7     | Toyota   | Toyota Agya     | Hatchback   |  2014  | 97000000  |
|    8     | Toyota   | Toyota Agya     | Hatchback   |  2016  | 110000000 |
|    9     | Toyota   | Toyota Agya     | Hatchback   |  2022  | 155500000 |
|    10    | Toyota   | Toyota Agya     | Hatchback   |  2017  | 115500000 |

In [93]:
def product_generator(n_car, car_table, is_print, n_user=20):
    """
    Build the dummy dataset for the cars table.
    header:
        - car_id
        - user_id
        - brand
        - model
        - body_type
        - car_type
        - year
        - price
        - photo

    args:
        - n_car (int) : number of cars to create
        - car_table (dict) : provided car data as a dict of column lists with
          the keys car_id, brand, model, body_type, year and price
        - is_print (bool) : if True, the resulting table is printed
        - n_user (int) : highest user_id that may own a car; owners are drawn
          uniformly from 1..n_user (default 20)

    return:
        - table (dict) : maps each header to a list of values. Every column
          has the same length: n_car, capped at the number of rows
          available in car_table.
    """

    # List of car transmissions
    car_type = ['Automatic', 'Manual']

    # The copied columns and the generated columns must line up row by row,
    # so both are limited to the same row count
    n_rows = max(0, min(n_car, len(car_table["car_id"])))

    # Create table
    table = {}
    table["car_id"] = list(car_table["car_id"][:n_rows])
    table["user_id"] = [random.randint(1, n_user) for _ in range(n_rows)]
    table["brand"] = list(car_table["brand"][:n_rows])
    table["model"] = list(car_table["model"][:n_rows])
    table["body_type"] = list(car_table["body_type"][:n_rows])
    table["car_type"] = [random.choice(car_type) for _ in range(n_rows)]
    table["year"] = list(car_table["year"][:n_rows])
    table["price"] = list(car_table["price"][:n_rows])
    table["photo"] = [FAKER.image_url() for _ in range(n_rows)]

    # Print table
    if is_print:
        show_data(table)

    return table

In [94]:
# Build the cars table (50 cars requested) from the CSV data and print it
car_table = product_generator(50, car_csv, True)

+----------+-----------+----------+-----------------+-------------+------------+--------+-----------+----------------------------------+
|  car_id  |  user_id  | brand    | model           | body_type   | car_type   |  year  |   price   | photo                            |
|----------+-----------+----------+-----------------+-------------+------------+--------+-----------+----------------------------------|
|    1     |     9     | Toyota   | Toyota Yaris    | Hatchback   | Manual     |  2016  | 175000000 | https://placekitten.com/953/320  |
|    2     |    13     | Toyota   | Toyota Yaris    | Hatchback   | Manual     |  2018  | 215000000 | https://picsum.photos/214/946    |
|    3     |     3     | Toyota   | Toyota Yaris    | Hatchback   | Automatic  |  2014  | 162000000 | https://dummyimage.com/239x218   |
|    4     |    20     | Toyota   | Toyota Yaris    | Hatchback   | Automatic  |  2020  | 220000000 | https://placekitten.com/510/668  |
|    5     |     8     | Toyota   | Toyot

### 4. Dummy data tabel advertisements

In [54]:
def advertisement_generator(n_advertisement, car_table, is_print):
    """
    Build the dummy dataset for the advertisements table.
    header:
        - advertisement_id
        - car_id
        - title
        - description
        - bid_feature

    args:
        - n_advertisement (int) : number of advertisements to create
        - car_table (dict) : cars table as a dict of column lists; only its
          "car_id" column is read
        - is_print (bool) : if True, the resulting table is printed

    return:
        - table (dict) : maps each header to a list of values. Every column
          has the same length: n_advertisement, capped at the number of
          cars in car_table. Advertisement i refers to the i-th car.
    """

    # Take the ids from the car_table argument (not from a global) and keep
    # every column at the same row count
    car_ids = car_table["car_id"]
    n_rows = max(0, min(n_advertisement, len(car_ids)))

    # Create table
    table = {}
    table["advertisement_id"] = [i + 1 for i in range(n_rows)]
    table["car_id"] = list(car_ids[:n_rows])
    table["title"] = [FAKER.text(max_nb_chars=20) for _ in range(n_rows)]
    table["description"] = [FAKER.sentence(nb_words=10, variable_nb_words=True)
                            for _ in range(n_rows)]
    # True is weighted 0.9 against 0.1 for False
    table["bid_feature"] = random.choices([True, False], weights=(0.9, 0.1), k=n_rows)

    # Print table
    if is_print:
        show_data(table)

    return table

In [55]:
# Pass the cars table built by product_generator; no cell defines car_table_fix
advertisement_table = advertisement_generator(50, car_table, True)

+--------------------+----------+----------------------+-----------------------------------------------------------------------------------------------------------------+---------------+
|  advertisement_id  |  car_id  | title                | description                                                                                                     | bid_feature   |
|--------------------+----------+----------------------+-----------------------------------------------------------------------------------------------------------------+---------------|
|         1          |    1     | Et placeat vel.      | Incidunt quas beatae reiciendis voluptas assumenda harum sunt corporis.                                         | True          |
|         2          |    2     | Iure id odio totam.  | Fugiat ullam ut aspernatur deserunt quia ab nisi nostrum atque consectetur itaque.                              | True          |
|         3          |    3     | Aut tenetur vero.    | At fuga 

### 5. Dummy data tabel bids

In [62]:
def bid_generator(n_bid, user_table, advertisement_table, is_print):
    """
    Build the dummy dataset for the bids table.
    header:
        - bid_id
        - advertisement_id
        - user_id
        - bid_price

    args:
        - n_bid (int) : number of bids to create
        - user_table (dict) : users table; its 'user_id' column is sampled
        - advertisement_table (dict) : advertisements table; its
          'advertisement_id' column is sampled
        - is_print (bool) : if True, the resulting table is printed

    return:
        - table (dict) : maps each header to a list of n_bid values
    """

    million = 1_000_000

    # Columns are drawn one after another: advertisements, users, then prices
    bid_ids = list(range(1, n_bid + 1))
    chosen_ads = [random.choice(advertisement_table['advertisement_id']) for _ in bid_ids]
    chosen_users = [random.choice(user_table['user_id']) for _ in bid_ids]
    raw_prices = [random.randint(80 * million, 120 * million) for _ in bid_ids]

    table = {
        "bid_id": bid_ids,
        "advertisement_id": chosen_ads,
        "user_id": chosen_users,
        # Round each price down to a whole number of millions
        'bid_price': [(raw // million) * million for raw in raw_prices],
    }

    if is_print:
        show_data(table)

    return table

In [64]:
# Generate 100 bids drawn from the users and advertisements tables and print them
bid_table = bid_generator(100, users_table, advertisement_table, True)

+----------+--------------------+-----------+-------------+
|  bid_id  |  advertisement_id  |  user_id  |  bid_price  |
|----------+--------------------+-----------+-------------|
|    1     |         18         |    26     |  90000000   |
|    2     |         46         |    30     |  94000000   |
|    3     |         17         |    24     |  85000000   |
|    4     |         10         |    22     |  80000000   |
|    5     |         1          |     5     |  87000000   |
|    6     |         27         |     1     |  99000000   |
|    7     |         20         |    19     |  82000000   |
|    8     |         48         |    20     |  89000000   |
|    9     |         40         |    39     |  86000000   |
|    10    |         6          |    25     |  110000000  |
|    11    |         25         |    31     |  103000000  |
|    12    |         15         |     3     |  94000000   |
|    13    |         30         |    19     |  82000000   |
|    14    |         27         |    22 

### 6. Dummy data tabel bid_details

In [46]:
def generate_status(bid_table):
    """
    Generate the status history of every bid.

    Rules (checked in this order):
    - bid_id divisible by 9 : 'Sent' followed by 'Rejected'
    - bid_id divisible by 5 : 'Sent' followed by 'Accepted'
    - anything else         : 'Sent' only

    args:
    - bid_table (dict): bids table; only its 'bid_id' column is read

    returns:
    list (list) : strings of the form '<bid_id> <status> <created_at>',
        one per status record. Within a bid the records are chronological,
        so 'Sent' never carries a later timestamp than its follow-up status.
    """

    list_status = []

    # Window in which the status timestamps are drawn
    start_date = datetime(2023, 6, 1)
    end_date = datetime(2024, 4, 30, 23, 59, 59)

    for bid_id in bid_table['bid_id']:
        if bid_id % 9 == 0:
            statuses = ('Sent', 'Rejected')
        elif bid_id % 5 == 0:
            statuses = ('Sent', 'Accepted')
        else:
            statuses = ('Sent',)

        # Draw one timestamp per status and sort them so that the statuses
        # are stamped in the order they occur
        timestamps = sorted(
            FAKER.date_time_between(start_date, end_date) for _ in statuses
        )

        for status, created_at in zip(statuses, timestamps):
            list_status.append(f'{bid_id} {status} {created_at}')

    return list_status


In [85]:
def generate_bid_details(bid_table, is_print):
    """
    Build the dummy dataset for the bid_details table.
    header:
        - bid_detail_id
        - bid_id
        - label
        - created_at

    args:
        - bid_table (dict) : bids table, passed on to generate_status
        - is_print (bool) : if True, the resulting table is printed

    return:
        - table (dict) : maps each header to a list with one value per
          status record; bid_detail_id starts at 111
    """

    # Each status entry is a space-separated string; split it once and pick
    # the pieces: [0] bid id, [1] status label, [2] and [3] timestamp parts
    rows = [entry.split(' ') for entry in generate_status(bid_table)]

    table = {
        "bid_detail_id": list(range(111, 111 + len(rows))),
        "bid_id": [parts[0] for parts in rows],
        "label": [parts[1] for parts in rows],
        'created_at': [f"{parts[2]} {parts[3]}" for parts in rows],
    }

    if is_print:
        show_data(table)

    return table


In [84]:
# Build the bid_details table from the generated bids and print it
bid_details_table = generate_bid_details(bid_table, True)

+-----------------+----------+----------+---------------------+
|  bid_detail_id  |  bid_id  | label    | created_at          |
|-----------------+----------+----------+---------------------|
|       111       |    1     | Sent     | 2023-09-17 09:28:38 |
|       112       |    2     | Sent     | 2023-10-24 17:40:57 |
|       113       |    3     | Sent     | 2023-08-09 01:38:24 |
|       114       |    4     | Sent     | 2023-06-12 22:24:16 |
|       115       |    5     | Sent     | 2023-07-24 09:55:15 |
|       116       |    5     | Accepted | 2023-11-08 13:44:56 |
|       117       |    6     | Sent     | 2024-01-06 09:18:23 |
|       118       |    7     | Sent     | 2023-07-02 18:43:43 |
|       119       |    8     | Sent     | 2023-12-17 03:00:36 |
|       120       |    9     | Sent     | 2023-07-21 16:14:06 |
|       121       |    9     | Rejected | 2024-01-14 04:41:50 |
|       122       |    10    | Sent     | 2023-11-28 18:12:01 |
|       123       |    10    | Accepted 

## Menyimpan file dalam bentuk csv

In [86]:
def save_to_csv(data, nama_file):
    '''
    Save a dummy table to a CSV file.

    args:
      - data (dict): table as a dict mapping each header to its column list
      - nama_file (str) : file name without extension; ".csv" is appended

    returns:
      - None

    The first column determines how many rows are written. An empty dict
    produces a file containing only an empty header line.
    '''

    columns = list(data.values())
    n_rows = len(columns[0]) if columns else 0

    # Write UTF-8 explicitly so the output does not depend on the platform's
    # default encoding; newline='' is what the csv module expects
    with open(file=f"{nama_file}.csv", mode='w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)

        # Header row
        writer.writerow(list(data.keys()))

        # Data rows: the i-th value of every column, in header order
        writer.writerows([column[i] for column in columns] for i in range(n_rows))

In [87]:
# Export the users table to users.csv
save_to_csv(data = users_table,
            nama_file='users')

In [95]:
# Export the generated cars table to cars_table.csv
save_to_csv(data = car_table,
            nama_file='cars_table')

In [90]:
# Export the advertisements table to advertisements.csv
save_to_csv(data = advertisement_table,
            nama_file='advertisements')

In [91]:
# Export the bids table to bids.csv
save_to_csv(data = bid_table,
            nama_file='bids')

In [92]:
# Export the bid_details table to bid_details.csv
save_to_csv(data = bid_details_table,
            nama_file='bid_details')