### Objectives
This notebook generates the ``Operation`` and ``Chip_type_with_operation_type`` tables.

In [75]:
import pandas as pd
import numpy as np
import random
import datetime

### Attributes of ``Operation``
Foreign key: ``operation_name``

Primary key: ``id``

Attributes: ``start_time``, ``end_time``

### Attributes of ``Chip_type_with_operation_type`` (The relation between Operation_type and Chip_type)
Foreign key & Primary key: ``operation_name``, ``chip_name``, ``chip_version``

Attributes: ``order``

### Generate ``Chip_type_with_operation_type``
1. Find all chip types and operation types;

2. For each chip type, randomly select several operation types to perform in a specific order.

In [76]:
# Inputs: the catalogue of operation types and the catalogue of chip types.
operation_type = pd.read_csv("dataset/operation_type.csv")
all_names = list(operation_type["operation_name"])

chip_type = pd.read_csv("dataset/chip_type.csv")

# Each chip type gets between 2 and 10 distinct operation types.
# np.random.choice(..., replace=False) raises ValueError when asked for more
# items than the population holds, so the range is capped at the number of
# operation types that actually exist.
max_operations = min(10, len(all_names))
min_operations = min(2, max_operations)

# Parallel column lists; row k of the output table is
# (operation_name[k], chip_name[k], chip_version[k], order[k]).
operation_name = []
chip_name = []
chip_version = []
order = []

# NOTE: no random seed is set, so every run produces a different assignment.
for idx, row in chip_type.iterrows():
    num_of_operations = random.randint(min_operations, max_operations)
    # Sampling without replacement: an operation type appears at most once
    # per chip type, and its position in the sample is its order (0-based).
    selected_names = np.random.choice(all_names, num_of_operations, replace=False)
    for i, selected_name in enumerate(selected_names):
        order.append(i)
        chip_name.append(row["chip_name"])
        chip_version.append(row["chip_version"])
        operation_name.append(selected_name)

chip_type_with_operation_type = pd.DataFrame(data={"operation_name":operation_name, "chip_name":chip_name, "chip_version":chip_version, "order":order})
chip_type_with_operation_type.to_csv("dataset/chip_type_with_operation_type.csv", index=False)

### Generate ``Operation``
1. Find all packages;

2. For each package, find its start time and end time by retrieving from state;

3. For each package, find all chips included in the package;

4. For each chip, find its chip type;

5. For each chip type, find all operation types it needs;

6. Generate random operations.

In [77]:
def _split_period(period_start, period_end, parts):
    """Split [period_start, period_end] into `parts` consecutive random intervals.

    Returns a list of (start, end) datetime pairs. The first interval starts
    at period_start, the last one ends at period_end, and every interval
    starts exactly where the previous one ended, so no interval ends before
    it starts or runs past period_end. Returns an empty list when parts <= 0.

    Raises ValueError if period_end is earlier than period_start.
    """
    if parts <= 0:
        return []
    if period_end < period_start:
        raise ValueError("end_time is earlier than start_time")
    total_seconds = int((period_end - period_start).total_seconds())
    # parts - 1 random cut points (whole seconds after period_start), sorted
    # so that the resulting intervals are consecutive.
    cuts = sorted(random.randint(0, total_seconds) for _ in range(parts - 1))
    boundaries = [period_start]
    boundaries.extend(period_start + datetime.timedelta(seconds=cut) for cut in cuts)
    boundaries.append(period_end)
    return list(zip(boundaries[:-1], boundaries[1:]))


# Map each chip type (chip name + version, concatenated as a string) to its
# ordered list of operation names, using the column lists filled while
# generating Chip_type_with_operation_type.
chip_type_operation_type_map = dict()
for i in range(len(chip_name)):
    key = chip_name[i] + str(chip_version[i])
    if key not in chip_type_operation_type_map:
        chip_type_operation_type_map[key] = list()
    chip_type_operation_type_map[key].append(operation_name[i])

# Map each package id to its parsed start/end timestamps. If a package id
# occurs on several rows of state.csv, the last row wins.
package = pd.read_csv("dataset/state.csv")
period_map = dict()
for idx, row in package.iterrows():
    period_map[row["package_id"]] = dict()
    period_map[row["package_id"]]["start_time"] = datetime.datetime.strptime(row["start_time"], "%Y-%m-%d %H:%M:%S")
    period_map[row["package_id"]]["end_time"] = datetime.datetime.strptime(row["end_time"], "%Y-%m-%d %H:%M:%S")

# Parallel column lists for the Operation table.
# NOTE: `id` shadows the built-in of the same name for the rest of the kernel
# session; the name is kept unchanged here.
id = []
operation_name2 = []
start_time = []
end_time = []
count = 0

chip = pd.read_csv("dataset/chip.csv")
for idx, row in chip.iterrows():
    package_start_time = period_map[row["package_id"]]["start_time"]
    package_end_time = period_map[row["package_id"]]["end_time"]
    key = row["chip_name"] + str(row["chip_version"])
    op_names = chip_type_operation_type_map[key]
    # One interval per operation, in order. Splitting the package period
    # (instead of accumulating independent random lags) guarantees that every
    # operation lies inside the period and that the last one ends exactly at
    # the package end time without ending before it starts.
    intervals = _split_period(package_start_time, package_end_time, len(op_names))
    for op_name, (op_start, op_end) in zip(op_names, intervals):
        id.append(count)
        operation_name2.append(op_name)
        start_time.append(op_start)
        end_time.append(op_end)
        count += 1

operation = pd.DataFrame(data={"operation_name":operation_name2, "id":id, "start_time":start_time, "end_time":end_time})
operation.to_csv("dataset/operation.csv", index=False)