This notebook can be used to sample the JSON files that serve as the basis for the hackathon.

In [None]:
import pandas as pd
import json
import numpy as np
from random import choice
from datetime import timedelta, date
import os

In [None]:
def generate_subsequences_of_random_length(l, n, m, p):
    """Build a list of length n made of constant runs of elements drawn from l.

    Repeatedly picks a random element of ``l`` and repeats it a random number
    of times. The length of every run is Binomial(m, p) distributed (so a run
    may be empty). The concatenated runs are truncated to exactly ``n`` items.

    Args:
        l (sequence): the pool the run elements are drawn from
        n (int): the length of the returned list
        m (int): the maximal length of a single run (Binomial(m, p) distributed)
        p (float): success probability of the run length (Binomial(m, p) distributed)

    Returns:
        list: the concatenated runs, truncated to ``n`` items
        (an empty list when ``n <= 0``).

    Raises:
        ValueError: if ``n > 0`` while ``m == 0`` or ``p == 0``; every run
            would then be empty and the result could never be filled.
    """
    if n <= 0:
        return []
    if m == 0 or p == 0:
        raise ValueError(
            "m and p must be positive to generate a non-empty sequence"
        )

    result = []
    while len(result) < n:
        element = choice(l)
        # Scalar draw: converting a 1-element array with int() is deprecated
        # in recent NumPy versions.
        run_length = int(np.random.binomial(m, p))
        result.extend([element] * run_length)

    return result[:n]


# Example: 30 items drawn from "abcde", in runs whose length is Binomial(5, 0.5)
r = generate_subsequences_of_random_length(l="abcde", n=30, m=5, p=0.5)
np.array(r)

In [None]:
def dump_data_frame_as_json(df, name):
    """Dump a dataframe as a JSON array into the file ``name``.

    Every row is written as one JSON object on its own line, so the result
    is valid JSON and can still be read line by line.

    Args:
        df (pandas.DataFrame): the frame to dump; its values must be JSON
            serializable
        name (str): path of the output file (overwritten if it exists)
    """
    # 'records' is the full orient name; the abbreviation 'record' is
    # rejected by pandas >= 2.0.
    records = df.to_dict(orient='records')

    # Serialize row by row instead of patching the dumped text with
    # str.replace("},", "},\n"): the replacement also broke a row apart after
    # any nested object and altered string values containing "},".
    rows = [json.dumps(record) for record in records]

    with open(name, 'w') as outfile:
        outfile.write("[" + ",\n ".join(rows) + "]")
    

# Prod schedule

In [None]:
# Number of heats to sample, and number of consecutive heats sharing one
# heat_sq value
n_total = 100
n_seq = 8

# Grade and chemistry values stay constant over runs of random length
grade = generate_subsequences_of_random_length(
    ["STI1", "ST2", "PPQ"], n_total, 10, 0.6)
cu_range = generate_subsequences_of_random_length(
    [0.08, 0.10, 0.12], n_total, 5, 0.3)
al_range = generate_subsequences_of_random_length(
    [0.001, 0.005, 0.010, 0.015], n_total, 10, 0.3)
ox_range = generate_subsequences_of_random_length(
    [0.001, 0.005, 0.010, 0.015], n_total, 15, 0.2)

# NOTE(review): the key "head_id" holds "heat-<i>" values and may be a typo
# for "heat_id"; kept unchanged because it is part of the dumped schema.
df_prod_sced = pd.DataFrame(
    {
        # 1 for the first n_seq rows, 2 for the next n_seq rows, ...
        "heat_sq": np.arange(n_total) // n_seq + 1,
        "head_id": [f"heat-{i}" for i in range(n_total)],
        "steel_grade": grade,
        "required_weight": np.random.binomial(160, 0.9, size=n_total),
        "chemistry": [
            {"cu_pct": cu, "al_pct": al, "ox_pct": ox}
            for cu, al, ox in zip(cu_range, al_range, ox_range)
        ],
    }
)
df_prod_sced.head()

In [None]:
# Write the sampled production schedule to disk
dump_data_frame_as_json(df=df_prod_sced, name="production_schedule_nb.json")

# Inventory

In [None]:
n_total = 100
scrap_types = ["bushling", "pig_iron", "hbi"]
# Every day from 2019-01-01 to 2019-03-01, formatted as YYYYMMDD strings
date_range = pd.date_range(start='1/1/2019', end='03/01/2019').strftime("%Y%m%d")
# Repeated entries weight the uniform draw: 20 carts, 3 trucks, 1 ship
delivery_type = ["cart"] * 20 + ["truck"] * 3 + ["ship"]


def _draw_column(pool, count):
    """Return count independent uniform draws from pool."""
    return [choice(pool) for _ in range(count)]


df_inventory = pd.DataFrame(
    {
        "scrap_type": _draw_column(scrap_types, n_total),
        "delivery_date": _draw_column(date_range, n_total),
        "delivery_type": _draw_column(delivery_type, n_total),
    }
).sort_values(by="delivery_date")

def sample_weight(dt):
    """Draw a random weight for the delivery type dt.

    The weight is one Binomial(n, p) draw whose parameters depend on dt:
    "cart" -> (20, 0.7), "truck" -> (40, 0.9), "ship" -> (3000, 0.5).
    Returns None for any other value.
    """
    binomial_params = (
        ("cart", 20, 0.7),
        ("truck", 40, 0.9),
        ("ship", 3000, 0.5),
    )
    for kind, trials, prob in binomial_params:
        if dt == kind:
            return np.random.binomial(trials, prob, size=1)[0]
    return None
    
def sample_provider(dt):
    """Draw a random provider letter for the delivery type dt.

    Each delivery type has its own pool string; repeated letters in a pool
    make that provider proportionally more likely. Returns None for any
    value other than "cart", "truck" or "ship".
    """
    provider_pools = (
        ("cart", "aabcdee"),
        ("truck", "aaaaaccceeee"),
        ("ship", "bbbbbddd"),
    )
    for kind, pool in provider_pools:
        if dt == kind:
            return choice(pool)
    return None
    
# Derive the weight and provider columns from the delivery type. apply() calls
# the sampler once per row, so rows of the same type get independent draws.
df_inventory["sample_weight"] = df_inventory["delivery_type"].apply(sample_weight)
df_inventory["provider"] = df_inventory["delivery_type"].apply(sample_provider)

# Preview the first rows of the sampled inventory
df_inventory.head()

In [None]:
# Write the sampled scrap inventory to disk.
# NOTE(review): the file name is spelled "srap_..."; kept unchanged, confirm
# whether consumers of the file expect this spelling.
dump_data_frame_as_json(df=df_inventory, name="srap_inventory_nb.json")

# Orders

In [None]:
n_total = 30

# One row per order; every order starts out as "delivered"
df_order = pd.DataFrame(
    {
        "scrap_type": [choice(scrap_types) for _ in range(n_total)],
        "order_date": [choice(date_range) for _ in range(n_total)],
        "status": "delivered",
        "weight": np.random.binomial(10000, 0.7, size=n_total),
    }
)
# Order the rows chronologically (order_date holds YYYYMMDD strings)
df_order = df_order.sort_values(by="order_date")

def sample_price(st):
    """Draw a random price per ton for the scrap type st.

    The price is one Binomial(n, p) draw whose parameters depend on st:
    "bushling" -> (250, 0.7), "pig_iron" -> (300, 0.9), "hbi" -> (600, 0.5).
    Returns None for any other value.
    """
    binomial_params = (
        ("bushling", 250, 0.7),
        ("pig_iron", 300, 0.9),
        ("hbi", 600, 0.5),
    )
    for kind, trials, prob in binomial_params:
        if st == kind:
            return np.random.binomial(trials, prob, size=1)[0]
    return None
    
# The price depends on the scrap type; one independent draw per order
df_order["price_per_ton"] = df_order["scrap_type"].apply(sample_price)

# Mark the last 10 rows (by position) as still outstanding. A single .iloc
# assignment writes into df_order itself; the chained form
# df_order[-10:]["status"] = ... assigns to a temporary slice, which triggers
# SettingWithCopyWarning and leaves df_order unchanged under copy-on-write.
df_order.iloc[-10:, df_order.columns.get_loc("status")] = "outstanding"
df_order.head()

In [None]:
# Write the sampled scrap orders to disk.
# NOTE(review): the file name is spelled "srap_..."; kept unchanged, confirm
# whether consumers of the file expect this spelling.
dump_data_frame_as_json(df=df_order, name="srap_orders_nb.json")