# Data Cleaning

In [2]:
import json
from pathlib import Path

In [3]:
# Factor used when converting recipe amounts to per-minute rates
# (amount * SECONDS / recipe time).
SECONDS = 60

# Input location. All paths are relative to the notebook's working directory.
DATA_PATH = Path("../data")
ORIGINAL_DATA = DATA_PATH / "original_data.json"

# Output files written by the "Save to file" cells.
ITEMS_FILE = DATA_PATH / "items.json"
# NOTE(review): singular "building.json", unlike the other plural names — confirm intended.
BUILDINGS_FILE = DATA_PATH / "building.json"
RECIPES_FILE = DATA_PATH / "recipes.json"
GENERATORS_FILE = DATA_PATH / "generators.json"
RESOURCES_FILE = DATA_PATH / "resources.json"

# Item slugs that are filtered out of the exported item list.
IGNORE_ITEMS = [
    "alien-dna-capsule",
    "alien-protein",
    "bacon-agaric",
    "beryl-nut",
    "biomass",
    "blade-runners",
    "blue-power-slug",
    "chainsaw",
    "factory-carttm",
    "ficsit-coupon",
    "gas-mask",
    "golden-factory-carttm",
    "hatcher-remains",
    "hazmat-suit",
    "hog-remains",
    "hoverpack",
    "jetpack",
    "leaves",
    "liquid-biofuel",
    "medicinal-inhaler",
    "mercer-sphere",
    "mycelia",
    "nobelisk-detonator",
    "object-scanner",
    "packaged-liquid-biofuel",
    "paleberry",
    "parachute",
    "purple-power-slug",
    "rebar-gun",
    "rifle",
    "solid-biofuel",
    "somersloop",
    "spitter-remains",
    "stinger-remains",
    "wood",
    "xeno-basher",
    "xeno-zapper",
    "yellow-power-slug",
    "zipline",
]

In [4]:
# Load the raw dataset. The encoding is pinned to UTF-8 so decoding does not
# depend on the platform's default locale encoding.
with open(ORIGINAL_DATA, encoding="utf-8") as f:
    data = json.load(f)

In [5]:
# List the top-level sections of the loaded dataset.
data.keys()

dict_keys(['items', 'recipes', 'schematics', 'generators', 'resources', 'miners', 'buildings'])

## Items

In [6]:
# Show the field names of one item record (the entry keyed "Desc_NuclearWaste_C").
data["items"]["Desc_NuclearWaste_C"].keys()

dict_keys(['slug', 'icon', 'name', 'description', 'sinkPoints', 'className', 'stackSize', 'energyValue', 'radioactiveDecay', 'liquid', 'fluidColor'])

In [7]:
# Every item record from the raw data, in source order.
all_items = list(data["items"].values())

# Keep only items whose slug is not in IGNORE_ITEMS, ordered by slug.
items = sorted(
    (entry for entry in all_items if entry["slug"] not in IGNORE_ITEMS),
    key=lambda entry: entry["slug"],
)

In [8]:
# Remove fields that are not kept in the exported items.
# pop() with a default is used instead of `del` so the cell can be re-run
# (and tolerates a record lacking one of the fields) without a KeyError.
for item in items:
    for field in (
        "icon",
        "description",
        "stackSize",
        "radioactiveDecay",
        "fluidColor",
    ):
        item.pop(field, None)


In [9]:
# Lookup table from an item's className to its slug, for the kept items only.
item_slug_by_className = dict((entry["className"], entry["slug"]) for entry in items)

## Buildings

In [10]:
# classNames of the buildings that are exported.
machines = [
    "Desc_AssemblerMk1_C",
    "Desc_Blender_C",
    "Desc_ConstructorMk1_C",
    "Desc_Converter_C",
    "Desc_FoundryMk1_C",
    "Desc_HadronCollider_C",
    "Desc_ManufacturerMk1_C",
    "Desc_OilRefinery_C",
    "Desc_Packager_C",
    "Desc_QuantumEncoder_C",
    "Desc_SmelterMk1_C",
]

# Fields left out of each exported building record.
dropped_building_fields = (
    "icon",
    "description",
    "categories",
    "buildMenuPriority",
    "metadata",
    "size",
)

# Build fresh records instead of editing the dicts inside `data` in place:
# the raw data stays intact, so this cell can be re-run without a KeyError.
buildings = []
for source in data["buildings"].values():
    if source["className"] not in machines:
        continue
    building = {
        key: value
        for key, value in source.items()
        if key not in dropped_building_fields
    }
    # Lift the power consumption out of the nested metadata before it is dropped.
    building["powerConsumption"] = source["metadata"]["powerConsumption"]
    buildings.append(building)

In [11]:
# Lookup table from a building's className to its slug, for the exported machines.
building_slug_by_className = dict(
    (entry["className"], entry["slug"]) for entry in buildings
)

## Recipes

In [12]:
# Keep only recipes whose "inMachine" flag is truthy.
recipes = [r for r in data["recipes"].values() if r["inMachine"]]

# A recipe is dropped when its slug contains any of these substrings.
excluded_slugs = [
    "biofuel",
    "alternate-coal",
    "protein",
    "biomass",
    "powercrystalshard",
    "aliendnacapsule",
    "nobeliskgas",
    "recipe-fabric-c",
]
recipes = [
    r for r in recipes if not any(excluded in r["slug"] for excluded in excluded_slugs)
]


# NOTE: the loop below edits the dicts loaded into `data` in place, so this
# cell is meant to run once per kernel; a second run fails on the deleted keys.
for recipe in recipes:
    # Remove the trailing "-c" suffix only. str.rstrip("-c") treats its argument
    # as a set of characters and would also eat real trailing letters
    # (e.g. "recipe-plastic-c" -> "recipe-plasti").
    if recipe["slug"].endswith("-c"):
        recipe["slug"] = recipe["slug"][: -len("-c")]

    del recipe["inHand"]
    del recipe["forBuilding"]
    del recipe["inWorkshop"]
    del recipe["inMachine"]
    del recipe["manualTimeMultiplier"]

    # Replace item classNames by slugs (.get yields None when the className is
    # not in the lookup) and add a per-minute rate next to the raw amount.
    for ingredient in recipe["ingredients"]:
        ingredient_className = ingredient["item"]
        ingredient["item"] = item_slug_by_className.get(ingredient_className)
        ingredient["amount_per_minute"] = (
            ingredient["amount"] * SECONDS / recipe["time"]
        )

    for product in recipe["products"]:
        product_className = product["item"]
        product["item"] = item_slug_by_className.get(product_className)
        product["amount_per_minute"] = product["amount"] * SECONDS / recipe["time"]

    # Only the first listed building is kept, translated to its slug.
    recipe["producedIn"] = building_slug_by_className.get(recipe["producedIn"][0])

    del recipe["className"]


In [13]:
# Spot-check the first cleaned recipe.
recipes[0]

{'slug': 'recipe-ironplate',
 'name': 'Iron Plate',
 'alternate': False,
 'time': 6.0,
 'ingredients': [{'item': 'iron-ingot',
   'amount': 3.0,
   'amount_per_minute': 30.0}],
 'products': [{'item': 'iron-plate',
   'amount': 2.0,
   'amount_per_minute': 20.0}],
 'producedIn': 'constructor',
 'isVariablePower': False,
 'minPower': 0,
 'maxPower': 1}

## Generators

In [353]:
# All generator records except the last one.
# NOTE(review): the final entry is dropped by position, so the result depends on
# the ordering of data["generators"] — confirm which generator is meant to be excluded.
generators = list(data["generators"].values())[:-1]

In [354]:
def slugify_generator_name(classname: str) -> str:
    """Convert a class name such as ``Desc_GeneratorCoal_C`` to ``generator-coal``.

    The ``Desc_`` prefix and ``_C`` suffix are removed when present, then each
    uppercase letter is lower-cased and preceded by a hyphen.

    Args:
        classname: Class name to convert.

    Returns:
        The hyphen-separated, lower-case slug without a leading hyphen.
    """
    prefix, suffix = "Desc_", "_C"
    # Remove the exact prefix/suffix. str.lstrip/str.rstrip take a *set of
    # characters* and would also strip real leading/trailing letters
    # (e.g. "Desc_DeskFan_C" would lose "Des" from "DeskFan").
    if classname.startswith(prefix):
        classname = classname[len(prefix):]
    if classname.endswith(suffix):
        classname = classname[: -len(suffix)]

    slug = "".join("-" + c.lower() if c.isupper() else c for c in classname)

    # A leading capital produces a leading hyphen; drop only that hyphen so a
    # name starting with a lowercase letter keeps its first character.
    return slug[1:] if slug.startswith("-") else slug

In [355]:
for generator in generators:
    # Swap the className for a slug derived from it.
    generator["slug"] = slugify_generator_name(generator.pop("className"))
    # Translate fuel classNames to item slugs, leaving out any without a match.
    generator["fuel"] = [
        fuel_slug
        for fuel_slug in map(item_slug_by_className.get, generator["fuel"])
        if fuel_slug is not None
    ]

generators

[{'fuel': ['coal', 'compacted-coal', 'petroleum-coke'],
  'powerProduction': 75,
  'powerProductionExponent': 1.6,
  'waterToPowerRatio': 10.0,
  'slug': 'generator-coal'},
 {'fuel': ['uranium-fuel-rod', 'plutonium-fuel-rod', 'ficsonium-fuel-rod'],
  'powerProduction': 2500,
  'powerProductionExponent': 1.6,
  'waterToPowerRatio': 1.6,
  'slug': 'generator-nuclear'},
 {'fuel': ['fuel', 'turbofuel', 'rocket-fuel', 'ionized-fuel'],
  'powerProduction': 250,
  'powerProductionExponent': 1.6,
  'waterToPowerRatio': 0.0,
  'slug': 'generator-fuel'}]

## Resources

In [366]:
# Hand-entered resource table: (slug, name, amount).
# NOTE(review): "copper" carries no "-ore" suffix, unlike "iron-ore" and
# "caterium-ore" — confirm it matches the intended item slug.
resources = [
    {"slug": slug, "name": name, "amount": amount}
    for slug, name, amount in (
        ("bauxite", "Bauxite", 12300),
        ("caterium-ore", "Caterium Ore", 15000),
        ("coal", "Coal", 42300),
        ("copper", "Copper", 36900),
        ("crude-oil", "Crude Oil", 12600),
        ("iron-ore", "Iron Ore", 92100),
        ("limestone", "Limestone", 69900),
        ("nitrogen-gas", "Nitrogen Gas", 12000),
        ("raw-quartz", "Raw Quartz", 13500),
        ("sulfur", "Sulfur", 10800),
        ("uranium", "Uranium", 2100),
        ("sam", "SAM", 10200),
    )
]

# Sum of all amounts, used as the reference for the two derived ratios.
total = sum(entry["amount"] for entry in resources)

for resource in resources:
    resource.update(
        {
            # Share of the overall amount (between 0 and 1).
            "percentage_of_total": resource["amount"] / total,
            # Inverse of that share: larger means scarcer.
            "rarity": total / resource["amount"],
        }
    )

## Final Cleanup

In [358]:
# className was only needed to build the lookup tables; remove it before export.
# pop() with a default keeps this cell safe to re-run (del would raise KeyError).
for item in items:
    item.pop("className", None)

## Save to file

In [359]:
# Write the cleaned items as indented JSON.
with ITEMS_FILE.open("w") as items_out:
    json.dump(items, items_out, indent=4)

In [360]:
# Write the cleaned buildings as indented JSON.
with BUILDINGS_FILE.open("w") as buildings_out:
    json.dump(buildings, buildings_out, indent=4)

In [14]:
# Write the cleaned recipes as indented JSON.
with RECIPES_FILE.open("w") as recipes_out:
    json.dump(recipes, recipes_out, indent=4)

In [362]:
# Write the cleaned generators as indented JSON.
with GENERATORS_FILE.open("w") as generators_out:
    json.dump(generators, generators_out, indent=4)

In [367]:
# Write the resource table as indented JSON.
with RESOURCES_FILE.open("w") as resources_out:
    json.dump(resources, resources_out, indent=4)