In [2]:
#!/usr/bin/env python
# coding: utf-8

# Parsing web crawler json output into a pandas table
# By: Tanek17 (Eric Meissner)
# 
from datetime import datetime
import csv 
import numpy as np
import pandas as pd
import json
from collections import ChainMap


In [3]:
class Recipe:
    """A crafting recipe built from one row of the parsed recipe table.

    Plain fields are copied from ``row`` by key. ``ingredients`` and ``output``
    are converted through process_raw_items / process_item_or_itemCat, and
    ``ID`` is taken from the converted output's ``ID``.
    """

    # Class-level placeholders; __init__ assigns each of these per instance
    # (except typ, which stays "recipe").
    BaseGearScore = -1
    category = ""
    cooldown = -1
    description = ""
    event = ""
    icon = ""
    ID = ""
    ingredients = []  # can have Items or ItemCategories in it
    itemType = ""
    name = ""
    output = ""  # Item type
    perkBuckets = []
    rarity = -1
    recipeLevel = -1
    salvageRecipe = ""
    stations = []  # list of strings
    station = ""  # the primary one (non-camp)
    tradeskill = ""
    typ = "recipe"

    def __init__(self, row):
        """Populate the recipe from ``row``, an object indexable by field name."""
        # Fields copied verbatim, read in the same order as before.
        for field in (
            'BaseGearScore', 'category', 'cooldown', 'description', 'event',
            'icon', 'itemType', 'name', 'perkBuckets', 'rarity',
            'recipeLevel', 'salvageRecipe', 'stations',
        ):
            setattr(self, field, row[field])

        # The first listed station is treated as the primary (non-camp) one;
        # the literal string "None" marks a recipe without any station.
        listed = row['stations']
        if len(listed) > 0:
            self.station = listed[0]
        else:
            self.station = "None"
        self.tradeskill = row['tradeskill']

        # Raw ingredient/output dictionaries become Item / ItemCategory objects.
        self.ingredients = process_raw_items(row['ingredients'])
        self.output = process_item_or_itemCat(row['output'])
        self.ID = self.output.ID
    
    
class Item:
    """A single concrete item parsed from a raw item dictionary."""

    typ = "item"
    itemType = ""
    ID = ""
    name = ""
    rarity = -1
    icon = ""
    quantity = -1

    def __init__(self, catDict):
        """Build the item from ``catDict``.

        ``type`` and ``id`` are required keys. Missing optional keys fall back to:
        ``itemType`` -> the ``type`` value, ``name`` -> the ``id`` value,
        ``rarity`` -> -1, ``icon`` -> 'None', ``quantity`` -> 1.
        Any failure is reported on stdout and then re-raised unchanged.
        """
        def pick(key, fallback):
            # Membership test followed by plain indexing, as the lookups were
            # originally written.
            if key in catDict:
                return catDict[key]
            return fallback

        try:
            kind = catDict['type']
            self.typ = kind
            self.itemType = pick('itemType', kind)
            identifier = catDict['id']
            self.ID = identifier
            self.name = pick('name', identifier)
            self.rarity = pick('rarity', -1)
            self.icon = pick('icon', 'None')
            self.quantity = pick('quantity', 1)
        except:
            # Log which dictionary was bad, then let the error propagate.
            print("Failed to process item :( {})".format(catDict))
            raise
    

class ItemCategory:
    """A named ingredient entry that carries a list of sub-ingredients."""

    typ = ""
    name = ""
    ID = ""  # will be the same as name
    icon = ""
    quantity = 1
    rarity = 0
    subIngredients = []  # Can have Items in it

    def __init__(self, catDict, subIngredients):
        """Build the category from ``catDict`` and its converted sub-ingredients.

        ``type``, ``name``, ``icon`` and ``quantity`` are required keys of
        ``catDict``. ``rarity`` is derived from ``subIngredients`` via
        compute_itemCat_rarity. Any failure is reported on stdout and re-raised.
        """
        try:
            # (attribute, source key) pairs; ID deliberately mirrors the name.
            for attribute, key in (('typ', 'type'), ('name', 'name'),
                                   ('ID', 'name'), ('icon', 'icon')):
                setattr(self, attribute, catDict[key])
            self.rarity = compute_itemCat_rarity(subIngredients)
            self.quantity = catDict['quantity']
            self.subIngredients = subIngredients
        except:
            # Log which dictionary was bad, then let the error propagate.
            print("Failed to process ItemCategory :( {})".format(catDict))
            raise

def compute_itemCat_rarity(subIngredients):
    """Return the lowest ``rarity`` found among ``subIngredients``.

    The search starts from 999, so that value is returned for an empty list
    and also acts as an upper bound on the result.
    """
    # Seeding the candidates with 999 keeps the "start at 999, keep anything
    # strictly smaller" semantics of a manual scan.
    return min([999] + [entry.rarity for entry in subIngredients])
    
        
        
def process_item_or_itemCat(item):
    """
    Convert one raw entry into an Item or an ItemCategory.

    An entry that exposes ``subIngredients`` (as an attribute or as a key)
    becomes an ItemCategory whose sub-ingredients are converted recursively;
    any other entry becomes an Item.

    A sub-ingredient that fails to convert is reported on stdout and skipped.
    (Previously a ``None`` placeholder was stored instead, which made the
    ItemCategory constructor fail while computing the rarity.)

    param item: raw entry, e.g. a dictionary taken from the parsed recipe JSON
    rtype: Returns a single Item or ItemCategory
    """
    # Locate the raw sub-ingredient list, whichever way the entry exposes it.
    if hasattr(item, 'subIngredients'):
        rawSubItems = item.subIngredients
    elif 'subIngredients' in item:
        rawSubItems = item['subIngredients']
    else:
        return Item(item)

    subItems = []
    for subItem in rawSubItems:
        try:
            subItems.append(process_item_or_itemCat(subItem))
        except Exception:
            # Best effort: keep the category, drop only the broken member.
            print("*************** Failed to add a subItem :O")
    # Pass the converted list (the attribute branch used to reference an
    # undefined name here and raised NameError).
    return ItemCategory(item, subItems)
        
def process_raw_items(itemsList):
    """
    Convert every raw entry of ``itemsList`` into its object form, in order.

    param itemsList: List of dictionaries that represent Items or ItemCategories
    rtype: Returns a list of Item and ItemCategory objects.
    """
    return [process_item_or_itemCat(rawItem) for rawItem in itemsList]


def write_edgelist(fileString, edges):
    """Write the edge table to ``fileString`` as CSV (the file is overwritten).

    param fileString: path of the CSV file to create
    param edges: dict mapping (sourceID, targetID) to a
                 (quantity, Recipe-or-ItemCategory) tuple

    Every data row follows the header's column order. Edges that do not carry
    a Recipe get 0 for the two numeric recipe columns and the literal
    "Item Category" for the textual ones.
    """
    # These column headers are used in Gephi automatically
    header = ["Source", "Target",
              "BaseGearScore", "Cooldown",
              "RecipeLevel", "SalvageRecipe",
              "Quantity", "Tradeskill",
              "Rarity", "Station",
              "Icon", "Type",
              "Category"]

    # newline='' lets the csv module control line endings itself; without it
    # some platforms end up with an extra blank line after every row.
    with open(fileString, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        for (source, target), (quantity, recipeOrItemCat) in edges.items():
            if isinstance(recipeOrItemCat, Recipe):
                # Values listed in header order (BaseGearScore/Cooldown used
                # to be swapped with RecipeLevel/SalvageRecipe).
                writer.writerow([source, target,
                                 recipeOrItemCat.BaseGearScore, recipeOrItemCat.cooldown,
                                 recipeOrItemCat.recipeLevel, recipeOrItemCat.salvageRecipe,
                                 quantity, recipeOrItemCat.tradeskill,
                                 recipeOrItemCat.rarity, recipeOrItemCat.station,
                                 recipeOrItemCat.icon, recipeOrItemCat.typ,
                                 recipeOrItemCat.category])
            else:
                writer.writerow([source, target,
                                 0, "Item Category",
                                 0, "Item Category",
                                 quantity, "Item Category",
                                 recipeOrItemCat.rarity, "Item Category",
                                 recipeOrItemCat.icon, recipeOrItemCat.typ,
                                 "Item Category"])

def write_nodelist(fileString, nodes):
    """Write the node table to ``fileString`` as CSV (the file is overwritten).

    param fileString: path of the CSV file to create
    param nodes: dict whose values are Item or ItemCategory objects; the keys
                 are not written (each row uses the object's own ``ID``)

    Objects that are not Item instances get the literal "Item Category" in
    the ItemType column.
    """
    # newline='' lets the csv module control line endings itself; without it
    # some platforms end up with an extra blank line after every row.
    with open(fileString, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        # These column headers are used in Gephi automatically
        writer.writerow(["ID", "Label", "Rarity",
                         "ItemType", "Icon"])
        for item in nodes.values():
            if isinstance(item, Item):
                itemType = item.itemType
            else:
                itemType = "Item Category"
            writer.writerow([item.ID, item.name, item.rarity, itemType, item.icon])

    

In [22]:
# Run configuration: input location and the names of the two output files.
now = datetime.now()
DATA_FOLDER = "data/"
SUFFIX = "-8-28-21"
INPUT_JSONBLOB_CSV = DATA_FOLDER+"recipes.csv"
edgeString = "{}recipes_edgelist{}.csv".format(DATA_FOLDER, SUFFIX)
nodeString = '{}recipes_nodelist{}.csv'.format(DATA_FOLDER, SUFFIX)

# Load the raw table and order it by the 'recipe' column, descending.
df = pd.read_csv(INPUT_JSONBLOB_CSV).sort_values('recipe', ascending=False)

In [23]:
# Display the loaded frame to eyeball the raw 'recipe' JSON strings before parsing.
df

Unnamed: 0,recipe
2719,"{""success"":true,""data"":{""type"":""recipe"",""id"":""..."
2373,"{""success"":true,""data"":{""type"":""recipe"",""id"":""..."
2078,"{""success"":true,""data"":{""type"":""recipe"",""id"":""..."
1816,"{""success"":true,""data"":{""type"":""recipe"",""id"":""..."
2625,"{""success"":true,""data"":{""type"":""recipe"",""id"":""..."
640,"{""success"":true,""data"":{""type"":""recipe"",""id"":""..."
837,"{""success"":true,""data"":{""type"":""recipe"",""id"":""..."
487,"{""success"":true,""data"":{""type"":""recipe"",""id"":""..."
643,"{""success"":true,""data"":{""type"":""recipe"",""id"":""..."
838,"{""success"":true,""data"":{""type"":""recipe"",""id"":""..."


In [24]:
def process_rowjson(row):
    """Parse the JSON text in ``row['recipe']`` and return its ``data`` column.

    param row: one row of the raw table; must expose a ``recipe`` field
               holding JSON text
    rtype: the ``data`` column of the parsed JSON (a pandas Series), or None
           when the text cannot be parsed -- the offending row is printed.
    """
    from io import StringIO  # local import keeps this cell self-contained

    try:
        # Wrapping the text in StringIO makes pandas always treat it as JSON
        # content; a bare string may be interpreted as a file path or URL,
        # and passing literal JSON strings is deprecated in recent pandas.
        return pd.read_json(StringIO(row['recipe']))['data']
    except Exception:
        print("errored row:", row)
        return None
        
# Parse every row's JSON blob; axis=1 hands process_rowjson one row at a time.
# Rows it could not parse come back as None (they are printed by the helper).
raw_recipe_table = df.apply(process_rowjson, axis=1)
print(raw_recipe_table.shape)

errored row: recipe    {"success":true,"data":{"type":"recipe","id":"...
Name: 518, dtype: object
errored row: recipe    recipe
Name: 302, dtype: object
errored row: recipe    recipe
Name: 251, dtype: object
errored row: recipe    recipe
Name: 107, dtype: object
errored row: recipe    recipe
Name: 53, dtype: object
errored row: recipe    recipe
Name: 197, dtype: object
errored row: recipe    recipe
Name: 161, dtype: object
(2952, 22)


In [25]:
# Index the table by recipe id. The guard makes the cell safe to re-run: once
# 'id' has been moved into the index, a second set_index('id') would raise.
if raw_recipe_table.index.name != 'id':
    raw_recipe_table = raw_recipe_table.set_index('id')
print(raw_recipe_table.shape)
# Drop the rows that ended up without an id (the recorded run shows 7 rows
# removed here, matching the 7 rows reported as errored during parsing).
raw_recipe_table = raw_recipe_table[raw_recipe_table.index.notnull()]
print(raw_recipe_table.shape)

(2952, 21)
(2945, 21)


In [26]:
# Build one Recipe object per table row (axis=1 passes each row to the constructor).
recipes = raw_recipe_table.apply(lambda row: Recipe(row), axis=1)

In [27]:
# Collect every distinct node of the graph, keyed by its ID: each ingredient,
# each member of a category ingredient, and each recipe output.
nodes = {}  # ID : Item / ItemCategory
for recipe in recipes:
    for ingredient in recipe.ingredients:
        if isinstance(ingredient, ItemCategory):
            # Members of a category are registered as nodes of their own.
            nodes.update((member.ID, member) for member in ingredient.subIngredients)
        nodes[ingredient.ID] = ingredient
    product = recipe.output
    nodes[product.ID] = product
print(len(nodes))

3911


In [28]:
# Collect the edges of the graph. A later recipe that uses the same
# (source, target) pair overwrites the earlier entry.
edges = {}  # (SourceID, TargetID) : (quantity, Recipe / ItemCategory)
for recipe in recipes:
    for ingredient in recipe.ingredients:
        if isinstance(ingredient, ItemCategory):
            # Every member of a category points at the category with quantity 1.
            edges.update(
                ((member.ID, ingredient.ID), (1, ingredient))
                for member in ingredient.subIngredients
            )
        # The ingredient itself points at the recipe's output.
        edges[(ingredient.ID, recipe.output.ID)] = (ingredient.quantity, recipe)
print(edges.keys())

dict_keys([('workorder_workrationst5', 'workrationst5'), ('workorder_workrationst4', 'workrationst4'), ('workorder_workrationst3', 'workrationst3'), ('workorder_workrationst2', 'workrationst2'), ('reagentconvertert5', 'tannint5'), ('clothweavet5', 'Refining Materials Tier 5'), ('solventt5', 'Refining Materials Tier 5'), ('sandpapert5', 'Refining Materials Tier 5'), ('fluxt5', 'Refining Materials Tier 5'), ('Refining Materials Tier 5', 'tannint5'), ('reagentconvertert4', 'tannint4'), ('clothweavet4', 'Refining Materials Tier 4'), ('solventt4', 'Refining Materials Tier 4'), ('sandpapert4', 'Refining Materials Tier 4'), ('fluxt4', 'Refining Materials Tier 4'), ('Refining Materials Tier 4', 'tannint4'), ('reagentconvertert3', 'tannint3'), ('clothweavet3', 'Refining Materials Tier 3'), ('solventt3', 'Refining Materials Tier 3'), ('sandpapert3', 'Refining Materials Tier 3'), ('fluxt3', 'Refining Materials Tier 3'), ('Refining Materials Tier 3', 'tannint3'), ('reagentconvertert5', 'solventt5'

In [29]:
# Write both CSV files to the paths configured earlier (edgeString / nodeString).
write_edgelist(edgeString, edges)
write_nodelist(nodeString, nodes)