## Every product/combination of nested dictionaries saved to DataFrame

* [Stack Overflow question and answer](https://stackoverflow.com/questions/57663316/every-product-combination-of-nested-dictionaries-saved-to-dataframe/57668373#57668373)

In [None]:
import pandas as pd
from pprint import pprint as pp

In [None]:
# First option grid. Each top-level key maps to a LIST of single-key dicts;
# a value is either a scalar (the "type" entries) or a list of alternatives.
# fix_list_dicts() is applied to this structure in a later cell to merge each
# list into one dict and to wrap the scalar values in lists.
d1 = {
    "chisel": [
        {"type": "chisel"},
        {"depth": [152, 178, 203]},
        {"residue incorporation": [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]},
        {"timing": ["10-nov", "10-apr"]},
    ],
    "disc": [
        {"type": "disc"},
        {"depth": [127, 152, 178, 203]},
        {"residue incorporation": [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]},
        {"timing": ["10-nov", "10-apr"]},
    ],
    # Note: here the "type" value differs from the top-level key.
    "no_till": [
        {"type": "user_defined"},
        {"depth": [0]},
        {"residue incorporation": [0.0]},
        {"timing": ["10-apr"]},
    ],
}

In [None]:
def fix_list_dicts(data: dict) -> dict:
    """
    Given a dict where the values are a list of dicts:
    (1) merge each list of dicts into a single dict (result: dict of dicts)
    (2) if any second level value is not a list, wrap it in a one-element list

    The input is left untouched: neither ``data`` nor the dicts inside its
    lists are modified. Values that already are lists are reused as-is (not
    copied) in the result. If the same key occurs in several dicts of one
    list, the last occurrence wins.

    :param data: mapping of name -> list of dicts
    :return: new mapping of name -> dict whose values are all lists
    """
    data_new = dict()
    for k, v in data.items():
        v_new = dict()
        for x in v:
            for k1, v1 in x.items():
                # Write into the new dict instead of rewriting ``x`` so the
                # caller's nested dicts are not changed as a side effect.
                v_new[k1] = v1 if isinstance(v1, list) else [v1]
        data_new[k] = v_new
    return data_new

In [None]:
# Collapse each list of single-key dicts into one dict per top-level key and
# wrap the scalar "type" values in lists, so every value in d1 is a list.
d1 = fix_list_dicts(d1)

In [None]:
# Pretty-print d1 to inspect the result of fix_list_dicts.
pp(d1)

In [None]:
# Second option grid: two top-level entries with identical settings.
# "kg/ha" runs from 110 to 225 inclusive in steps of 5 (24 values); each entry
# gets its own list object. "fertilize_on" starts out as a plain string.
d2 = {
    name: {
        "kg/ha": list(range(110, 226, 5)),
        "fertilize_on": "10-apr",
    }
    for name in ("nh4_n", "urea_n")
}

In [None]:
def add_top_key_as_value(data: dict, new_key: str) -> dict:
    """
    Given a dict of dicts, copy every top-level key into its own sub-dict:
    (1) for each top key, store ``new_key: <top key>`` in the second level

    The sub-dicts are modified in place (an existing ``new_key`` entry is
    overwritten) and the same ``data`` object is returned.
    """
    for top_key in data:
        data[top_key][new_key] = top_key
    return data

In [None]:
# Record each top-level key of d2 inside its own sub-dict under 'fertilizer'.
d2 = add_top_key_as_value(d2, 'fertilizer')
# NOTE(review): str_value_to_list is defined in a later cell of this notebook.
# On a fresh top-to-bottom run this line raises NameError unless that cell has
# been executed first.
d2 = str_value_to_list(d2)

In [None]:
# Pretty-print d2 to inspect the added 'fertilizer' entries and list-wrapped values.
pp(d2)

In [None]:
# Third option grid: a single top-level entry. Every value is already a list
# except "sow_crop", which starts out as a plain string.
d3 = {
    "maize": dict(
        sow_crop="maize",
        cultivar=["B_105", "B_110"],
        planting_dates=[
            "20-apr",
            "27-apr",
            "4-may",
            "11-may",
            "18-may",
            "25-may",
            "1-jun",
            "8-jun",
            "15-jun",
        ],
        sowing_density=[8],
        sowing_depth=[51],
        harvest=["maize"],
    ),
}

In [None]:
def str_value_to_list(data: dict) -> dict:
    """
    Given a dict of dicts:
    (1) Wrap any second level value that is not a list (str, int, ...) in a
        one-element list; values that already are lists are left unchanged

    The sub-dicts are modified in place and the same ``data`` object is
    returned, so the result can be re-assigned or ignored by the caller.

    :param data: mapping of name -> dict
    :return: ``data`` itself, with every second level value being a list
    """
    for inner in data.values():
        for k2, v2 in inner.items():
            # isinstance (rather than an exact type comparison) keeps list
            # subclasses from being wrapped a second time. Re-assigning an
            # existing key does not resize the dict, so this is safe while
            # iterating over it.
            if not isinstance(v2, list):
                inner[k2] = [v2]
    return data

In [None]:
# Wrap the scalar "sow_crop" entry in a list so that every value in d3 is a list.
d3 = str_value_to_list(d3)

In [None]:
# Pretty-print d3 to inspect the list-wrapped values.
pp(d3)

In [None]:
def combine_the_data(data: list) -> dict:
    """
    Given a list of dicts (each a dict of dicts):
    (1) build one DataFrame per dict, one row per top-level key
    (2) overwrite every row label with 0
    (3) return the frames in a dict keyed 'd_0', 'd_1', ... by list position
    """
    def _zero_indexed(mapping):
        # One row per top-level key; all rows share the index label 0.
        frame = pd.DataFrame.from_dict(mapping, orient='index')
        frame.index = [0] * len(frame)
        return frame

    return {f'd_{pos}': _zero_indexed(item) for pos, item in enumerate(data)}

In [None]:
# One DataFrame per option grid, keyed 'd_0', 'd_1', 'd_2' in the order listed here.
data = [d1, d2, d3]
df_dict = combine_the_data(data)

In [None]:
# Display the frame built from d1 (first element of `data`).
df_dict['d_0']

In [None]:
# Display the frame built from d2 (second element of `data`).
df_dict['d_1']

In [None]:
# Display the frame built from d3 (third element of `data`).
df_dict['d_2']

In [None]:
def merge_df_dict(data: dict) -> pd.DataFrame:
    """
    Given a dict of DataFrames
    (1) outer-merge them one after another on their index, in dict order

    The keys of ``data`` are ignored. The merge starts from an empty
    DataFrame, so an empty dict yields an empty DataFrame.
    """
    merged = pd.DataFrame()
    for frame in data.values():
        merged = pd.merge(
            merged,
            frame,
            how='outer',
            left_index=True,
            right_index=True,
        )
    return merged

In [None]:
# Merge the per-grid frames on their index. combine_the_data gave every row the
# label 0, so the index-based merge pairs each row with every row of the others.
df = merge_df_dict(df_dict)

In [None]:
# Display the merged frame; its cells still hold lists at this point.
df

In [None]:
# combine_the_data set every index label to 0; replace the index with a fresh
# 0..n-1 range and discard the old labels.
df.reset_index(drop=True, inplace=True)

In [None]:
# Explode one column at a time: each element of a list cell becomes its own
# row, so repeating this for every column expands each row into all
# combinations of its options. explode() repeats the original index label on
# the new rows, hence the reset after every pass.
for col in df.columns:
    df = df.explode(col).reset_index(drop=True)

In [None]:
# Display the fully exploded frame.
df

In [None]:
# Number of generated rows per distinct value of the "type" column.
df.type.value_counts()