In [2]:
import pandas as pd
import numpy as np
import ast

In [3]:
BASE_PICK = "Mammalia-15040"
#BASE_PICK = "Chordata-2499"

DATA_IN = 'data/{}-df-images-base64.csv'.format(BASE_PICK)
NODE_OUT= 'data/{}-df-nodes.csv'.format(BASE_PICK)
LINE_OUT= 'data/{}-df-lines.csv'.format(BASE_PICK)
print("loading "+DATA_IN)

loading data/Mammalia-15040-df-images-base64.csv


In [4]:
df = pd.read_csv(DATA_IN)
orig_cols = list(df.columns)

In [5]:
#orig_cols

In [6]:
TOP = 'Begin'
BTM = 'End'

In [12]:
def time_log(num):
    t = np.log(num)
    if np.isnan(t):
        return np.nan
    elif t < 0:
        return 0
    else:
        return t

def bytes_to_str(x):
    try:
        return 'data:image/jpeg;base64,'+ast.literal_eval(x).decode('utf-8')
    except SyntaxError:
        return x

def add_blank_row(row, cols):
    new_blank_row = row[cols]
    for c in cols:
        new_blank_row[c] = None
    return new_blank_row

In [13]:
# get nodes
node_cols = [c for c in orig_cols if (c != TOP) and (c != BTM)]+['z']
node_list = []

for i, row in df.iterrows():

    new_row_1 = row.rename({TOP:'z'})[node_cols].to_dict()
    node_list.append(new_row_1)
    new_row_2 = row.rename({BTM:'z'})[node_cols].to_dict()
    node_list.append(new_row_2)


In [15]:
node_df = pd.DataFrame(node_list)
node_df['z'] = node_df['z'].apply(time_log)
node_df['img_base64'] = node_df['img_base64'].apply(bytes_to_str)
node_df.to_csv(NODE_OUT, index=False)

In [10]:
# get lines
line_cols = [c for c in orig_cols if (c != TOP) and (c != BTM) and ('img' not in c)]+['z']
line_list = []

for i, row in df.iterrows():
    # vertical line
    new_row_1 = row.rename({TOP:'z'})[line_cols].to_dict()
    line_list.append(new_row_1)
    new_row_2 = row.rename({BTM:'z'})[line_cols].to_dict()
    line_list.append(new_row_2)
    line_list.append(add_blank_row(row, line_cols))
    # horizontal line
    parent_row = df.loc[df.id == new_row_1['ancestor']].squeeze()
    if len(parent_row) == 0:
        continue
    else:
        line_list.append(new_row_1)
        new_row_3 = new_row_1.copy()
        new_row_3['x'] = parent_row['x']
        new_row_3['y'] = parent_row['y']
        line_list.append(new_row_3)
        line_list.append(add_blank_row(row, line_cols))

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self.loc[key]


In [11]:
line_df = pd.DataFrame(line_list)
line_df['z'] = line_df['z'].apply(time_log)
line_df.to_csv(LINE_OUT, index=False)