In [182]:
import pandas as pd
import altair as alt
import numpy as np
from sklearn.decomposition import PCA
import networkx as nx
import umap
import random

# Family Portrait

In [4]:
# Load the per-language metadata table (tab-separated). The path is relative to the
# notebook's working directory.
data=pd.read_csv('../public/lang-meta.tsv',sep='\t')

In [5]:
# Every metadata column pulled from the raw table; 'System' identifies the row.
keys = [
    'System',
    'Carrier',
    'Output Type',
    'Conceptual Model',
    'Abstraction Mechanism',
    'Source',
    'Language Form',
    'Coded Domain',
    'Execution Model',
    'Alt API Available',
    'Extensible',
    'Formal Definition Available',
    'Language',
    'Data manipulation',
    'Provides Accessibility',
    'Juxtaposition strategy',
    'Allowed Data Type',
    'Data model',
    'Interaction source',
    'Open Source',
    'Dependent',
    'Mark Types',
    'Series Types',
    # 'Output Type Coded',
    'Embedded language',
    'Coordinate Systems',
]

# Columns collapsed to a single 0/1 flag (0 when the cell text contains "no").
binary_columns = [
    'Abstraction Mechanism',
    'Alt API Available',
    'Extensible',
    'Formal Definition Available',
    'Provides Accessibility',
    'Open Source',
    'Dependent',
]

# Categorical columns that are one-hot encoded as-is via pd.get_dummies.
dumb_cols = [
    'Carrier',
    'Output Type',
    'Conceptual Model',
    'Source',
    'Language Form',
    'Coded Domain',
    'Execution Model',
    'Language',
    'Data manipulation',
    'Juxtaposition strategy',
    'Allowed Data Type',
    'Data model',
    'Interaction source',
    # 'Output Type Coded',
    'Embedded language',
]

# Columns holding comma-separated lists; each distinct entry becomes its own 0/1 column.
split_cols = [
    'Mark Types',
    # 'Series Types',
    'Coordinate Systems',
]

df = data[keys].copy()


def _split_tokens(cell):
    """Return the trimmed, non-empty comma-separated entries of a cell ([] for NaN)."""
    if pd.isna(cell):
        return []
    return [token.strip() for token in str(cell).split(',') if token.strip()]


def _binary_flag(value):
    """Map a yes/no style cell to 1/0: missing values or any text containing "no" give 0."""
    if pd.isna(value):
        return 0
    return 0 if "no" in str(value).lower() else 1


gen_split_cols = []
for col in split_cols:
    tokens_per_row = df[col].apply(_split_tokens)
    # Sorted so the generated column order is identical on every kernel start
    # (plain set iteration order depends on string hashing).
    col_types = sorted({token for tokens in tokens_per_row for token in tokens})
    for col_type in col_types:
        new_col = col + col_type + 'new'
        gen_split_cols.append(new_col)
        # Exact membership on the trimmed tokens: "a, b" counts as "b", and a short
        # token no longer matches inside a longer one. `wanted` binds the current value.
        df[new_col] = tokens_per_row.apply(lambda tokens, wanted=col_type: int(wanted in tokens))


# The previous mapping returned 0 on both branches, which made every binary feature constant.
for bin_col in binary_columns:
    df[bin_col] = df[bin_col].apply(_binary_flag)


keep_cols = ['System'] + dumb_cols + binary_columns + gen_split_cols
# dtype=int keeps the indicator columns numeric regardless of the pandas version's default.
out_df = pd.get_dummies(df[keep_cols], columns=dumb_cols, dtype=int)
# Feature matrix for the projections below: one row per system, identifier column removed.
ana_df = out_df.drop(columns=['System'])

In [6]:
def simple_plot(inp, color):
    """Scatter the systems at the given 2-D coordinates, labelled by name.

    Args:
        inp: 2 x n array-like with one column per row of the notebook-level `df`
            (columns are labelled with df['System']); row 0 becomes x, row 1 becomes y.
        color: name of the `df` column used for both point colour and point shape.

    Returns:
        A layered Altair chart: point marks plus a text label above each point.
    """
    coords = pd.DataFrame(inp, columns=df['System']).T.reset_index()
    coords = coords.rename(columns={0: "x", 1: 'y'})
    # Join the metadata back on so any column of `df` can drive the encodings.
    local_df = coords.merge(df, on=['System'])

    palette = alt.Scale(range=["#EE0000", "#AF00DB", "#267f99"])
    base = alt.Chart(local_df).encode(
        x=alt.X("x", scale=alt.Scale(zero=False)),
        y=alt.Y("y", scale=alt.Scale(zero=False)),
        color=alt.Color(color, scale=palette),
        tooltip=["System"],
    )
    circles = base.mark_point().encode(shape=color)
    txts = base.mark_text(dy=-10).encode(text="System")
    return circles + txts

In [7]:
# Fit a 2-component PCA on the transposed feature matrix. After the transpose each
# system is a column, so pca.components_ has one column per system; the simple_plot
# call in the next cell uses those two rows as x/y coordinates.
# NOTE(review): this plots the components of the transposed problem rather than the
# more common pca.fit_transform(ana_df) scores — confirm that is intended.
pca = PCA(n_components=2)
pca.fit(ana_df.T)

PCA(n_components=2)

In [8]:
# PCA view coloured by 'Carrier'. The trailing semicolon suppresses the cell's
# rich output, so the chart is built but not displayed.
simple_plot(pca.components_, "Carrier");

In [9]:
# Project the feature matrix (one row per system) to 2-D with UMAP.
# UMAP is stochastic: without a fixed random_state the layout, and every plot built
# from `embedding`, changes on each run. The seed value itself is arbitrary.
reducer = umap.UMAP(random_state=42)
embedding = reducer.fit_transform(ana_df)

In [10]:
# UMAP view: the embedding is transposed to the 2 x n layout simple_plot expects,
# and points are coloured/shaped by 'Conceptual Model'.
simple_plot(embedding.T, "Conceptual Model")

In [252]:
# Relationship table: one row per system with up to three (entity, verb) pairs in
# "Rel N: Entity" / "Rel N: Verb" columns. Stack the pairs into a long table and
# drop the rows where a slot is empty.
rel_data = pd.read_csv('./relationships.tsv', sep='\t')
subs = [
    rel_data.rename(
        columns={"Rel {}: Entity".format(slot): "Entity", "Rel {}: Verb".format(slot): "Verb"}
    )[['System', 'Entity', 'Verb']]
    for slot in (1, 2, 3)
]
rel_data = pd.concat(subs).dropna()

# UMAP coordinates joined back onto the metadata, one row per system.
local_df = (
    pd.DataFrame(embedding.T, columns=df['System'])
    .T
    .reset_index()
    .rename(columns={0: "x", 1: 'y'})
    .merge(df, on=['System'])
)

# Look up coordinates for both ends of every relationship: the first merge resolves
# the entity (columns suffixed _x), the second the system itself (suffixed _y).
positions = local_df[['x', 'y', 'System']]
coords = (
    rel_data
    .merge(positions, left_on="Entity", right_on="System")
    .merge(positions, left_on="System_x", right_on="System")
)
coord_df = (
    coords[['Verb', 'x_x', 'y_x', 'x_y', 'y_y']]
    .rename(columns={"x_x": "x1", "y_x": "y1", "x_y": "x2", "y_y": "y2"})
    .reset_index()
)

# Two rows per relationship (one per endpoint) sharing a 'rel' id, so a line mark
# can connect them.
coord_list = coord_df.rename(columns={'index': 'rel'}).to_dict('records')
coord_return = []
for rec in coord_list:
    for x_key, y_key in (('x1', 'y1'), ('x2', 'y2')):
        coord_return.append({"verb": rec['Verb'], "rel": rec['rel'], "x": rec[x_key], "y": rec[y_key]})
coord_df = pd.DataFrame(coord_return)

In [16]:


# UMAP scatter of the systems with the relationship segments drawn underneath.
system_coords = pd.DataFrame(embedding.T, columns=df['System']).T.reset_index()
local_df = system_coords.rename(columns={0: "x", 1: 'y'}).merge(df, on=['System'])

model_palette = alt.Scale(range=["#EE0000", "#AF00DB", "#267f99", "#0000ff", "#d16969", "#795E26"])
base = (
    alt.Chart(local_df)
    .encode(
        x=alt.X("x", scale=alt.Scale(zero=False)),
        y=alt.Y("y", scale=alt.Scale(zero=False)),
        color=alt.Color("Conceptual Model", scale=model_palette),
        shape="Conceptual Model",
        tooltip=["System"],
    )
    .properties(height=600)
)
circles = base.mark_point()
txts = base.mark_text(dy=-10).encode(text="System")

# One faint segment per relationship; 'rel' groups the two endpoint rows of a segment.
lines = alt.Chart(coord_df).mark_line(opacity=0.2).encode(
    x="x", y="y", detail="rel", color="verb", strokeDash="verb"
)

lines + circles + txts

In [253]:
# Undirected graph of all relationships. type_map stores the verb under both key
# orders so it can be looked up whichever way networkx reports an edge.
G = nx.Graph()
type_map = {}
for rel in rel_data.to_dict('records'):
    entity, system = rel["Entity"], rel["System"]
    G.add_edge(entity, system)
    type_map[(entity, system)] = rel['Verb']
    type_map[(system, entity)] = rel['Verb']

pos = nx.planar_layout(G)

# One row per edge carrying the laid-out coordinates of both endpoints.
rows = [
    {
        "source": src,
        "target": targ,
        "source_x": pos[src][0],
        "source_y": pos[src][1],
        "target_x": pos[targ][0],
        "target_y": pos[targ][1],
    }
    for src, targ in G.edges
]
net_df = pd.DataFrame(rows)

source_points = alt.Chart(net_df).mark_circle().encode(x="source_x", y="source_y")
target_points = alt.Chart(net_df).mark_circle().encode(x="target_x", y="target_y")
labels = source_points.mark_text().encode(text="source") + target_points.mark_text().encode(text="target")

# Two rows per edge (source end, then target end) sharing a 'detail' id for the line mark.
lines = []
for idx, row in enumerate(rows):
    line_type = type_map[(row['source'], row['target'])]
    for end in ("source", "target"):
        lines.append({"detail": idx, "line_type": line_type, "x": row[end + "_x"], "y": row[end + "_y"]})
line_chart = alt.Chart(pd.DataFrame(lines)).mark_line().encode(x="x", y="y", detail="detail", color="line_type")

# Trailing semicolon: the chart is composed but its output is suppressed.
line_chart + source_points + target_points + labels;
# net_df

In [303]:
# Relationship records with the 'Inspired' links left out.
recs = [rel for rel in rel_data.to_dict('records') if rel['Verb'] != 'Inspired']

# Every name that appears on either side of a relationship. A name occurring on both
# sides is listed twice; that is harmless because the structures below are keyed by
# name or de-duplicated through set().
nodes = list({rel['Entity'] for rel in recs}) + list({rel['System'] for rel in recs})

# Adjacency in both directions: entity -> dependent systems, system -> its entities.
child_nodes = {node: [] for node in nodes}
parent_nodes = {node: [] for node in nodes}
for rel in recs:
    child_nodes[rel["Entity"]].append(rel['System'])
    parent_nodes[rel["System"]].append(rel['Entity'])

# Peel the graph into layers, starting from the nodes that have no parents.
# NOTE(review): termination presumably relies on the relationships being acyclic — confirm.
output = []
working_set = [node for node in nodes if not len(parent_nodes[node])]
while len(working_set):
    childset = [child for node in working_set for child in child_nodes[node]]
    # Keep a child for the next layer once per parent of it that is not itself in childset.
    outset = [
        child
        for child in childset
        for parent in parent_nodes[child]
        if parent not in childset
    ]
    output.append(list(set(working_set)))
    working_set = list(set(outset))


def prepare_chart(output, verbs, records=None):
    """Draw the layered relationship diagram as a single Altair chart.

    Args:
        output: list of rows; each row is a list of system names. The row index
            becomes the vertical position (the y scale is reversed, so row 0 is
            drawn at the top) and the index within the row, scaled to [0, 1],
            becomes the horizontal position.
        verbs: iterable of verb labels; one line/point/label layer is built per
            verb, in the given order.
        records: optional list of dicts with 'Entity', 'System' and 'Verb' keys.
            Defaults to the notebook-level `recs`.

    Returns:
        A layered Altair chart with the grid and the view border disabled.

    Raises:
        KeyError: if a record whose verb is requested names a system that does
            not appear anywhere in `output`.
    """
    if records is None:
        # Backward-compatible default: use the records prepared earlier in the notebook.
        records = recs
    verbs = list(verbs)

    # If a name occurs in several rows, the last row containing it decides its layer.
    system_order = {}
    for idx, row in enumerate(output):
        for system in row:
            system_order[system] = idx

    pos_map = {}
    for row in output:
        last_slot = len(row) - 1
        for jdx, system in enumerate(row):
            # A one-entry row has no span to spread over; centre it instead of dividing by zero.
            frac = jdx / last_slot if last_slot > 0 else 0.5
            pos_map[system] = (system_order[system], frac)

    lines = []
    for idx, row in enumerate(records):
        # Records with other verbs are never drawn, so do not require positions for them.
        if row["Verb"] not in verbs:
            continue
        src = row['Entity']
        trg = row['System']
        src_pos = pos_map[src]
        trg_pos = pos_map[trg]
        # Two rows per relationship (source end, target end) sharing a 'detail' id.
        lines.append({"detail": idx, "x": src_pos[1], "y": src_pos[0], "verb": row["Verb"], "system": src})
        lines.append({"detail": idx, "x": trg_pos[1], "y": trg_pos[0], "verb": row["Verb"], "system": trg})

    # Explicit columns keep the per-verb filter below valid even when nothing matched.
    lines_df = pd.DataFrame(lines, columns=["detail", "x", "y", "verb", "system"])

    charts = []
    for verb in verbs:
        base = alt.Chart(lines_df[lines_df['verb'] == verb])
        line = base.mark_line(interpolate="natural").encode(
            x=alt.X("x", axis=None),
            y=alt.Y("y", scale=alt.Scale(reverse=True), axis=None),
            detail="detail",
            strokeDash="verb",
            color=alt.Color("verb", scale=alt.Scale(range=["#AF00DB", "#EE0000", "#267f99", "#0000ff", "#d16969", "#795E26"])))
        text = base.mark_text(dy=-10).encode(x="x", y="y", text="system")
        point = base.mark_circle().encode(x="x", y="y", color="verb")
        charts.append(line + point + text)
    return alt.layer(*charts).configure_axis(grid=False).configure_view(strokeWidth=0)

In [304]:
# Hand-arranged layers for the final figure; this assignment replaces the `output`
# list computed in the previous cell. Each inner list is one row of the diagram and
# entries are spread evenly along the row in list order.
# The empty strings occupy a slot when positions are computed, so they presumably act
# as spacers that shift the named systems sideways — confirm.
output = [
    ['', 'Vega', '', 'P4'], 
    ['', '','Gemini 1', 'Vega-Lite', "PapARVis", 'Atom', 'P5', ''], 
    ['Gemini 2', 'Scholz 3D Vis Language', 'Multiclass-Density-Maps', 'Genome Spy', 'Gosling',  'SVL', 'VRIA', 'CompassQL', 'Ivy', 'P6']]
# Draw only the 'Compiles', 'Wraps' and 'Extends' relationships, in a 200px-tall chart.
prepare_chart(output, ['Compiles', 'Wraps', 'Extends']).properties(height=200)