In [1]:
from worm_wiring import worm_wiring
import pandas as pd
import networkx as nx
import numpy as np

In [2]:
# adjacency matrix, rows presynaptic, cols post synaptic
struct = {'structure': "https://elifesciences.org/download/aHR0cHM6Ly9jZG4uZWxpZmVzY2llbmNlcy5vcmcvYXJ0aWNsZXMvMTY5NjIvZWxpZmUtMTY5NjItZmlnMTYtZGF0YTEtdjEueGxzeA--/elife-16962-fig16-data1-v1.xlsx?_hash=ltleHMu0Mu0ujKuGWUeIsAsFtqARmr7badHc8RYWqcA%3D"}

# cell type, characteristics, of cells, Annotation(e.g. sensory, inter, motor)
# annoyingly, doesn't list ids in adj. must infer, e.g. in Abreviation is PR-I,
# and there are 10 cells of this, then matching ids will pr
base_atts = {'basic_atts': "https://elifesciences.org/download/aHR0cHM6Ly9jZG4uZWxpZmVzY2llbmNlcy5vcmcvYXJ0aWNsZXMvMTY5NjIvZWxpZmUtMTY5NjItZmlnMS1kYXRhMS12MS54bHN4/elife-16962-fig1-data1-v1.xlsx?_hash=RJKemxiTOb3fYQn5SXeog7w8Lzn%2F9Cka4BwHFyruMaM%3D"}

# Neurons listed by ID, with cell type, morphological features, location, presence or absence of cilia, and number
# of each neuron’s pre- and postsynaptic sites or putative gap junctions (>0.06 µm). Ependymal cells excluded. The
# final column shows left lateral views of individual neuron reconstructions (whole cells, or terminals for
# photoreceptors) as small thumbnails with scale bars: 1 µm (thick bars: coronet cells, lens cells, photoreceptor
# terminals and PR-III cells, vacINs); 10 µm (thin bars: all other cells).
cell_data = {'cell_bio_data': "https://elifesciences.org/download/aHR0cHM6Ly9jZG4uZWxpZmVzY2llbmNlcy5vcmcvYXJ0aWNsZXMvMTY5NjIvZWxpZmUtMTY5NjItZmlnMy1kYXRhMS12MS54bHN4/elife-16962-fig3-data1-v1.xlsx?_hash=PIvd%2FA8L6avkM1Y3ebK9%2BlgoGGiskaH53PsAYK%2BtGyM%3D"}


Load files using the worm wiring xlsx loader

In [3]:
files = {**struct, **base_atts, **cell_data}
dfs = {f: worm_wiring.pull_xl_file(files[f]) for f in files}
print('Got excel data')


Got excel data


In [4]:
dfs['structure'][0].head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,207,208,209,210,211,212,213,214,215,216
0,,pns1,pns10,pns11,pns12,pns13,pns14,pns2,pns3,pns4,...,BTN2,BTN3,BTN4,mul,mulm,mur,murm,bm,bm-noto,Total
1,pns1,,,,,0.96,,,0.12,0.18,...,,,,,,,,0.24,,4.38
2,pns10,0.66,,1.62,0.72,0.12,,,0.9,1.26,...,,,,,,,,,,13.98
3,pns11,,0.42,,0.18,,,,0.24,0.12,...,,,,,,,,,,16.19
4,pns12,0.12,1.02,0.24,,0.54,,,1.02,0.36,...,,,,,,,,,,12.78


In [5]:
dfs['basic_atts'][0].head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,Abbreviation,Colour,Cell Type,Characteristics,Subtypes,Number of cells,Cell IDs,Annotation
1,PR-I,,Type I Photoreceptors,Outer segments into ocellus pigment,"(osa, osd, osp: outer segments project to ante...",23,pr1-pr23,Sensory
2,PR-II,,Type II Photoreceptors,Outer segments directly into neural canal near...,"(osa, osd, osp: outer segments project to ante...",7,pra-prg,Sensory
3,PR-III,,Type III Photoreceptors,Ventral vacuolated cells between Cor and lens ...,-,6,"lens6-7, 84, 101, 110, 113, 114",Sensory
4,Lens,,Lens cells,"Large, vacuolated cells ventral to ocellus pig...",-,3,"lens4, lens5, lens8",Accessory


Find the connections and correctly label the nodes 

In [6]:
adj = dfs['structure'][0]
col_labels = list(adj.iloc[0, :])
row_labels = list(adj.iloc[:, 0])
col_labels = [l for l in col_labels]
col_lab_set = set(col_labels)
row_labels = {l for l in row_labels}
joint_labels = col_lab_set.intersection(row_labels)

In [7]:
# process the structure
adj.columns = col_labels
selected_cols = adj[list(joint_labels)]
common_adj = selected_cols.loc[selected_cols[np.nan].isin(joint_labels)]
np_adj = common_adj.values
integerized = np_adj[1:, 1:].astype(np.float64)
graph = nx.from_numpy_array(integerized, create_using=nx.DiGraph)
ids = common_adj.iloc[1:, 0]
node_map = {n: str(ids.iloc[n]).strip() for n in graph.nodes()}
nx.relabel_nodes(graph, node_map, copy=False)
print('got adj to nx')

got adj to nx


Get node attribute data

In [13]:
cell_bio_data = dfs['cell_bio_data'][0].iloc[:, list(range(13))]
man_cols = ['Cell ID', 'Cell Type', 'Ciliated', 'Side', 'Brain Region', 'X', 'Y', 'Z', 'Morphology Description', 'Cell Body Volume', 'Number of Pre-Synaptic Sites', 'Number of Post-Synaptic Sites', 'Number of Putative Gap Junctions']
cell_bio_data.columns = man_cols
cell_bio_data.drop(cell_bio_data.index[[0, 1]])
def str_strip(item):
    item = str(item)
    item.strip()
    return item
cell_bio_data['Cell ID'] = pd.DataFrame(cell_bio_data['Cell ID']).applymap(str_strip)
cell_bio_data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


Unnamed: 0,Cell ID,Cell Type,Ciliated,Side,Brain Region,X,Y,Z,Morphology Description,Cell Body Volume,Number of Pre-Synaptic Sites,Number of Post-Synaptic Sites,Number of Putative Gap Junctions
0,Cell ID,Cell Type,Ciliated,Soma location,,,,,Morphology,Cell body volume,Number of pre-synaptic sites,Number of post-synaptic sites,Number of putative gap junctions
1,,,,Side,Brain region,Z,X,Y,,,,,
2,1,cor-ass BVIN,ciliated to bp,L,anterior BV,0.83,9.5,30.6,Single simple axon with slight expansion at te...,94.7191,2,21,-
3,2,cor-ass BVIN,ciliated to bp,L,anterior BV,1.25,16.5,32.8,Simple cell with single unbranched axon to pos...,100.91,12,14,2
4,3,BVIN,ciliated,L,anterior BV,0.17,23.2,31.9,Simple cell body with single axon that expands...,24.3679,6,5,2


In [14]:
# assign node attributes
for n in graph.nodes:
    try:
        data = cell_bio_data.loc[cell_bio_data['Cell ID'] == n]
    except KeyError:
        continue
    data = data.to_dict()
    data.pop('Cell ID')
    graph.add_node(n, attr_dict=data)


put in json spec 
