In [1]:
import sys
sys.path.append("../")

In [2]:
from worm_wiring import worm_wiring
import pandas as pd
import networkx as nx
import numpy as np
import math

In [3]:
# adjacency matrix, rows presynaptic, cols post synaptic
struct = {'structure': "https://elifesciences.org/download/aHR0cHM6Ly9jZG4uZWxpZmVzY2llbmNlcy5vcmcvYXJ0aWNsZXMvMTY5NjIvZWxpZmUtMTY5NjItZmlnMTYtZGF0YTEtdjEueGxzeA--/elife-16962-fig16-data1-v1.xlsx?_hash=ltleHMu0Mu0ujKuGWUeIsAsFtqARmr7badHc8RYWqcA%3D"}

# cell type, characteristics, of cells, Annotation(e.g. sensory, inter, motor)
# annoyingly, doesn't list ids in adj. must infer, e.g. in Abreviation is PR-I,
# and there are 10 cells of this, then matching ids will pr
base_atts = {'basic_atts': "https://elifesciences.org/download/aHR0cHM6Ly9jZG4uZWxpZmVzY2llbmNlcy5vcmcvYXJ0aWNsZXMvMTY5NjIvZWxpZmUtMTY5NjItZmlnMS1kYXRhMS12MS54bHN4/elife-16962-fig1-data1-v1.xlsx?_hash=RJKemxiTOb3fYQn5SXeog7w8Lzn%2F9Cka4BwHFyruMaM%3D"}

# Neurons listed by ID, with cell type, morphological features, location, presence or absence of cilia, and number
# of each neuron’s pre- and postsynaptic sites or putative gap junctions (>0.06 µm). Ependymal cells excluded. The
# final column shows left lateral views of individual neuron reconstructions (whole cells, or terminals for
# photoreceptors) as small thumbnails with scale bars: 1 µm (thick bars: coronet cells, lens cells, photoreceptor
# terminals and PR-III cells, vacINs); 10 µm (thin bars: all other cells).
cell_data = {'cell_bio_data': "https://elifesciences.org/download/aHR0cHM6Ly9jZG4uZWxpZmVzY2llbmNlcy5vcmcvYXJ0aWNsZXMvMTY5NjIvZWxpZmUtMTY5NjItZmlnMy1kYXRhMS12MS54bHN4/elife-16962-fig3-data1-v1.xlsx?_hash=PIvd%2FA8L6avkM1Y3ebK9%2BlgoGGiskaH53PsAYK%2BtGyM%3D"}


Load files using the worm wiring xlsx loader

In [4]:
files = {**struct, **base_atts, **cell_data}
dfs = {f: worm_wiring.pull_xl_file(files[f]) for f in files}
print('Got excel data')


Got excel data


In [5]:
dfs['structure'][0].head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,207,208,209,210,211,212,213,214,215,216
0,,pns1,pns10,pns11,pns12,pns13,pns14,pns2,pns3,pns4,...,BTN2,BTN3,BTN4,mul,mulm,mur,murm,bm,bm-noto,Total
1,pns1,,,,,0.96,,,0.12,0.18,...,,,,,,,,0.24,,4.38
2,pns10,0.66,,1.62,0.72,0.12,,,0.9,1.26,...,,,,,,,,,,13.98
3,pns11,,0.42,,0.18,,,,0.24,0.12,...,,,,,,,,,,16.19
4,pns12,0.12,1.02,0.24,,0.54,,,1.02,0.36,...,,,,,,,,,,12.78


In [6]:
dfs['basic_atts'][0].head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,Abbreviation,Colour,Cell Type,Characteristics,Subtypes,Number of cells,Cell IDs,Annotation
1,PR-I,,Type I Photoreceptors,Outer segments into ocellus pigment,"(osa, osd, osp: outer segments project to ante...",23,pr1-pr23,Sensory
2,PR-II,,Type II Photoreceptors,Outer segments directly into neural canal near...,"(osa, osd, osp: outer segments project to ante...",7,pra-prg,Sensory
3,PR-III,,Type III Photoreceptors,Ventral vacuolated cells between Cor and lens ...,-,6,"lens6-7, 84, 101, 110, 113, 114",Sensory
4,Lens,,Lens cells,"Large, vacuolated cells ventral to ocellus pig...",-,3,"lens4, lens5, lens8",Accessory


Find the connections and correctly label the nodes

In [7]:
def str_strip(item):
    if type(item) is not str:
        if math.isnan(item):
            item = 'nan'
        else:
            item = int(item)
    item = str(item)
    item.strip()
    return item
adj = dfs['structure'][0]
col_labels = list(adj.iloc[0, :])
row_labels = list(adj.iloc[:, 0])
col_labels = [str_strip(l) for l in col_labels]
col_lab_set = set(col_labels)
row_labels = {str_strip(l) for l in row_labels}
joint_labels = col_lab_set.intersection(row_labels)
to_add_c = row_labels - col_lab_set
to_add_r = col_lab_set - row_labels 
print(to_add_c)
print(to_add_r)

{'pnw', '19', 'coronet10', '44', 'lens6', '40', 'coronet5'}
{'mulm', '5', '35', '113', 'bm-noto', 'midtail7', 'coronet14', 'bm', 'pns2', 'coronet2', '181', 'murm', 'lens7', 'pns14', '57', 'mul', 'mur'}


In [8]:
# process the structure
common_adj = adj.copy()
common_adj.iloc[1:, 1:] = common_adj.iloc[1:, 1:].fillna(0)
new_index = common_adj[0].apply(str_strip)
common_adj = common_adj.set_index(new_index)
common_adj.columns = common_adj.iloc[0, :].apply(str_strip)
common_adj.drop(common_adj.index[0], axis=0, inplace=True)
common_adj.drop(common_adj.columns[0], axis=1, inplace=True)
common_adj.drop('Total', axis=0, inplace=True)
common_adj.drop('Total', axis=1, inplace=True)
common_adj.head()

nan,pns1,pns10,pns11,pns12,pns13,pns14,pns2,pns3,pns4,pns5,...,BTN1,BTN2,BTN3,BTN4,mul,mulm,mur,murm,bm,bm-noto
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
pns1,0.0,0.0,0.0,0.0,0.96,0,0,0.12,0.18,0.24,...,0.0,0,0,0,0,0,0,0,0.24,0
pns10,0.66,0.0,1.62,0.72,0.12,0,0,0.9,1.26,0.0,...,0.0,0,0,0,0,0,0,0,0.0,0
pns11,0.0,0.42,0.0,0.18,0.0,0,0,0.24,0.12,0.0,...,0.47,0,0,0,0,0,0,0,0.0,0
pns12,0.12,1.02,0.24,0.0,0.54,0,0,1.02,0.36,0.0,...,0.0,0,0,0,0,0,0,0,0.0,0
pns13,0.78,0.0,0.0,0.0,0.0,0,0,0.42,0.24,0.0,...,0.0,0,0,0,0,0,0,0,0.18,0


In [9]:
for item in to_add_c:
    common_adj[item] = [0 for _ in range(len(common_adj.index))]

for item in to_add_r:
    item_dict = {c:0 for c in common_adj.columns[1:]}
    common_adj = common_adj.append(pd.Series(item_dict, name=item))
common_adj.head()



nan,pns1,pns10,pns11,pns12,pns13,pns14,pns2,pns3,pns4,pns5,...,murm,bm,bm-noto,pnw,19,coronet10,44,lens6,40,coronet5
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
pns1,0.0,0.0,0.0,0.0,0.96,0,0,0.12,0.18,0.24,...,0,0.24,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
pns10,0.66,0.0,1.62,0.72,0.12,0,0,0.9,1.26,0.0,...,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
pns11,0.0,0.42,0.0,0.18,0.0,0,0,0.24,0.12,0.0,...,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
pns12,0.12,1.02,0.24,0.0,0.54,0,0,1.02,0.36,0.0,...,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
pns13,0.78,0.0,0.0,0.0,0.0,0,0,0.42,0.24,0.0,...,0,0.18,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
common_adj.sort_index(axis=0, inplace=True)
common_adj.sort_index(axis=1, inplace=True)
common_adj.head()

nan,1,100,101,103,105,106,107,108,110,112,...,prb,prc,prd,pre,prf,prg,trIN,ukn,ukn2,vacIN1
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0.0,0.0,0.0,0
100,0.0,0.3,0.0,0.0,0.54,0.12,0.0,0.0,0.0,1.38,...,0,0,0,0,0,0,0.06,0.0,0.36,0
101,0.0,0.0,0.18,0.0,0.3,0.0,0.18,0.0,0.18,0.0,...,0,0,0,0,0,0,0.0,0.0,0.0,0
103,0.0,0.0,0.0,0.0,0.0,0.54,0.0,0.42,0.0,0.3,...,0,0,0,0,0,0,0.0,0.0,0.0,0
105,0.0,0.6,0.06,0.0,0.0,0.18,0.0,0.12,0.0,0.0,...,0,0,0,0,0,0,0.0,0.24,0.0,0


In [11]:
if list(common_adj.index) != list(common_adj.columns):
    print((set(common_adj.index) | set(common_adj.columns)) - (set(common_adj.index).intersection(set(common_adj.columns))))
    raise ValueError
common_adj.iloc[:, :] = common_adj.values.astype(np.float)
graph = nx.from_pandas_adjacency(common_adj, create_using=nx.DiGraph)
# np_adj = common_adj.values.astype(np.float)
# np.nan_to_num(np_adj, 0.)
# graph = nx.from_numpy_matrix(np_adj, parallel_edges=False, create_using=nx.DiGraph)
print('got adj to nx')

got adj to nx


Get node attribute data

In [12]:
cell_bio_data = dfs['cell_bio_data'][0].iloc[:, list(range(13))]
man_cols = ['Cell ID', 'Cell Type', 'Ciliated', 'Side', 'Brain Region', 'X', 'Y', 'Z', 'Morphology Description', 'Cell Body Volume', 'Number of Pre-Synaptic Sites', 'Number of Post-Synaptic Sites', 'Number of Putative Gap Junctions']
cell_bio_data.columns = man_cols
cell_bio_data.drop(cell_bio_data.index[[0, 1]])
cell_bio_data['Cell ID'] = pd.DataFrame(cell_bio_data['Cell ID']).applymap(str_strip)
cell_bio_data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


Unnamed: 0,Cell ID,Cell Type,Ciliated,Side,Brain Region,X,Y,Z,Morphology Description,Cell Body Volume,Number of Pre-Synaptic Sites,Number of Post-Synaptic Sites,Number of Putative Gap Junctions
0,Cell ID,Cell Type,Ciliated,Soma location,,,,,Morphology,Cell body volume,Number of pre-synaptic sites,Number of post-synaptic sites,Number of putative gap junctions
1,,,,Side,Brain region,Z,X,Y,,,,,
2,1,cor-ass BVIN,ciliated to bp,L,anterior BV,0.83,9.5,30.6,Single simple axon with slight expansion at te...,94.7191,2,21,-
3,2,cor-ass BVIN,ciliated to bp,L,anterior BV,1.25,16.5,32.8,Simple cell with single unbranched axon to pos...,100.91,12,14,2
4,3,BVIN,ciliated,L,anterior BV,0.17,23.2,31.9,Simple cell body with single axon that expands...,24.3679,6,5,2


In [13]:
# assign node attributes
for n in graph.nodes:
    try:
        data = cell_bio_data.loc[cell_bio_data['Cell ID'] == n]
    except KeyError:
        continue
    try:
        data = data.to_dict('r')[0]
    except IndexError:
        continue    
    print(data)
    data.pop('Cell ID')
    graph.add_node(n, **data)


{'Cell ID': '1', 'Cell Type': 'cor-ass BVIN', 'Ciliated': 'ciliated to bp', 'Side': 'L', 'Brain Region': 'anterior BV', 'X': 0.8300000000000003, 'Y': 9.5, 'Z': 30.6, 'Morphology Description': 'Single simple axon with slight expansion at terminal in PBV', 'Cell Body Volume': 94.7191, 'Number of Pre-Synaptic Sites': 2, 'Number of Post-Synaptic Sites': 21, 'Number of Putative Gap Junctions': '-'}
{'Cell ID': '100', 'Cell Type': 'prRN', 'Ciliated': 'no vacuole or membrane contact, does not enter canal', 'Side': 'L', 'Brain Region': 'posterior BV', 'X': 22.24999999999994, 'Y': 14.77, 'Z': 32.17, 'Morphology Description': 'multiple branches from main axon initial region, axon then bifurcates to form two collateral branches, one terminating in anterior MG the other in posterior MG, both simple terminals', 'Cell Body Volume': 317.27, 'Number of Pre-Synaptic Sites': 56, 'Number of Post-Synaptic Sites': 90, 'Number of Putative Gap Junctions': 22}
{'Cell ID': '103', 'Cell Type': 'non-sensory RN',



In [14]:
# add fig 1 data 
basic_atts = dfs['basic_atts'][0].copy()
basic_atts.columns = basic_atts.iloc[0, :]
basic_atts.drop(basic_atts.index[0], axis=0, inplace=True)
basic_atts.head()

Unnamed: 0,Abbreviation,Colour,Cell Type,Characteristics,Subtypes,Number of cells,Cell IDs,Annotation
1,PR-I,,Type I Photoreceptors,Outer segments into ocellus pigment,"(osa, osd, osp: outer segments project to ante...",23,pr1-pr23,Sensory
2,PR-II,,Type II Photoreceptors,Outer segments directly into neural canal near...,"(osa, osd, osp: outer segments project to ante...",7,pra-prg,Sensory
3,PR-III,,Type III Photoreceptors,Ventral vacuolated cells between Cor and lens ...,-,6,"lens6-7, 84, 101, 110, 113, 114",Sensory
4,Lens,,Lens cells,"Large, vacuolated cells ventral to ocellus pig...",-,3,"lens4, lens5, lens8",Accessory
5,vacIN,,Photoreceptor associated vacuolated neurons,Anterior vacuolated neurons with short axons a...,Lens1 extends farther along neuropil and makes...,2,"lens1, lens2",Interneuron


In [23]:
for n in graph.nodes(data=True):
    nid = n[0]
    try:
        data = basic_atts.loc[basic_atts['Abbreviation'].apply(str_strip) == str_strip(n[1]['Cell Type'])]
    except KeyError:
        try:
            data = basic_atts.loc[nid in basic_atts['Cell IDs']]
        except KeyError:
            continue
    try:
        data = data.to_dict('r')[0]
        data.pop('Abbreviation')
        data.pop('Cell Type')
        data.pop('Number of cells')
        data.pop('Cell IDs')
        graph.add_node(nid, **data)
    except:
        print("An error occurred")
        continue

An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred
An error occurred




put in json spec

In [24]:
from graph import GraphIO
GraphIO.dump(graph, './ciona.json')
