In [1]:
import csv
import math
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import random
import seaborn as sns
import rdata
import json

def remove_duplicates(lst):
    """Return a new list holding the items of ``lst`` without repeats.

    The first occurrence of every item is kept and the original relative
    order is preserved. Items must be hashable.
    """
    # dict keys are unique and keep insertion order, so the keys of this
    # throw-away dict are exactly the first occurrences, in order.
    return list(dict.fromkeys(lst))

### Pulling in data

In [2]:
# Read the four input tables from CSV files in the current working directory.
conn = pd.read_csv('connections.csv')
names = pd.read_csv('names.csv')
cell_types = pd.read_csv('consolidated_cell_types.csv')
community_labels = pd.read_csv('processed_labels.csv')

# Show the cell-type table as this cell's output.
cell_types

Unnamed: 0,root_id,primary_type,additional_type(s)
0,720575940596125868,T5c,
1,720575940597856265,Tm16,
2,720575940597944841,Tm7,CB3851
3,720575940598267657,TmY15,
4,720575940599333574,Tm1,
...,...,...,...
137672,720575940661335681,CB0180,
137673,720575940661336193,TmY3,
137674,720575940661337217,KCab-p,
137675,720575940661338497,CB0904,


In [3]:
# Schlegel et al. (2021)
# pick neuron classes listed in Fig. 1 -> exclude those without search results in codex
# anterior paired lateral neurons
#
# Each dict below maps a broad cell-type label to the codex keyword used in the
# file name codex_keywords/root_ids_<keyword>.txt (comma-separated root IDs).
receptors={'ALRN':'ALRN', 'ORN':'ORN', 'T/HRN':'HRN'} #'T/HRN'
antennal_lobe_local={'ALLN':'ALLN'}
# NOTE(review): "antannal" looks like a typo for "antennal"; the name and the
# dict key below are left untouched because they appear in printed output and
# may be referenced elsewhere.
antannal_lobe_projection={'ALPN':'ALPN'} #'uPN', 'mPN'
lat_horn_calyx_noMB = {'LHLN':'LHLN'} #'TOON', 'LHN', 'LHON', 'CN1'
lat_horn_calyx_MB = {'LHCENT':'LHCENT'}
MB = {'KC':'class_Kenyon_cell','MBON':'class_MBON','DAN':'class_DAN','WEDPN':'WEDPN', 'APL':'APL'}# 'CXN','DN', 'CN2'

#codex other keywords
other = {'olfactory': 'class_olfactory'}

# group name -> {broad label -> codex keyword}
neurons={'receptors':receptors, 'antennal_lobe_local':antennal_lobe_local, 'antannal_lobe_projection':antannal_lobe_projection,'lat_horn_calyx_noMB': lat_horn_calyx_noMB, 'lat_horn_calyx_MB':lat_horn_calyx_MB, 'MB': MB, 'other': other}

neuron_catalog_1={}  # group name   -> root IDs of all keywords in the group (concatenated per keyword)
neuron_catalog_2={}  # codex keyword -> de-duplicated root IDs read from its file
neuron_catalog_3={}  # primary_type  -> root IDs (appended once per keyword file containing the ID)

used_IDs=[]
used_cell_types=[]   # primary types in order of first appearance
used_neurons={}

# Build the root_id -> primary_type lookup once instead of scanning the whole
# cell_types table for every single ID. setdefault keeps the FIRST row for a
# root_id, matching what a boolean-mask lookup followed by [0] would return.
primary_type_by_root = {}
for root_id, primary_type in zip(cell_types['root_id'].tolist(),
                                 cell_types['primary_type'].tolist()):
    primary_type_by_root.setdefault(root_id, primary_type)

for g_name, group in neurons.items():
    ly1_IDS=[]
    print('----------------------')
    print(g_name)
    print('----------------------')
    for cell_type_broad, codex_call in group.items():

        with open(f"codex_keywords/root_ids_{codex_call}.txt","r") as f:
            # Strip whitespace/newlines around each token and drop empty tokens
            # (e.g. from a trailing comma or an empty file) so the stored ID
            # strings are clean and int(ID) below cannot fail on ''.
            IDs = [token.strip() for token in f.read().split(",") if token.strip()]

        unique = remove_duplicates(IDs)
        print(f"codex_keywords/root_ids_{codex_call}.txt: {len(unique)} neurons")
        ly1_IDS.extend(unique)

        neuron_catalog_2[codex_call]=unique

        for ID in unique:
            root_id = int(ID)
            # IDs that are absent from the cell_types table are skipped.
            if root_id not in primary_type_by_root:
                continue
            primary_type = primary_type_by_root[root_id]
            # used_cell_types and the keys of neuron_catalog_3 are filled in
            # lock-step, so one membership test covers both.
            if primary_type not in neuron_catalog_3:
                used_cell_types.append(primary_type)
                neuron_catalog_3[primary_type] = []
            neuron_catalog_3[primary_type].append(ID)
    neuron_catalog_1[g_name]=ly1_IDS


----------------------
receptors
----------------------
codex_keywords/root_ids_ALRN.txt: 2328 neurons
codex_keywords/root_ids_ORN.txt: 2299 neurons
codex_keywords/root_ids_HRN.txt: 98 neurons
----------------------
antennal_lobe_local
----------------------
codex_keywords/root_ids_ALLN.txt: 429 neurons
----------------------
antannal_lobe_projection
----------------------
codex_keywords/root_ids_ALPN.txt: 688 neurons
----------------------
lat_horn_calyx_noMB
----------------------
codex_keywords/root_ids_LHLN.txt: 514 neurons
----------------------
lat_horn_calyx_MB
----------------------
codex_keywords/root_ids_LHCENT.txt: 49 neurons
----------------------
MB
----------------------
codex_keywords/root_ids_class_Kenyon_cell.txt: 5177 neurons
codex_keywords/root_ids_class_MBON.txt: 96 neurons
codex_keywords/root_ids_class_DAN.txt: 331 neurons
codex_keywords/root_ids_WEDPN.txt: 105 neurons
codex_keywords/root_ids_APL.txt: 2 neurons
----------------------
other
----------------------
co

### 1. Cell types (top categories)

In [4]:
# Total number of ID entries stored across all groups of neuron_catalog_1.
n = sum(len(group_ids) for group_ids in neuron_catalog_1.values())
print(n)

# Display the full group -> IDs mapping as this cell's output.
neuron_catalog_1

14397


{'receptors': ['720575940628660224',
  '720575940619051009',
  '720575940628553731',
  '720575940629020675',
  '720575940624384007',
  '720575940606869513',
  '720575940609228809',
  '720575940607221771',
  '720575940623302669',
  '720575940625825820',
  '720575940614037533',
  '720575940631494687',
  '720575940617166882',
  '720575940644028452',
  '720575940616921129',
  '720575940617986089',
  '720575940631429165',
  '720575940631314479',
  '720575940614996018',
  '720575940616093746',
  '720575940645576756',
  '720575940633059381',
  '720575940638818357',
  '720575940627890231',
  '720575940628619319',
  '720575940630282297',
  '720575940632633400',
  '720575940646142004',
  '720575940633509949',
  '720575940638212166',
  '720575940638867526',
  '720575940627677258',
  '720575940639080526',
  '720575940617109597',
  '720575940617232477',
  '720575940633313376',
  '720575940620574817',
  '720575940609884258',
  '720575940630257772',
  '720575940619419757',
  '720575940630462572',
  '

### 2. Main types (middle categories)

In [5]:
print('--------counting ids--------')
# Sum the lengths of the ID lists stored in neuron_catalog_2.
n = sum(map(len, neuron_catalog_2.values()))
print(f'{n} ids in total')

# Display the full mapping as this cell's output.
neuron_catalog_2

--------counting ids--------
14397 ids in total


{'ALRN': ['720575940628660224',
  '720575940619051009',
  '720575940628553731',
  '720575940629020675',
  '720575940624384007',
  '720575940606869513',
  '720575940609228809',
  '720575940607221771',
  '720575940623302669',
  '720575940625825820',
  '720575940614037533',
  '720575940631494687',
  '720575940617166882',
  '720575940644028452',
  '720575940616921129',
  '720575940617986089',
  '720575940631429165',
  '720575940631314479',
  '720575940614996018',
  '720575940616093746',
  '720575940645576756',
  '720575940633059381',
  '720575940638818357',
  '720575940627890231',
  '720575940628619319',
  '720575940630282297',
  '720575940632633400',
  '720575940646142004',
  '720575940633509949',
  '720575940638212166',
  '720575940638867526',
  '720575940627677258',
  '720575940639080526',
  '720575940617109597',
  '720575940617232477',
  '720575940633313376',
  '720575940620574817',
  '720575940609884258',
  '720575940630257772',
  '720575940619419757',
  '720575940630462572',
  '72057

### 3. Subtypes (bottom categories)

In [6]:
print('--------counting ids--------')
# Count every stored entry, repeats included.
n = sum(map(len, neuron_catalog_3.values()))
print(f'{n} ids in total')


print('-----deleting duplicates-----')
# Replace each type's ID list with an order-preserving, duplicate-free copy.
# Only values are reassigned (no keys added or removed), so iterating over a
# snapshot of the keys is equivalent to iterating over the dict itself.
n = 0
for k in list(neuron_catalog_3):
    result = remove_duplicates(neuron_catalog_3[k])
    neuron_catalog_3[k] = result
    n += len(result)

print(f'{len(neuron_catalog_3)} types in total')
print(f'{n} unique ids')

# Display the de-duplicated mapping as this cell's output.
neuron_catalog_3

--------counting ids--------
14390 ids in total
-----deleting duplicates-----
598 types in total
9783 unique ids


{'ORN_DL2v': ['720575940628660224',
  '720575940618912427',
  '720575940620421153',
  '720575940616923905',
  '720575940622977799',
  '720575940624739728',
  '720575940632014284',
  '720575940638511267',
  '720575940634013869',
  '720575940659164545',
  '720575940620392374',
  '720575940629567473',
  '720575940613798195',
  '720575940610464195',
  '720575940619385583',
  '720575940632091951',
  '720575940620148059',
  '720575940612455917'],
 'ORN_VM4': ['720575940619051009',
  '720575940631494687',
  '720575940644429934',
  '720575940619690118',
  '720575940624736400',
  '720575940619649195',
  '720575940629455068',
  '720575940613161267',
  '720575940630716995',
  '720575940626826005',
  '720575940627964715',
  '720575940621648861',
  '720575940620477556',
  '720575940624180668',
  '720575940618610311',
  '720575940623509256',
  '720575940622075928',
  '720575940621437004',
  '720575940635592805',
  '720575940620814463',
  '720575940610926762',
  '720575940616546521',
  '7205759406377