## **Get FASTQ files for single-cell *Clytia* data**

In [1]:
import requests
from tqdm import tnrange, tqdm_notebook

In [2]:
# Resolve every Starvation cDNA fastq.gz DOI to a download link through the
# DataCite media endpoint and collect the links in starvFiles.
starvFiles = []
dois = ['10.22002/D1.1840','10.22002/D1.1841','10.22002/D1.1842','10.22002/D1.1843',
        '10.22002/D1.1844','10.22002/D1.1845','10.22002/D1.1846','10.22002/D1.1847',
        '10.22002/D1.1848','10.22002/D1.1849','10.22002/D1.1850','10.22002/D1.1851',
        '10.22002/D1.1852','10.22002/D1.1853','10.22002/D1.1854','10.22002/D1.1855'] #16 doi numbers
for doi in dois:
  media_url = 'https://api.datacite.org/dois/'+doi+'/media'
  # A timeout keeps the cell from hanging forever on a stalled connection.
  response = requests.get(media_url, timeout=60)
  # Surface HTTP failures directly instead of an opaque KeyError/JSON error below.
  response.raise_for_status()
  # The `url` attribute of the first media entry is used as the file link.
  file_url = response.json()['data'][0]['attributes']['url']

  starvFiles.append(file_url)

In [5]:
# Write the resolved Starvation links to disk, one link per line.
with open('/home/tchari/clytia_alt_splice/starvFile.txt', 'w') as link_file:
    link_file.writelines(f"{link}\n" for link in starvFiles)

In [6]:
# Resolve every Stimulation cDNA fastq.gz DOI to a download link through the
# DataCite media endpoint and collect the links in stimFiles.
stimFiles = []
dois = ['10.22002/D1.1860','10.22002/D1.1863','10.22002/D1.1864','10.22002/D1.1865',
        '10.22002/D1.1866','10.22002/D1.1868','10.22002/D1.1870','10.22002/D1.1871'] #8 numbers
for doi in dois:
  media_url = 'https://api.datacite.org/dois/'+doi+'/media'
  # A timeout keeps the cell from hanging forever on a stalled connection.
  response = requests.get(media_url, timeout=60)
  # Surface HTTP failures directly instead of an opaque KeyError/JSON error below.
  response.raise_for_status()
  # The `url` attribute of the first media entry is used as the file link.
  file_url = response.json()['data'][0]['attributes']['url']

  stimFiles.append(file_url)

In [7]:
# Write the resolved Stimulation links to disk, one link per line.
with open('/home/tchari/clytia_alt_splice/stimFile.txt', 'w') as link_file:
    link_file.writelines(f"{link}\n" for link in stimFiles)

**Save cell barcodes for desired animals/conditions**

In [8]:

def download_file(doi,ext):
    """Download the first media file registered for a DataCite DOI.

    The DOI's media record is fetched from the DataCite API, the `url` of its
    first entry is streamed to a local file in the current working directory,
    and a notebook progress bar tracks the bytes written.

    Parameters
    ----------
    doi : str
        DOI such as '10.22002/D1.1796'. The part after the last '/' is used
        as the stem of the local file name.
    ext : str
        Suffix appended to that stem to form the local file name.

    Returns
    -------
    str or None
        The local file name on success. None when the server answers 403 or
        the response carries no content-length header; a message is printed
        in either case and no file is written.
    """
    url = 'https://api.datacite.org/dois/'+doi+'/media'
    media = requests.get(url).json()
    file_url = media['data'][0]['attributes']['url']
    response = requests.get(file_url,stream=True)
    try:
        # A 403 response must stop here: its body is an error page, not the
        # file, even when it comes with a content-length header.
        if response.status_code == 403:
            print("File Unavailable")
            return None
        if 'content-length' not in response.headers:
            print("Did not get file")
            return None

        #Set file name
        fname = doi.split('/')[-1]+ext
        total_length = int(response.headers.get('content-length'))
        #Download file with progress bar (counted in bytes, matching unit="B")
        pbar = tqdm_notebook(total=total_length, unit="B", unit_scale=True)
        try:
            with open(fname, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
                        pbar.update(len(chunk))
        finally:
            pbar.close()
        return fname
    finally:
        # Release the streamed connection on every exit path.
        response.close()

In [10]:

# Kallisto-bus clustered starvation data (h5ad): look up its download link
# from the DataCite media record and display it.
starv_media_url = 'https://api.datacite.org/dois/'+'10.22002/D1.1796'+'/media'
starv_media = requests.get(starv_media_url).json()
netcdf_url = starv_media['data'][0]['attributes']['url']
netcdf_url

'https://data.caltech.edu/records/mm6y6-g4569/files/fedStarved_withUMAPPaga.h5ad.gz'

In [12]:
# Stimulation data (h5ad): look up its download link from the DataCite media
# record and display it.
stim_media_url = 'https://api.datacite.org/dois/'+'10.22002/D1.1821'+'/media'
stim_media = requests.get(stim_media_url).json()
netcdf_url_stim = stim_media['data'][0]['attributes']['url']
netcdf_url_stim

'https://data.caltech.edu/records/brzrq-t3789/files/bus_stim.h5ad'

In [2]:
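#Use `wget --content-disposition <url>` on the two URLs shown above to download these files into meta_path; the starvation file arrives as .h5ad.gz and must be gunzipped before it is read below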


In [1]:
import anndata
# Directory holding the .h5ad files read by the cells below and receiving the
# barcode lists they write.
meta_path = '/home/tchari/clytia_alt_splice/metadata'

In [12]:
# Barcode rule used in this cell: a name containing '-1' goes (marker removed)
# into the first list of its pair, a name containing '-2' into the second list;
# any other name is only reported.
# NOTE(review): the match is by substring and replace() strips every occurrence
# of the marker -- presumably it only ever appears as a trailing suffix; confirm.

# --- Starvation dataset: fed/control cells -> fed_1to4 ('-1') / fed_5to8 ('-2')
fs_adata = anndata.read_h5ad(meta_path+'/fedStarved_withUMAPPaga.h5ad')
print(fs_adata)

is_fed = fs_adata.obs['fed'].isin(['True'])
fed_cells = list(fs_adata.obs_names[is_fed])
fed_1to4, fed_5to8 = [], []
for barcode in fed_cells:
    if '-1' in barcode:
        fed_1to4.append(barcode.replace('-1',''))
    elif '-2' in barcode:
        fed_5to8.append(barcode.replace('-2',''))
    else:
        print('no -1 or -2')

# --- Stimulation dataset: SW/control cells -> sw_1to2 ('-1') / sw_3to4 ('-2')
stim_adata = anndata.read_h5ad(meta_path+'/bus_stim.h5ad')
print(stim_adata)

is_sw = stim_adata.obs['condition'].isin(['SW'])
sw_cells = list(stim_adata.obs_names[is_sw])
sw_1to2, sw_3to4 = [], []
for barcode in sw_cells:
    if '-1' in barcode:
        sw_1to2.append(barcode.replace('-1',''))
    elif '-2' in barcode:
        sw_3to4.append(barcode.replace('-2',''))
    else:
        print('no -1 or -2')
    

AnnData object with n_obs × n_vars = 13673 × 8696
    obs: 'batch', 'n_counts', 'n_countslog', 'louvain', 'leiden', 'orgID', 'fed', 'starved', 'fed_neighbor_score', 'cellRanger_louvain', 'annos', 'new_cellRanger_louvain', 'annosSub'
    var: 'n_counts', 'mean', 'std'
    uns: 'annosSub_colors', 'annos_colors', 'cellRanger_louvain_colors', 'cellRanger_louvain_sizes', "dendrogram_['new_cellRanger_louvain']", 'dendrogram_new_cellRanger_louvain', 'fed_colors', 'fed_neighbor_score_colors', 'leiden', 'leiden_colors', 'louvain', 'louvain_colors', 'neighbors', 'new_cellRanger_louvain_colors', 'orgID_colors', 'paga', 'pca', 'rank_genes_groups', 'umap'
    obsm: 'X_nca', 'X_pca', 'X_tsne', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'
AnnData object with n_obs × n_vars = 18921 × 10260
    obs: 'batch', 'n_counts', 'n_countslog', 'louvain', 'condition', 'orgID', 'cellRanger_louvain', 'annos', 'annosSub'
    var: 'n_counts', 'mean', 'std'
    uns: 'cellRanger_louvain_colors', 'c

In [16]:
# Write each barcode list to its own text file, one barcode per line.
barcode_outputs = (
    ('/home/tchari/clytia_alt_splice/metadata/fed_1to4.txt', fed_1to4),
    ('/home/tchari/clytia_alt_splice/metadata/fed_5to8.txt', fed_5to8),
    ('/home/tchari/clytia_alt_splice/metadata/sw_1to2.txt', sw_1to2),
    ('/home/tchari/clytia_alt_splice/metadata/sw_3to4.txt', sw_3to4),
)
for out_path, barcodes in barcode_outputs:
    with open(out_path, 'w') as out_file:
        out_file.writelines(f"{barcode}\n" for barcode in barcodes)

544

In [26]:
# Number of fed nematocytes per organism; the groups chosen below are orgIDs
# 2,3 and 1,4.
fed_nematocytes = fs_adata[fs_adata.obs['fed'].isin(['True']) & fs_adata.obs['annos'].isin(['Nematocyte'])]
fed_nematocytes.obs['orgID'].value_counts()

orgID
3    304
1    273
4    271
5    270
2    253
Name: count, dtype: int64

In [27]:
# Fed nematocytes of the starvation dataset, saved as two organism groups:
# orgIDs 2 & 3 (nem23_*) and orgIDs 1 & 4 (nem14_*).
# Within each group a barcode containing '-1' goes (marker removed) to the
# *_1to4 list and one containing '-2' to the *_5to8 list.
fed_mask = fs_adata.obs['fed'].isin(['True'])
nematocyte_mask = fs_adata.obs['annos'].isin(['Nematocyte'])
nem_adata = fs_adata[fed_mask & nematocyte_mask]

nem_23_cells = list(nem_adata.obs_names[nem_adata.obs['orgID'].isin(['2','3'])])
nem_14_cells = list(nem_adata.obs_names[nem_adata.obs['orgID'].isin(['1','4'])])

nem23_1to4, nem23_5to8 = [], []
nem14_1to4, nem14_5to8 = [], []

# (source barcodes, '-1' destination, '-2' destination), processed in this order.
barcode_groups = (
    (nem_23_cells, nem23_1to4, nem23_5to8),
    (nem_14_cells, nem14_1to4, nem14_5to8),
)
for group_cells, minus1_list, minus2_list in barcode_groups:
    for barcode in group_cells:
        if '-1' in barcode:
            minus1_list.append(barcode.replace('-1',''))
        elif '-2' in barcode:
            minus2_list.append(barcode.replace('-2',''))
        else:
            print('no -1 or -2')

# One output file per list, one barcode per line.
barcode_outputs = (
    ('/home/tchari/clytia_alt_splice/metadata/nem23_1to4.txt', nem23_1to4),
    ('/home/tchari/clytia_alt_splice/metadata/nem23_5to8.txt', nem23_5to8),
    ('/home/tchari/clytia_alt_splice/metadata/nem14_1to4.txt', nem14_1to4),
    ('/home/tchari/clytia_alt_splice/metadata/nem14_5to8.txt', nem14_5to8),
)
for out_path, barcodes in barcode_outputs:
    with open(out_path, 'w') as out_file:
        out_file.writelines(f"{barcode}\n" for barcode in barcodes)


In [32]:
# STARVED cells: keep the nematocytes flagged as starved and count them per
# organism; the groups chosen below are orgIDs 7,8 and 9,6.
starved_mask = fs_adata.obs['starved']
nematocyte_mask = fs_adata.obs['annos'].isin(['Nematocyte'])
nem_adata = fs_adata[starved_mask & nematocyte_mask]
nem_adata.obs['orgID'].value_counts()

orgID
9     348
7     317
10    307
8     260
6     216
Name: count, dtype: int64

In [37]:
# Starved nematocytes (nem_adata as filtered on obs['starved'] in the previous
# cell), saved as two organism groups.
# The variable names are reused from the fed-nematocyte cell: here nem_23_* /
# nem23_* hold orgIDs 7 & 8 and nem_14_* / nem14_* hold orgIDs 9 & 6. The
# output files carry a `_starv` suffix.
# Within each group a barcode containing '-1' goes (marker removed) to the
# *_1to4 list and one containing '-2' to the *_5to8 list.
nem_23_cells = list(nem_adata.obs_names[nem_adata.obs['orgID'].isin(['7','8'])])
nem_14_cells = list(nem_adata.obs_names[nem_adata.obs['orgID'].isin(['9','6'])])

nem23_1to4, nem23_5to8 = [], []
nem14_1to4, nem14_5to8 = [], []

# (source barcodes, '-1' destination, '-2' destination), processed in this order.
barcode_groups = (
    (nem_23_cells, nem23_1to4, nem23_5to8),
    (nem_14_cells, nem14_1to4, nem14_5to8),
)
for group_cells, minus1_list, minus2_list in barcode_groups:
    for barcode in group_cells:
        if '-1' in barcode:
            minus1_list.append(barcode.replace('-1',''))
        elif '-2' in barcode:
            minus2_list.append(barcode.replace('-2',''))
        else:
            print('no -1 or -2')

# One output file per list, one barcode per line.
barcode_outputs = (
    ('/home/tchari/clytia_alt_splice/metadata/nem23_1to4_starv.txt', nem23_1to4),
    ('/home/tchari/clytia_alt_splice/metadata/nem23_5to8_starv.txt', nem23_5to8),
    ('/home/tchari/clytia_alt_splice/metadata/nem14_1to4_starv.txt', nem14_1to4),
    ('/home/tchari/clytia_alt_splice/metadata/nem14_5to8_starv.txt', nem14_5to8),
)
for out_path, barcodes in barcode_outputs:
    with open(out_path, 'w') as out_file:
        out_file.writelines(f"{barcode}\n" for barcode in barcodes)

In [None]:
#Make .sh files for umi_tools
#Make .sh files for rmats with fastqs