# Part 0: Load in libraries

## Run this notebook in the graph_env environment (requires the d3blocks package)

In [1]:
import numpy as np
import pandas as pd
import seaborn as sbn
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm
import re
import math
import scipy
from scipy.stats import ks_2samp
from matplotlib.patches import Rectangle
import pathlib
from matplotlib import font_manager
from scipy.stats import fisher_exact


# Register the Arial font file with matplotlib and make it the default face
font_filename = 'ARIAL.TTF'
fonts_path = pathlib.Path.home() / 'fonts'  # i.e. `~/fonts` (update as needed)
font_path = fonts_path / font_filename
print(font_path)

font_manager.fontManager.addfont(str(font_path))
prop = font_manager.FontProperties(fname=font_path)

# Route sans-serif text through the registered font; font type 42 makes the
# text editable in exported PDF/PS figures
plt.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': prop.get_name(),
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

# Global color map: each key is a group of one-letter amino acid codes that
# share the hex color given as the value.
COLOR_MAP = {
    'A': '#8c8c8c',
    'CHNQST': '#077a15',
    'DE': '#e23a36',
    'FWY': '#f58220',
    'G': '#86bd8d',
    'ILMV': '#231f20',
    'KR': '#1725c2',
    'P': '#c25fc9',
}


# Expand the group-level map into a per-residue map (one entry per amino
# acid letter). This is the internal color map used for styling.
aacolor_map = {}
for amino_acids, group_color in COLOR_MAP.items():
    for amino_acid in amino_acids:
        aacolor_map[amino_acid] = group_color

/home/research/kiersten.ruff/fonts/ARIAL.TTF


# Part 1: Connect ribosomal biogenesis processes to IDR grammars

## 1.1 - Load in nucleoli complex information

In [2]:
# Read the table of ribosome biogenesis factors with their cluster annotations
dfcompclust = pd.read_excel(
    'data/Ribosomal_biogeneis_factors_w_clusters.xlsx',
    engine='openpyxl',
)
print(dfcompclust)

# Pull each column of interest out as a plain list; the lists are
# index-aligned with the rows of dfcompclust
(
    ccgeneralprocess,
    cccategory,
    ccIDR,
    cccluster7,
    cccluster18,
    cccluster23,
    ccclusterany,
    ccgene,
    ccacc,
) = (
    dfcompclust[column].tolist()
    for column in (
        'General Process',
        'Category',
        'IDR',
        'Cluster 7',
        'Cluster 18',
        'Cluster 23',
        'Cluster 7, 18, or 23',
        'Gene',
        'Accession',
    )
)

# Distinct general processes and categories, in order of first appearance
uniccgeneralprocess = list(dict.fromkeys(ccgeneralprocess))
unicccategory = list(dict.fromkeys(cccategory))

print(uniccgeneralprocess)
print(unicccategory)
print(len(unicccategory))

     Unnamed: 0 Accession    Gene  \
0             0    P19388  POLR2E   
1             1    P61218  POLR2F   
2             2    P52434  POLR2H   
3             3    P62875  POLR2L   
4             4    P53803  POLR2K   
..          ...       ...     ...   
269         269    Q92499    DDX1   
270         270    Q9ULT8  HECTD1   
271         271    Q9P275   USP36   
272         272    O95071    UBR5   
273         273    Q9Y4B6   DCAF1   

                                   General Process                   Category  \
0                               rDNA Transcription  RNA Polymerase I subunits   
1                               rDNA Transcription  RNA Polymerase I subunits   
2                               rDNA Transcription  RNA Polymerase I subunits   
3                               rDNA Transcription  RNA Polymerase I subunits   
4                               rDNA Transcription  RNA Polymerase I subunits   
..                                             ...                   

## 1.2 - Create data for plotting

In [3]:
# Edge list for the Sankey diagram, stored as three parallel lists:
# source node label, target node label, and edge weight.
source = []
target = []
weight = []

# Row indices (positions in the cc* lists) belonging to each general process
# and each category. These are scanned once here rather than being rebuilt
# on every pass of the nested loops below.
process_rows = [
    {m for m, x in enumerate(ccgeneralprocess) if x == process}
    for process in uniccgeneralprocess
]
category_rows = [
    [m for m, x in enumerate(cccategory) if x == category]
    for category in unicccategory
]

# Layer 1: general process -> category, weighted by the number of rows that
# carry both labels. Both lists are walked back to front, which fixes the
# order in which edges are appended
# (NOTE(review): presumably chosen to control node order in the plot - confirm).
for i in range(len(uniccgeneralprocess) - 1, -1, -1):
    print(i)  # progress indicator; matches the recorded cell output
    for j in range(len(unicccategory) - 1, -1, -1):
        n_shared = len(process_rows[i].intersection(category_rows[j]))
        if n_shared:
            source.append(uniccgeneralprocess[i])
            target.append(unicccategory[j])
            weight.append(n_shared)

# Layer 2: category -> IDR / cluster status of its members.
for j in range(len(unicccategory) - 1, -1, -1):
    rows = category_rows[j]

    # Number of rows in this category flagged 'Yes' in each annotation column
    n_idr = sum(ccIDR[p] == 'Yes' for p in rows)
    n_7 = sum(cccluster7[p] == 'Yes' for p in rows)
    n_18 = sum(cccluster18[p] == 'Yes' for p in rows)
    n_23 = sum(cccluster23[p] == 'Yes' for p in rows)

    if n_idr == 0:
        # No member has an IDR: emit one edge with a fixed weight of 1
        edges = [('No IDRs in Complex', 1)]
    elif n_7 + n_18 + n_23 == 0:
        # IDRs present, but none in clusters 7, 18 or 23: weight is the IDR count
        edges = [('IDRs in complex not in 7, 18, or 23', n_idr)]
    else:
        # One edge per cluster with at least one member, in the order 18, 23, 7
        edges = [
            (label, count)
            for label, count in (
                ('IDRs in complex in 18', n_18),
                ('IDRs in complex in 23', n_23),
                ('IDRs in complex in 7', n_7),
            )
            if count > 0
        ]

    for label, count in edges:
        source.append(unicccategory[j])
        target.append(label)
        weight.append(count)


6
5
4
3
2
1
0


## 1.3 - Plot data

In [4]:
# Assemble the edge table: one row per Sankey link
dftmp = pd.DataFrame({'source': source, 'target': target, 'weight': weight})

print(dftmp)
print(dftmp.loc[dftmp['source'] == 'Elongation'])

import d3blocks
from d3blocks import D3Blocks

d3 = D3Blocks(chart='Sankey', frame=True)

# Palette of hex colors; not referenced again within this cell
tmpcolorlist = ['#3d2663', '#872f74', '#cb3f73', '#ff6361', '#276026', '#008770', '#00aabe', '#64c8ff']

# Explicit color for selected node labels: the IDR / cluster status nodes
# first, then the general-process nodes
node_colors = {
    'IDRs in complex in 7': '#e23c36',
    'IDRs in complex in 18': '#000000',
    'IDRs in complex in 23': '#1d9bd7',
    'IDRs in complex not in 7, 18, or 23': '#dddddd',
    'No IDRs in Complex': '#dddddd',
    'rDNA Transcription': '#ff6361',
    'rRNA Modification': '#872f74',
    'Factors involved in pre‐rRNA processing': '#3d2663',
    'Chaperones of ribosomal proteins': '#cb3f73',
    'SSU processome subcomplexes and early assembly factors': '#276026',
    'Factors involved in nucleolar steps of 60S maturation': '#008770',
    'Other factors involved in ribosome biogenesis': '#00aabe',
}

# NOTE(review): node properties are set twice; the first call with defaults
# looks redundant given the second call - confirm before removing.
d3.set_node_properties(dftmp)
d3.set_node_properties(dftmp, padding=1, color=node_colors)
d3.set_edge_properties(dftmp, color='target', opacity='target')
d3.show(figsize=[500, 1500])

                                                source  \
0        Other factors involved in ribosome biogenesis   
1    Factors involved in nucleolar steps of 60S mat...   
2    Factors involved in nucleolar steps of 60S mat...   
3    Factors involved in nucleolar steps of 60S mat...   
4    Factors involved in nucleolar steps of 60S mat...   
..                                                 ...   
103                      PIC formation/promoter escape   
104                      PIC formation/promoter escape   
105                      PIC formation/promoter escape   
106                          RNA Polymerase I subunits   
107                          RNA Polymerase I subunits   

                         target  weight  
0                  Other factor       7  
1              60S other factor      13  
2                      B factor      11  
3    B factor Rpf2‐Rrs1 complex       2  
4    B factor Nip7‐Nop2 complex       2  
..                          ...     ...  
103      

[d3blocks] >INFO> Cleaning edge_properties and config parameters..
[d3blocks] >INFO> Initializing [Sankey]
[d3blocks] >INFO> Create directory: [/tmp/d3blocks]
[d3blocks] >INFO> filepath is set to [/tmp/d3blocks/sankey.html]
[d3blocks] >INFO> Convert to DataFrame.
[d3blocks] >INFO> Node properties are set.
[d3blocks] >INFO> Convert to DataFrame.
[d3blocks] >INFO> Node properties are set.
[d3blocks] >INFO> Edge properties are set.
[d3blocks] >INFO> Set [figsize]: [500, 1500]
[d3blocks] >INFO> Open browser: /tmp/d3blocks/sankey.html


## 1.4 - Save data to excel file

In [5]:
# Write the Sankey edge table to the 'Fig_2D' sheet of the supplementary
# workbook. Append mode keeps the other sheets in the file. Passing
# if_sheet_exists='replace' lets this cell be re-run: with the default
# setting, append mode raises a ValueError once 'Fig_2D' already exists.
with pd.ExcelWriter(
    '../Supplementary_Tables/Table_S2.xlsx',
    engine='openpyxl',
    mode='a',
    if_sheet_exists='replace',
) as writer:
    dftmp.to_excel(writer, sheet_name='Fig_2D')

In [None]:
# Ask Python's built-in help system to list all available modules.
# NOTE(review): this looks like a leftover environment check rather than part
# of the analysis - confirm whether the cell can be dropped. The recorded
# output below includes messages from unrelated packages (nxviz, distributed,
# TensorFlow).
help('modules')


Please wait a moment while I gather a list of all available modules...



nxviz has a new API! Version 0.7.4 onwards, the old class-based API is being
deprecated in favour of a new API focused on advancing a grammar of network
graphics. If your plotting code depends on the old API, please consider
pinning nxviz at version 0.7.4, as the new API will break your old code.

To check out the new API, please head over to the docs at
https://ericmjl.github.io/nxviz/ to learn more. We hope you enjoy using it!

(This deprecation message will go away in version 1.0.)

  """
distributed.dashboard.proxy - INFO - To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy
Using TensorFlow backend.
