## Imports

In [None]:
!pip install -U plotly kaleido

In [2]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

from plotly.subplots import make_subplots

## Data

In [3]:
"""Initialize generated data for pie charts."""

# Each entry is (dataset label, raw annotation table).  The raw table is the
# printed text of a data frame: "<row index> <category label> <percentage>".
# Categories that are absent from a dataset are simply missing from its table.
data_list = [
('Z-DNA',
"""\
9    Promoter (<=1kb) 40.95135480
10   Promoter (1-2kb)  5.89808053
11   Promoter (2-3kb)  3.73518496
4              5' UTR  0.02394349
3              3' UTR  0.81806936
1            1st Exon  0.01596233
7          Other Exon  3.09669181
2          1st Intron  4.93235963
8        Other Intron 12.46258829
6  Downstream (<=300)  0.05586815
5   Distal Intergenic 28.00989664
"""),
('SIDD',
"""\
9    Promoter (<=1kb)  6.74032427
10   Promoter (1-2kb)  4.19717573
11   Promoter (2-3kb)  3.37996862
4              5' UTR  0.01307531
3              3' UTR  0.92180962
1            1st Exon  0.04576360
7          Other Exon  0.47724895
2          1st Intron  7.38755230
8        Other Intron 18.59309623
6  Downstream (<=300)  0.07191423
5   Distal Intergenic 58.17207113
"""),
('G4',
"""\
9    Promoter (<=1kb) 36.5871723
10   Promoter (1-2kb)  5.5448576
11   Promoter (2-3kb)  3.4760282
4              5' UTR  0.0493754
3              3' UTR  1.1603219
1            1st Exon  0.0246877
7          Other Exon  1.5256999
2          1st Intron  5.4510443
8        Other Intron 13.5930479
6  Downstream (<=300)  0.0740631
5   Distal Intergenic 32.5137017
"""),
('H-DNA',
"""\
9    Promoter (<=1kb) 11.690058480
10   Promoter (1-2kb)  4.520467836
11   Promoter (2-3kb)  3.690058480
4              5' UTR  0.005847953
3              3' UTR  0.543859649
1            1st Exon  0.023391813
7          Other Exon  0.345029240
2          1st Intron  8.000000000
8        Other Intron 20.087719298
6  Downstream (<=300)  0.040935673
5   Distal Intergenic 51.052631579
"""),
('Z-DNA & miRNA',
"""\
7  Promoter (<=1kb) 26.01002654
8  Promoter (1-2kb)  5.69153642
9  Promoter (2-3kb)  4.30551460
3            5' UTR  0.05897965
2            3' UTR  0.70775582
5        Other Exon  1.12061339
1        1st Intron  7.01857859
6      Other Intron 18.43114126
4 Distal Intergenic 36.65585373
"""),
('SIDD & miRNA',
"""\
9    Promoter (<=1kb)  6.349337067
10   Promoter (1-2kb)  4.315243350
11   Promoter (2-3kb)  3.434077246
4              5' UTR  0.008235197
3              3' UTR  0.897636498
1            1st Exon  0.041175986
7          Other Exon  0.502347031
2          1st Intron  7.502264679
8        Other Intron 18.809190480
6  Downstream (<=300)  0.074116775
5   Distal Intergenic 58.066375690
"""),
('G4 & miRNA',
"""\
8    Promoter (<=1kb) 38.82687678
9    Promoter (1-2kb)  5.66863646
10   Promoter (2-3kb)  3.83015977
3              5' UTR  0.04377325
2              3' UTR  1.24753775
6          Other Exon  2.01356971
1          1st Intron  4.88071788
7        Other Intron 12.54103743
5  Downstream (<=300)  0.02188663
4   Distal Intergenic 30.92580433
"""),
('H-DNA & miRNA',
"""\
7   Promoter (<=1kb) 10.71913161
8   Promoter (1-2kb)  4.00271370
9   Promoter (2-3kb)  3.86702849
2             3' UTR  0.81411126
5         Other Exon  0.13568521
1         1st Intron  7.39484396
6       Other Intron 21.70963365
4 Downstream (<=300)  0.06784261
3  Distal Intergenic 51.28900950
"""),
('Z-DNA & SIDD(+-100bp)',
"""\
8    Promoter (<=1kb) 20.0527704
9    Promoter (1-2kb)  6.0686016
10   Promoter (2-3kb)  4.0897098
3              3' UTR  0.9234828
1            1st Exon  0.1319261
6          Other Exon  0.9234828
2          1st Intron  6.5963061
7        Other Intron 19.3931398
5  Downstream (<=300)  0.1319261
4   Distal Intergenic 41.6886544
"""),
('SIDD(+-100bp)',
"""\
9    Promoter (<=1kb)  7.20449791
10   Promoter (1-2kb)  4.11218619
11   Promoter (2-3kb)  3.47803347
4              5' UTR  0.02615063
3              3' UTR  0.96103556
1            1st Exon  0.05230126
7          Other Exon  0.84989540
2          1st Intron  7.29602510
8        Other Intron 18.05700837
6  Downstream (<=300)  0.05883891
5   Distal Intergenic 57.90402720
"""),
('G4 & SIDD(+-100bp)',
"""\
6  Promoter (<=1kb)  6.7555556
7  Promoter (1-2kb)  4.4888889
8  Promoter (2-3kb)  3.3333333
2            3' UTR  0.7555556
4        Other Exon  0.1777778
1        1st Intron  8.4444444
5      Other Intron 19.8222222
3 Distal Intergenic 56.2222222
"""),
('H-DNA & SIDD(+-100bp)',
"""\
7   Promoter (<=1kb) 10.16865079
8   Promoter (1-2kb)  4.76190476
9   Promoter (2-3kb)  2.77777778
2             3' UTR  0.99206349
5         Other Exon  0.24801587
1         1st Intron  7.44047619
6       Other Intron 20.08928571
4 Downstream (<=300)  0.09920635
3  Distal Intergenic 53.42261905
"""),
('Z-DNA & SIDD(+-200bp)',
"""\
9    Promoter (<=1kb) 23.63473590
10   Promoter (1-2kb)  6.26678603
11   Promoter (2-3kb)  3.67054611
4              5' UTR  0.08952551
3              3' UTR  0.98478066
1            1st Exon  0.08952551
7          Other Exon  1.07430618
2          1st Intron  6.17726052
8        Other Intron 18.53178156
6  Downstream (<=300)  0.08952551
5   Distal Intergenic 39.39122650
"""),
('SIDD(+-200bp)',
"""\
9    Promoter (<=1kb)  7.64252092
10   Promoter (1-2kb)  3.98797071
11   Promoter (2-3kb)  3.53687238
4              5' UTR  0.03268828
3              3' UTR  1.01987448
1            1st Exon  0.05230126
7          Other Exon  1.26830544
2          1st Intron  7.21757322
8        Other Intron 17.54053347
6  Downstream (<=300)  0.06537657
5   Distal Intergenic 57.63598326
"""),
('G4 & SIDD(+-200bp)',
"""\
7   Promoter (<=1kb)  9.5106551
8   Promoter (1-2kb)  4.8539858
9   Promoter (2-3kb)  3.5911602
2             3' UTR  1.0260458
5         Other Exon  0.1973165
1         1st Intron  7.8926598
6       Other Intron 19.1002368
4 Downstream (<=300)  0.0394633
3  Distal Intergenic 53.7884767
"""),
('H-DNA & SIDD(+-200bp)',
"""\
7   Promoter (<=1kb) 10.04891063
8   Promoter (1-2kb)  4.89106269
9   Promoter (2-3kb)  2.97910182
2             3' UTR  0.97821254
5         Other Exon  0.22232103
1         1st Intron  7.87016452
6       Other Intron 20.36460649
4 Downstream (<=300)  0.08892841
3  Distal Intergenic 52.55669186
"""),
('Z-DNA & SIDD(+-500bp)',
"""\
9    Promoter (<=1kb) 33.0743618
10   Promoter (1-2kb)  6.6037736
11   Promoter (2-3kb)  3.8845727
4              5' UTR  0.0554939
3              3' UTR  1.4983352
1            1st Exon  0.1109878
7          Other Exon  1.8867925
2          1st Intron  4.8834628
8        Other Intron 15.5937847
6  Downstream (<=300)  0.0554939
5   Distal Intergenic 32.3529412
"""),
('SIDD(+-500bp)',
"""\
9    Promoter (<=1kb)  8.96312762
10   Promoter (1-2kb)  3.64147490
11   Promoter (2-3kb)  3.56302301
4              5' UTR  0.07845188
3              3' UTR  1.23561715
1            1st Exon  0.03922594
7          Other Exon  1.96129707
2          1st Intron  7.00183054
8        Other Intron 16.46835774
6  Downstream (<=300)  0.06537657
5   Distal Intergenic 56.98221757
"""),
('G4 & SIDD(+-500bp)',
"""\
7   Promoter (<=1kb) 15.31239883
8   Promoter (1-2kb)  5.05017805
9   Promoter (2-3kb)  3.59339592
2             3' UTR  1.23017158
5         Other Exon  0.48559404
1         1st Intron  7.08967303
6       Other Intron 18.06409841
4 Downstream (<=300)  0.09711881
3  Distal Intergenic 49.07737132
"""),
('H-DNA & SIDD(+-500bp)',
"""\
7   Promoter (<=1kb) 11.1718750
8   Promoter (1-2kb)  4.6875000
9   Promoter (2-3kb)  3.0468750
2             3' UTR  0.9765625
5         Other Exon  0.2343750
1         1st Intron  7.8515625
6       Other Intron 20.1562500
4 Downstream (<=300)  0.0781250
3  Distal Intergenic 51.7968750
"""),
]

# Convert to a 2-D string array of shape (n_datasets, 2): column 0 holds the
# labels, column 1 the raw tables.  Slicing it later yields groups of datasets.
data_list = np.array(data_list)

# Index -> label overview, handy for checking how the datasets are grouped.
print([f"{i}: {x[0]}" for i, x in enumerate(data_list)])
len(data_list)

['0: Z-DNA', '1: SIDD', '2: G4', '3: H-DNA', '4: Z-DNA & miRNA', '5: SIDD & miRNA', '6: G4 & miRNA', '7: H-DNA & miRNA', '8: Z-DNA & SIDD(+-100p)', '9: SIDD(+-100bp)', '10: G4 & SIDD(+-100bp)', '11: H-DNA & SIDD(+-100bp)', '12: Z-DNA & SIDD(+-200bp)', '13: SIDD(+-200bp)', '14: G4 & SIDD(+-200bp)', '15: H-DNA & SIDD(+-200bp)', '16: Z-DNA & SIDD(+-500bp)', '17: SIDD(+-500bp)', '18: G4 & SIDD(+-500bp)', '19: H-DNA & SIDD(+-500bp)']


20

## Plots

In [16]:
"""Define necessary functions for plotting."""

def get_category_value(category: str, raw_text_data: str):
  """Return the value listed for one category in a raw annotation table.

  The table is scanned line by line.  Every line is expected to look like
  ``[row index] <category label> <value>``; the label has to equal
  ``category`` exactly (runs of whitespace are treated as a single space), so
  a partial name such as "Exon" never picks up the "1st Exon" row.

  Args:
    category: Category label to look up, e.g. "Promoter (<=1kb)".
    raw_text_data: Multi-line text of the printed table.

  Returns:
    The value of the first matching row as a float, or 0.0 when the category
    does not occur in the table.

  Raises:
    ValueError: If the matching row's last field is not a valid number.
  """

  wanted = ' '.join(category.split())

  for line in raw_text_data.splitlines():
    fields = line.split()
    if len(fields) < 2:
      continue  # blank line or a line without a label/value pair

    label = ' '.join(fields[:-1])
    # The printed table usually starts each row with an integer row index;
    # accept the label both with and without that leading field.
    if len(fields) > 2 and fields[0].isdigit():
      label_without_index = ' '.join(fields[1:-1])
    else:
      label_without_index = label

    if wanted in (label, label_without_index):
      return float(fields[-1])

  return 0.0

def draw_pie_subplots(data:list, rows:int, cols:int, title:str='result'):
  """Draw a grid of donut-style pie charts, save it as a PNG and show it.

  Each entry of ``data`` becomes one subplot.  Subplots are filled row by
  row (left to right, then top to bottom).

  Args:
    data: Sequence of ``(name, raw_text)`` pairs.  ``name`` is used as the
      subplot title and trace name; ``raw_text`` is handed to
      ``get_category_value`` once per category to obtain the slice values.
    rows: Number of subplot rows.
    cols: Number of subplot columns.
    title: Output path without extension; the image is written to
      ``title + '.png'``.

  Raises:
    ValueError: If ``data`` holds more entries than the grid has cells.
  """

  if len(data) > rows * cols:
    raise ValueError(
        f"{len(data)} datasets do not fit into a {rows}x{cols} subplot grid")

  # Fixed category order so that slice order and colours agree across charts.
  category_names = [
      "Promoter (<=1kb)",
      "Promoter (1-2kb)",
      "Promoter (2-3kb)",
      "5' UTR",
      "3' UTR",
      "1st Exon",
      "Other Exon",
      "1st Intron",
      "Other Intron",
      "Downstream (<=300)",
      "Distal Intergenic",
  ]

  # Pie traces need 'domain' type subplots; build an independent spec dict
  # for every cell rather than sharing a single dict object.
  specs = [[{'type': 'domain'} for _ in range(cols)] for _ in range(rows)]
  fig = make_subplots(rows=rows, cols=cols, specs=specs,
                      subplot_titles=[x[0] for x in data])

  for i, (data_name, data_raw) in enumerate(data):
    values = [get_category_value(cat, data_raw) for cat in category_names]

    # Map the running index onto the (1-based) grid position.
    row_idx, col_idx = divmod(i, cols)
    fig.add_trace(go.Pie(sort=False, rotation=0,
                        labels=category_names,
                        values=values,
                        name=data_name),
                  row=row_idx + 1, col=col_idx + 1)

  fig.update_traces(hole=.4, hoverinfo="label+percent+name", texttemplate='%{value:.2f}%')

  # Lift the subplot titles so they do not touch the charts.
  fig.update_annotations(yshift=20)

  fig.update_layout(
      uniformtext_minsize=10, uniformtext_mode='hide',
      legend = dict(font=dict(size=16)),
      height=500 * rows,  # 500 px per row; identical to before for one row
      width=1500,
  )
  # The target directory must already exist.
  fig.write_image(title+'.png')
  fig.show()

In [17]:
"""Draw the figures."""

!rm -r img ; mkdir -p img
for i in range(0, len(data_list), 4):
  draw_pie_subplots(data_list[i:i+4], 1, 4, f"img/mm10_kouzine_ssDNA_pie_chart_{i}")