# Plot gallery

## How to use this notebook

We use the test dataset stored [here](https://drive.google.com/file/d/1CTwrjO2dPWqISxcEyCJ1oj_EE1IaDrmI/view?usp=share_link). 

Store the data in the `data` folder, as follows:

```
/main folder
    /data
        sample1.json
        sample2.json
    gallery.ipynb
```

## Imports

In [1]:
%load_ext autoreload
%autoreload 2

import os, json, sys
sys.path.append('/Users/ymdt/src/dreem')
import pandas as pd
import dreem 


# Load the dataset through dreem and attach it to a Study object, which the
# plotting cells below use.
data = dreem.draw.load_dataset()

study = dreem.draw.Study()
study.df = data
# Take the identifiers of the first row as the default selection for the
# example plots in this notebook.
sample, reference, section, family = study.df.iloc[0][['sample', 'reference', 'section', 'family']]

# Folder that receives the exported HTML figures. The default is the original
# author's local checkout; set the DREEM_PLOTS_FIGS environment variable to
# point it elsewhere without editing the notebook.
path_figs = os.environ.get('DREEM_PLOTS_FIGS', '/Users/ymdt/src/dreem/docs/source/plots/plots_figs')
# Make sure the folder exists so that os.listdir (here) and fig.write_html
# (later cells) do not fail on a fresh checkout.
os.makedirs(path_figs, exist_ok=True)
# Remove all html files in path_figs so stale figures from a previous run
# are not left behind.
for file in os.listdir(path_figs):
    if file.endswith('.html'):
        os.remove(os.path.join(path_figs, file))
# NOTE(review): not referenced by any visible cell; presumably a default
# figure (width, height) — confirm before removing.
dim = (600, 400)


In [52]:
# Columns kept from the filtered study dataframe.
selected_columns = [
    'sample', 'reference', 'section', 'sequence', 'sub_rate',
    'deltaG', 'family', 'num_aligned', 'DMS_conc_mM',
]

# Each get_df filter accepts a single value or a list of values.
data = (
    study.get_df(
        sample='65degrees_1_S20_L001',
        reference=['3042-O-flank_1=hp1-DB', '3043-CC-flank_1=hp1-DB'],
        section='ROI',
        base_type=['A', 'C'],
    )[selected_columns]
    .reset_index(drop=True)  # renumber rows from 0 after filtering
)

## Mutation fraction

In [4]:
%reload_ext autoreload
fig = study.mutation_fraction(
    sample = sample,
    reference = reference,
    section='ROI'
)['fig']
fig.show()

In [10]:
%reload_ext autoreload
fig = study.mutation_fraction_identity(
    sample = sample,
    reference = reference,
    section='ROI'
)['fig']
fig.show()

## Mutations in barcode

In [10]:
%reload_ext autoreload

fig = study.mutations_in_barcodes(
    sample = ['10degrees_2_S11_L001','37degrees_01percent_2_S17_L001']
    )['fig']
fig.show()


## DeltaG vs mutation fraction

In [157]:
# deltaG vs. sub_rate for the default sample and family, limited to A and C
# bases in the 'ROI' section.
deltaG_result = study.deltaG_vs_sub_rate(
    sample=sample,
    section='ROI',
    family=family,
    base_type=['A', 'C'],
)
fig = deltaG_result['fig']
fig.show()
# Export the figure as standalone HTML into the figures folder.
deltaG_html = os.path.join(path_figs, 'deltaG_vs_sub_rate.html')
fig.write_html(deltaG_html)

## Aligned reads per reference

In [4]:
%reload_ext autoreload

fig = study.num_aligned_reads_per_reference_frequency_distribution(
    sample = sample,
    section = 'full'
)['fig']

fig.show()
fig.write_html(os.path.join(path_figs, 'num_aligned_reads_per_reference_frequency_distribution.html'))

## Mutations per read per sample

In [160]:
# Mutations-per-read plot for the default sample.
mutations_per_read_result = study.mutations_per_read_per_sample(sample=sample)
fig = mutations_per_read_result['fig']

fig.show()

# Export the figure as standalone HTML into the figures folder.
mutations_per_read_html = os.path.join(path_figs, 'mutations_per_read_per_sample.html')
fig.write_html(mutations_per_read_html)

In [2]:
# Figure: 'temperature_k' across samples for the default reference, limited
# to A/C bases with base_pairing=False.
temperature_plot = study.experimental_variable_across_samples(
    experimental_variable='temperature_k',
    reference=reference,
    section='ROI',
    base_type=['A', 'C'],
    base_pairing=False,
)
temperature_plot['fig'].show()

# Table: the second call deliberately omits base_type and base_pairing, so the
# displayed data is not restricted the way the figure above is. It stays the
# last expression so the notebook renders it as the cell output.
study.experimental_variable_across_samples(
    experimental_variable='temperature_k',
    reference=reference,
    section='ROI',
)['data']

Unnamed: 0_level_0,A1,A2,G3,A4,T5,A6,T7,T8,C9,G10,...,G14,A15,A16,T17,A18,T19,C20,T21,T22,temperature_k
temperature_k,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
278,0.000672,0.001011,0.010128,0.001355,0.0,0.000337,0.0,0.001008,0.002689,0.005376,...,0.004711,0.001013,0.0,0.0,0.000339,0.001038,0.006777,0.00137,0.0,278.0
278,0.0,0.00151,0.009548,0.0,0.000505,0.001513,0.000504,0.000503,0.007028,0.003008,...,0.000501,0.001011,0.0,0.000504,0.002023,0.0,0.011634,0.0,0.001019,278.0
283,0.002053,0.0,0.012371,0.00207,0.0,0.002049,0.0,0.002066,0.002045,0.006135,...,0.004098,0.002066,0.004124,0.002058,0.0,0.0,0.00207,0.0,0.0,283.0
283,0.000903,0.002714,0.007223,0.000455,0.001362,0.001821,0.0,0.000453,0.003165,0.004968,...,0.004513,0.000908,0.000453,0.0,0.00091,0.001398,0.006372,0.000926,0.001385,283.0
295,0.004709,0.002353,0.006061,0.000338,0.000675,0.001013,0.0,0.000675,0.005039,0.003359,...,0.004035,0.000675,0.0,0.000337,0.0,0.0,0.007802,0.000343,0.000343,295.0
295,0.004374,0.001588,0.007555,0.001998,0.000797,0.0004,0.000399,0.000398,0.004758,0.00436,...,0.005558,0.001604,0.0,0.001193,0.000801,0.000409,0.007194,0.000403,0.002422,295.0
310,0.0015,0.002503,0.00501,0.001513,0.0,0.000502,0.001004,0.001507,0.003002,0.007992,...,0.001001,0.001511,0.0005,0.001001,0.001008,0.0,0.004527,0.000509,0.000509,310.0
310,0.003974,0.000919,0.005805,0.0,0.000918,0.000921,0.000615,0.000611,0.002135,0.006707,...,0.003959,0.001534,0.001531,0.000917,0.000308,0.000314,0.00461,0.000933,0.000311,310.0
338,0.013093,0.00613,0.005304,0.002465,0.000819,0.0,0.00041,0.000409,0.002437,0.003663,...,0.003259,0.002464,0.003676,0.000815,0.002048,0.001251,0.004932,0.000416,0.001248,338.0
338,0.008681,0.004219,0.005625,0.003305,0.000941,0.001176,0.000706,0.000469,0.002807,0.002809,...,0.00351,0.004002,0.002811,0.000704,0.00141,0.000482,0.005194,0.000479,0.000959,338.0


In [18]:
# Show each distinct combination of experimental conditions and sample name once.
condition_columns = [
    'DMS_conc_mM', 'temperature_k', 'buffer', 'cell_line',
    'exp_env', 'inc_time_tot_secs', 'sample',
]
study.df[condition_columns].drop_duplicates()

Unnamed: 0,DMS_conc_mM,temperature_k,buffer,cell_line,exp_env,inc_time_tot_secs,sample
0,105.0,338,"0.36 M sodium cacodylate, pH 7.2, containing 5...",,in_vitro,300,65degrees_1_S20_L001
120,105.0,278,"0.36 M sodium cacodylate, pH 7.2, containing 5...",,in_vitro,300,5degrees_2_S9_L001
239,105.0,295,"0.36 M sodium cacodylate, pH 7.2, containing 5...",,in_vitro,300,25degrees_2_S13_L001
359,105.0,310,"0.36 M sodium cacodylate, pH 7.2, containing 5...",,in_vitro,300,37degrees_1percent_2_S15_L001
478,105.0,310,"0.36 M sodium cacodylate, pH 7.2, containing 5...",,in_vitro,300,37degrees_1percent_1_S14_L001
598,105.0,295,"0.36 M sodium cacodylate, pH 7.2, containing 5...",,in_vitro,300,25degrees_1_S12_L001
717,105.0,338,"0.36 M sodium cacodylate, pH 7.2, containing 5...",,in_vitro,300,65degrees_2_S21_L001
835,105.0,278,"0.36 M sodium cacodylate, pH 7.2, containing 5...",,in_vitro,300,5degrees_1_S8_L001
953,105.0,283,"0.36 M sodium cacodylate, pH 7.2, containing 5...",,in_vitro,300,10degrees_2_S11_L001
1072,105.0,318,"0.36 M sodium cacodylate, pH 7.2, containing 5...",,in_vitro,300,45degrees_2_S19_L001


In [2]:
# List every column available in the study dataframe (useful for choosing
# filters and experimental variables in the cells above).
study.df.columns

Index(['DMS_conc_mM', 'buffer', 'cell_line', 'date', 'exp_env',
       'inc_time_tot_secs', 'library', 'sample', 'temperature_k', 'user',
       'reference', 'barcode_end', 'barcode_start', 'family', 'flank',
       'num_aligned', 'secondary_signature', 'secondary_signature_end',
       'secondary_signature_start', 'section_boundaries', 'sequence',
       'section', 'cluster', 'deltaG', 'section_end', 'section_start',
       'structure', 'min_cov', 'cov', 'del', 'info',
       'ins', 'sub_A', 'sub_C', 'sub_G', 'sub_N',
       'sub_T', 'sub_N', 'sub_rate', 'sub_hist',
       'poisson_high', 'poisson_low', 'barcode'],
      dtype='object')