In [50]:
from collections import defaultdict
from collections import OrderedDict

import pysradb
from pysradb import SRAdb
import os
import glob
import pandas as pd
from riboraptor.helpers import path_leaf, parse_star_logs, millify, order_dataframe
from riboraptor.cutadapt_to_json import cutadapt_to_json
from riboraptor.utils import summary_starlogs_over_runs, mkdir_p

root_dir = '/data1/re-ribo-analysis/'

builds = os.listdir(root_dir)

In [2]:
builds

['Mmul8',
 'GRCg6',
 'panTro3',
 'hg38',
 'Rnor6.0',
 'BDGP6',
 'GRCz11',
 'mm10',
 'WBcel235']

In [100]:
def check_ribotricer_output_exists(srp, srx, assembly):
    path = "/data1/re-ribo-analysis"
    path = os.path.join(path, assembly, srp, "ribotricer_results" ,"{}_translating_ORFs.tsv".format(srx))
    if os.path.exists(path):
        return path
    
def summarise_ribotricer_output_exists(path):
    df = pd.read_csv(path, sep='\t', use_cols = ['ORF_ID'])
    df_grouped 
    return df
    
def check_ribotricer_metagene_exists(srp, srx, assembly):
    path = "/data1/re-ribo-analysis"
    path_5p = os.path.join(path, assembly, srp, "ribotricer_results" ,"{}_metagene_profiles_5p.tsv".format(srx))
    path_3p = os.path.join(path, assembly, srp, "ribotricer_results" ,"{}_metagene_profiles_3p.tsv".format(srx))
    path_5p_tsv = None
    path_3p_tsv = None
    if os.path.exists(path_5p):
        path_5p_tsv = path_5p
    if os.path.exists(path_3p):
        path_3p_tsv = path_3p
        
    return path_5p_tsv, path_3p_tsv



def check_ribotricer_metagene_plot_exists(srp, srx, assembly):
    path = "/data1/re-ribo-analysis"
    path = os.path.join(path, assembly, srp, "ribotricer_results" ,"{}_metagene_plots.pdf".format(srx))
    if os.path.exists(path):
        return path
    
def check_ribotricer_protocol_exists(srp, srx, assembly):
    path = "/data1/re-ribo-analysis"
    path = os.path.join(path, assembly, srp, "ribotricer_results" ,"{}_protocol.txt".format(srx))
    if os.path.exists(path):
        return path

def check_ribotricer_bam_summary_exists(srp, srx, assembly):
    path = "/data1/re-ribo-analysis"
    path = os.path.join(path, assembly, srp, "ribotricer_results" ,"{}_bam_summary.txt".format(srx))
    if os.path.exists(path):
        return path



In [108]:
def get_srp_table(srp, assembly, re_ribo_analysis_dir):
    sradb = SRAdb("/data2/SRAmetadb.sqlite")
    column_order = [
        "study_accession",
        "experiment_title",
        "experiment_accession",
        "run_accession",
        "taxon_id",
        "library_selection",
        "library_layout",
        "library_strategy",
        "library_source",
        "library_name",
        "adapter_spec",
        "bases",
        "spots",
        "avg_read_length",
        "pass1_adapter",
        "pass1_total_reads_processed",
        "pass1_reads_with_adapters",
        "pass2_adapter",
        "pass2_total_reads_processed",
        "pass2_reads_with_adapters",
        "mapping_total_reads_input",
        "uniquely_mapped",
        "uniquely_mapped_percent",
        "ribotricer_orfs"
    ]
    filepath = os.path.join(re_ribo_analysis_dir, assembly, srp)
    if os.path.exists(filepath):

        try:
            srp_df = sradb.sra_metadata(srp.split("_")[0], detailed=True)#, expand_sample_attributes=True)
        except:
            return pd.DataFrame()
        srp_df.library_layout = srp_df.library_layout.fillna("SINGLE")
        srp_df = srp_df[srp_df.library_layout.str.contains("SINGLE")]

        srp_df["pass1_reads_with_adapters"] = None
        srp_df["pass1_total_reads_processed"] = None
        srp_df["pass1_adapter"] = None
        srp_df["pass2_adapter"] = None
        srp_df["pass2_total_reads_processed"] = None
        srp_df["pass2_reads_with_adapters"] = None
        srp_df["mapping_total_reads_input"] = None
        srp_df["uniquely_mapped"] = None
        srp_df["uniquely_mapped_percent"] = None
        srp_df["ribotricer_orfs"] = None
        srp_df["ribotricer_metagene_5p"] = None
        srp_df["ribotricer_metagene_3p"] = None
        
        srp_df["ribotricer_metagene_plot"] =  None
        srp_df["ribotricer_protocol"] = None
        srp_df["ribotricer_bam_summary"] = None
        
        

        srpdir = os.path.join(re_ribo_analysis_dir, assembly, srp)
        starlogsdir = os.path.join(srpdir, "starlogs")
        srp_srx_grouped = srp_df.groupby("experiment_accession")
        preprocess_step1_dir = os.path.join(srpdir, "preprocessed_step1")
        preprocess_step2_dir = os.path.join(srpdir, "preprocessed")
        for srx, srx_group in srp_srx_grouped:
            ribotricer_output = check_ribotricer_output_exists(srp, srx, assembly)
            ribotricer_metagene_5p, ribotricer_metagene_3p = check_ribotricer_metagene_exists(srp, srx, assembly)
            
            ribotricer_bam_summary = check_ribotricer_bam_summary_exists(srp, srx, assembly)
            ribotricer_protocol = check_ribotricer_protocol_exists(srp, srx, assembly)            
            ribotricer_metagene_plot = check_ribotricer_metagene_plot_exists(srp, srx, assembly)
            
            srrs = srx_group["run_accession"].tolist()
            if ribotricer_output:
                srp_df.loc[srp_df.experiment_accession == srx, "ribotricer_orfs"] = ribotricer_output
                
            srp_df.loc[srp_df.experiment_accession == srx, "ribotricer_metagene_5p"] = ribotricer_metagene_5p
            srp_df.loc[srp_df.experiment_accession == srx, "ribotricer_metagene_3p"] = ribotricer_metagene_3p

            srp_df.loc[srp_df.experiment_accession == srx, "ribotricer_bam_summary"] = ribotricer_bam_summary
            srp_df.loc[srp_df.experiment_accession == srx, "ribotricer_protocol"] = ribotricer_protocol
            srp_df.loc[srp_df.experiment_accession == srx, "ribotricer_metagene_plot"] = ribotricer_metagene_plot

                
            # starlogs_df = summary_starlogs_over_runs(starlogsdir, srrs)

            for srr in srrs:
                starlogs_df = None
                if os.path.isfile(os.path.join(starlogsdir, srr + "Log.final.out")):
                    starlogs_df = parse_star_logs(
                        os.path.join(starlogsdir, srr + "Log.final.out")
                    )
                # Preprocessed_step1 adapter info
                step1_txt = os.path.join(
                    preprocess_step1_dir, srr + ".fastq.gz_trimming_report.txt"
                )
                step2_txt = os.path.join(
                    preprocess_step2_dir, srr + "_trimmed.fq.gz_trimming_report.txt"
                )
                step1_cutadapt_json = None
                step2_cutadapt_json = None

                if os.path.isfile(step1_txt):
                    step1_cutadapt_json = cutadapt_to_json(step1_txt)

                if os.path.isfile(step2_txt):
                    step2_cutadapt_json = cutadapt_to_json(step2_txt)

                if step1_cutadapt_json:
                    adapters = step1_cutadapt_json["adapters"]
                    if len(step1_cutadapt_json["adapters"]) == 0:
                        srp_df.loc[
                            srp_df.run_accession == srr, "pass1_adapter"
                        ] = "Empty?"
                    elif isinstance(adapters, str):
                        srp_df.loc[
                            srp_df.run_accession == srr, "pass1_adapter"
                        ] = step1_cutadapt_json["adapters"]
                    else:
                        srp_df.loc[
                            srp_df.run_accession == srr, "pass1_adapter"
                        ] = step1_cutadapt_json["adapters"][
                            "{} - {}".format(srr, "Adapter 1")
                        ]
                        trim_info1 = step1_cutadapt_json["trim_info"][srr]
                        srp_df.loc[
                            srp_df.run_accession == srr, "pass1_total_reads_processed"
                        ] = trim_info1["r_processed"]
                        srp_df.loc[
                            srp_df.run_accession == srr, "pass1_reads_with_adapters"
                        ] = trim_info1["r_with_adapters"]
                if step2_cutadapt_json:
                    adapters = step2_cutadapt_json["adapters"]
                    if len(step2_cutadapt_json["adapters"]) == 0:
                        srp_df.loc[
                            srp_df.run_accession == srr, "pass2_adapter"
                        ] = "Empty?"
                    elif isinstance(adapters, str):
                        srp_df.loc[
                            srp_df.run_accession == srr, "pass2_adapter"
                        ] = step2_cutadapt_json["adapters"]
                    else:
                        srp_df.loc[
                            srp_df.run_accession == srr, "pass2_adapter"
                        ] = step2_cutadapt_json["adapters"][
                            "{} - {}".format(srr + "_trimmed", "Adapter 1")
                        ]
                        trim_info2 = step2_cutadapt_json["trim_info"][srr + "_trimmed"]
                        srp_df.loc[
                            srp_df.run_accession == srr, "pass2_reads_with_adapters"
                        ] = trim_info2["r_with_adapters"]
                        srp_df.loc[
                            srp_df.run_accession == srr, "pass2_total_reads_processed"
                        ] = trim_info2["r_processed"]

                if starlogs_df:
                    srp_df.loc[
                        srp_df.run_accession == srr, "mapping_total_reads_input"
                    ] = starlogs_df["total_reads"]
                    srp_df.loc[
                        srp_df.run_accession == srr, "uniquely_mapped"
                    ] = starlogs_df["uniquely_mapped"]
                    srp_df.loc[
                        srp_df.run_accession == srr, "uniquely_mapped_percent"
                    ] = starlogs_df["uniquely_mapped_percent"]

        cols = [
            "bases",
            "spots",
            "pass1_reads_with_adapters",
            "pass2_reads_with_adapters",
            "pass2_total_reads_processed",
            "pass1_total_reads_processed",
            "uniquely_mapped",
            "mapping_total_reads_input",
        ]
        for col in cols:
            srp_df[col] = srp_df[col].apply(lambda z: millify(z))
        sradb.close()
        return order_dataframe(srp_df, column_order)

In [109]:
ROOT_DIRS = ["/data1/re-ribo-analysis"]# "/data2/re-ribo-analysis", "/data3/re-ribo-analysis", "/data4/re-ribo-analysis"]
READ_LENGTH_DIRNAME = "read_lengths"
METAGENE_COVERAGE_DIRNAME = "metagene_coverages"
METAGENE_LENWISE_COVERAGE_DIRNAME = "metagene_coverage_lengthwise"

# Top level directory of the directories inside each of the ROOT_DIRS
__ASSEMBLIES__ = [os.listdir(dirname) for dirname in ROOT_DIRS]
__SPECIES__ = [
    {"label": "H.sapiens", "value": "hg38"},
    {"label": "M.musculus", "value": "mm10"},
    {"label": "C.albicans", "value": "SC5314"}
]
__ASSEMBLIES__ = list(
    sorted(set([item for sublist in __ASSEMBLIES__ for item in sublist]))
)
__ASSEMBLY_WISE_SRP__ = defaultdict(list)
__SRP_TO_ROOT_DIR_MAP__ = defaultdict(dict)

#DATASETS = {"hg38": pd.read_csv("/data1/hg_datasets.tsv", sep="\t"),
#            "mm10": pd.read_csv("/data1/mm_datasets.tsv", sep="\t")}

for root_dir in ROOT_DIRS:
    for assembly_build in os.listdir(root_dir):
        for srp_dir in filter(
            os.path.isdir, glob.glob(os.path.join(root_dir, assembly_build, "*"))
        ):
            srp = os.path.basename(srp_dir)
            __ASSEMBLY_WISE_SRP__[assembly_build].append(srp)
            __SRP_TO_ROOT_DIR_MAP__[srp][assembly_build] = os.path.join(
                root_dir, assembly_build, srp
            )

def generate_tablex(dataframe, max_rows=26):
    return html.Table(
        # Header
        [html.Tr([html.Th(col) for col in dataframe.columns]) ] +
        # Body
        [html.Tr([
            html.Td(dataframe.iloc[i][col]) for col in dataframe.columns
        ]) for i in range(min(len(dataframe), max_rows))]
    )


In [110]:
__ASSEMBLY_WISE_SRP__ = defaultdict(list)
__SRP_TO_ROOT_DIR_MAP__ = defaultdict(dict)
for root_dir in ROOT_DIRS:
    for assembly_build in os.listdir(root_dir):
        for srp_dir in filter(
            os.path.isdir, glob.glob(os.path.join(root_dir, assembly_build, "*"))
        ):
            srp = os.path.basename(srp_dir)
            __ASSEMBLY_WISE_SRP__[assembly_build].append(srp)
            __SRP_TO_ROOT_DIR_MAP__[srp][assembly_build] = os.path.join(
                root_dir, assembly_build, srp
            )

In [111]:
__ASSEMBLY_WISE_SRP__

defaultdict(list,
            {'Mmul8': ['SRP028612', 'SRP062129'],
             'GRCg6': ['SRP096694'],
             'panTro3': ['SRP028612', 'SRP062129'],
             'hg38': ['SRP065528',
              'ERP021735',
              'SRP102021',
              'SRP065529',
              'SRP115659',
              'SRP044932',
              'SRP102616',
              'SRP103009',
              'SRP090415',
              'SRP044933',
              'SRP044935',
              'SRP075585',
              'SRP044936',
              'SRP058501',
              'SRP028612',
              'SRP102020',
              'SRP062129',
              'SRP113333',
              'SRP065530',
              'SRP083699',
              'SRP114321',
              'SRP044934',
              'SRP067300',
              'SRP044937',
              'SRP059546',
              'SRP101952',
              'SRP098789',
              'SRP059547',
              'SRP062129_rm_quicksect',
              'SRP059548',
            

In [112]:
def get_fragment_lengths(file_path):
    return pd.read_csv(file_path, sep='\t').fragment_length.tolist()


In [117]:
db = SRAdb('/data2/SRAmetadb.sqlite')
all_projects = []
re_ribo_analysis_dir = '/data1/re-ribo-analysis'
for species, sample_list in __ASSEMBLY_WISE_SRP__.items():
    mkdir_p('/data2/re-ribo-analysis-metadata/{}'.format(species))
    for srp in sample_list:
        df = get_srp_table(srp, species, re_ribo_analysis_dir)
        project_filepath = '/data1/re-ribo-analysis/{}/{}'.format(species, srp)    
        metadata_filepath = '/data2/re-ribo-analysis-metadata/{}/{}.tsv'.format(species, srp)
        df_subset = df[df.ribotricer_metagene_5p == df.ribotricer_metagene_5p].ribotricer_metagene_5p.tolist()
        fragment_lengths = []
        for f in df_subset:
            fragment_lengths += get_fragment_lengths(f)
        fragment_lengths = list(sorted(list(set(fragment_lengths))))
        all_projects.append((species, srp, project_filepath, metadata_filepath, str(fragment_lengths)))        
        df.to_csv(metadata_filepath, sep='\t', index=False, header=True)

In [118]:
summary_df = pd.DataFrame(all_projects)
summary_df.columns = ['species', 'srp', 'project_output_path', 'project_metadata_path', 'fragment_lengths']
summary_df = summary_df.sort_values(by=['species', 'srp'])
summary_df.to_csv('/data2/datasets.tsv', sep='\t', index=False, header=True)
summary_df

Unnamed: 0,species,srp,project_output_path,project_metadata_path,fragment_lengths
40,BDGP6,ERP008887,/data1/re-ribo-analysis/BDGP6/ERP008887,/data2/re-ribo-analysis-metadata/BDGP6/ERP0088...,[]
42,BDGP6,SRP028243,/data1/re-ribo-analysis/BDGP6/SRP028243,/data2/re-ribo-analysis-metadata/BDGP6/SRP0282...,"[27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 3..."
43,BDGP6,SRP033366,/data1/re-ribo-analysis/BDGP6/SRP033366,/data2/re-ribo-analysis-metadata/BDGP6/SRP0333...,"[34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 4..."
44,BDGP6,SRP072369,/data1/re-ribo-analysis/BDGP6/SRP072369,/data2/re-ribo-analysis-metadata/BDGP6/SRP0723...,"[34, 35, 36, 37, 38]"
41,BDGP6,SRP108999,/data1/re-ribo-analysis/BDGP6/SRP108999,/data2/re-ribo-analysis-metadata/BDGP6/SRP1089...,"[23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 3..."
2,GRCg6,SRP096694,/data1/re-ribo-analysis/GRCg6/SRP096694,/data2/re-ribo-analysis-metadata/GRCg6/SRP0966...,"[18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 2..."
49,GRCz11,SRP010040,/data1/re-ribo-analysis/GRCz11/SRP010040,/data2/re-ribo-analysis-metadata/GRCz11/SRP010...,"[19, 20, 21, 23, 24]"
46,GRCz11,SRP021915,/data1/re-ribo-analysis/GRCz11/SRP021915,/data2/re-ribo-analysis-metadata/GRCz11/SRP021...,"[23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 3..."
47,GRCz11,SRP023492,/data1/re-ribo-analysis/GRCz11/SRP023492,/data2/re-ribo-analysis-metadata/GRCz11/SRP023...,"[23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 3..."
48,GRCz11,SRP033369,/data1/re-ribo-analysis/GRCz11/SRP033369,/data2/re-ribo-analysis-metadata/GRCz11/SRP033...,"[19, 20, 24, 25, 29, 33, 34, 35, 36, 39]"


In [137]:
%%timeit 
ribotricer_df = pd.read_csv("/data1/re-ribo-analysis/hg38/SRP044936/ribotricer_results/SRX663288_translating_ORFs.tsv", sep='\t')


  call = lambda f, *a, **k: f(*a, **k)
  all_runs = timer.repeat(repeat, number)


27.1 s ± 157 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [140]:
ribotricer_df = pd.read_csv("/data1/re-ribo-analysis/hg38/SRP044936/ribotricer_results/SRX663288_translating_ORFs.tsv", sep='\t')

ribotricer_df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,ORF_ID,ORF_type,status,phase_score,read_count,length,valid_codons,transcript_id,transcript_type,gene_id,gene_name,gene_type,chrom,strand,start_codon,profile
0,ENST00000641515_65565_70005_978,annotated,nontranslating,0.0,0,978,0,ENST00000641515,protein_coding,ENSG00000186092,OR4F5,protein_coding,1,+,ATG,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,ENST00000335137_69091_70005_915,annotated,nontranslating,0.0,0,915,0,ENST00000335137,protein_coding,ENSG00000186092,OR4F5,protein_coding,1,+,ATG,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,ENST00000426406_450743_451678_936,annotated,nontranslating,0.0,0,936,0,ENST00000426406,protein_coding,ENSG00000284733,OR4F29,protein_coding,1,-,ATG,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,ENST00000332831_685719_686654_936,annotated,nontranslating,0.0,0,936,0,ENST00000332831,protein_coding,ENSG00000284662,OR4F16,protein_coding,1,-,ATG,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,ENST00000420190_924432_939291_1074,annotated,nontranslating,0.330719,9,1074,8,ENST00000420190,protein_coding,ENSG00000187634,SAMD11,protein_coding,1,+,ATG,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [143]:
%timeit ribotricer_df = pd.read_csv("/data1/re-ribo-analysis/hg38/SRP044936/ribotricer_results/SRX663288_translating_ORFs.tsv", sep='\t', usecols=['ORF_type', 'status', 'phase_score'])


15 s ± 23 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [144]:
ribotricer_df.ORF_type.unique()

array(['annotated', 'novel', 'super_dORF', 'overlap_uORF', 'overlap_dORF',
       'super_uORF', 'dORF', 'uORF'], dtype=object)

In [None]:
ribotricer_df = pd.read_csv("/data1/re-ribo-analysis/hg38/SRP044936/ribotricer_results/SRX663288_translating_ORFs.tsv", sep='\t', usecols=[ 'ORF_type', 'status', 'phase_score'])
ribotricer_df_grouped = ribotricer_df.groupby(['ORF_'])


In [94]:
def read_ribotricer_bam_summary(file_path):
    summary_dict = OrderedDict()
    fragment_len_dist_dict = OrderedDict()
    reading_length_dist = False
    reading_summary = False
    with open(file_path) as fh:
        for index, line in enumerate(fh):
            line = line.strip()
            if line == '':
                continue
            if index == 0:
                assert line == 'summary:'
                reading_summary = True
                continue
            if line == 'length dist:':
                reading_summary = False
                reading_length_dist = True
                continue

            if reading_summary:
                try:
                    key, value = line.split(':')
                except:
                    raise Exception('Unable to parse {}'.format(line))
                value = value.strip(' ')
                summary_dict[key] = int(value)
            if reading_length_dist:
                try:
                    key, value = line.split(':')
                except:
                    raise Exception('Unable to parse {}'.format(line))
                value = value.strip(' ')
                fragment_len_dist_dict[int(key)] = int(value)
    return summary_dict, pd.Series(fragment_len_dist_dict).sort_index()
                
            
            

In [95]:
x,y = read_ribotricer_bam_summary('/data1/re-ribo-analysis/hg38/SRP090415/ribotricer_results/SRX2189155_bam_summary.txt')

In [96]:
x

OrderedDict([('total_reads', 45704297),
             ('unique_mapped', 45704297),
             ('qcfail', 0),
             ('duplicate', 0),
             ('secondary', 0),
             ('unmapped', 0),
             ('multi', 0)])

In [107]:
pd.DataFrame(y, columns=['phase_Sco'])

Unnamed: 0,0
18,104
19,83
20,121
21,136
22,162
23,158
24,202
25,138
26,233
27,185


# Summarisze count files

In [None]:
def summary_read_count_file(file_path):
    """Read a counts file outputted by ribotircer and get the sum of the counts
    
    Parameters
    ----------
    file_path: string
    
    Returns
    -------
    dist_normalized_counts: array
                            Array of counts normalized by length
    sum_counts: int
                Counts
    """
    df = pd.read_csv(file_path, sep='\t')
    normalized = df.count/df.length
    return df.count.sum(), normalized

def 


In [119]:
def rgb(minimum, maximum, value):
    minimum, maximum = float(minimum), float(maximum)
    ratio = 2 * (value-minimum) / (maximum - minimum)
    b = int(max(0, 255*(1 - ratio)))
    r = int(max(0, 255*(ratio - 1)))
    g = 255 - b - r
    return r, g, b

In [122]:
rgb(0.42, 1, 0.42)
import seaborn as sns

In [131]:
sns.light_palette((260, 75, 60), input="husl")
def hex_to_rgb(h):
    h = h.lstrip('#')
    return 'rgb({}, {}, {})'.format(*tuple(int(h[i:i+2], 16) for i in (0, 2, 4)))

In [132]:
hex_to_rgb('#eaedfb')

'rgb(234, 237, 251)'

In [136]:
from matplotlib.colors import LinearSegmentedColormap

boundaries = [0.0, 0.42, 0.5, 0.6, 0.7, 0.7, 0.9, 1.0]  # custom boundaries

# here I generated twice as many colors, 
# so that I could prune the boundaries more clearly
hex_colors = sns.light_palette((260, 75, 60), input="husl", n_colors=len(boundaries) * 2 + 2, as_cmap=False).as_hex()
hex_colors = [hex_to_rgb(hex_colors[i]) for i in range(0, len(hex_colors), 2)]

#rgb_colors = sns.light_palette((260, 75, 60), input="husl", n_colors=len(boundaries) * 2 + 2, as_cmap=False).as_rgb()
#rgb_colors = [hex_to_rgb(rgb_colors[i]) for i in range(0, len(rgb_colors), 2)]

colors=list(zip(boundaries, hex_colors))
colors


[(0.0, 'rgb(234, 237, 251)'),
 (0.42, 'rgb(220, 226, 248)'),
 (0.5, 'rgb(207, 214, 245)'),
 (0.6, 'rgb(193, 203, 243)'),
 (0.7, 'rgb(180, 191, 240)'),
 (0.7, 'rgb(166, 180, 237)'),
 (0.9, 'rgb(153, 168, 234)'),
 (1.0, 'rgb(139, 157, 232)')]

In [125]:
custom_color_map

<matplotlib.colors.LinearSegmentedColormap at 0x7fc4d1064c88>

In [134]:
colors

[(0.0, '#eaedfb'),
 (0.42, '#dce2f8'),
 (0.5, '#cfd6f5'),
 (0.6, '#c1cbf3'),
 (0.7, '#b4bff0'),
 (0.7, '#a6b4ed'),
 (0.9, '#99a8ea'),
 (1.0, '#8b9de8')]