In [2]:
import numpy as np
import scipy.stats
from scipy.stats import linregress
import pandas as pd
import bokeh.io
from bokeh.io import output_notebook
from bokeh.plotting import figure, output_file, gridplot, show
from bokeh.models import ColumnDataSource, Label, LabelSet, Range1d 
from bokeh.palettes import all_palettes
output_notebook()

def make_frequency_lists(filename):
    # clean input text file
    mscs_open = open(filename, 'r')
    mscs_data = mscs_open.read()
    mscs_split = mscs_data.split('!')
    # print(mscs_split) 

    list_of_lists = []
    for x in range(0, len(mscs_split)):
        new_list = []
        new_list_clean = []
        clean_sequence = []
        if len(mscs_split[x]) > 200:
            new_list = mscs_split[x].split('\t')
            new_list_clean.append(new_list[0])
            new_list_clean.append(new_list[1])
            clean_sequence = new_list[3].split('\n')
            new_list_clean.append(clean_sequence[0])
            list_of_lists.append(new_list_clean)
    
    gc_list = []
    sequence_list = []
    for x in range(0, len(list_of_lists)):
        gc_list.append(float(list_of_lists[x][1]))
        sequence_list.append(list_of_lists[x][2])

    # calculate amino acid frequnecy 
    len_list = []
    m_list = []
    a_list = []
    c_list = []
    d_list = []
    e_list = []
    f_list = []
    g_list = []
    h_list = []
    i_list = []
    k_list = []
    l_list = []
    n_list = []
    p_list = []
    q_list = []
    r_list = []
    s_list = []
    t_list = []
    v_list = []
    w_list = []
    y_list = []

    for x in range(0, len(sequence_list)):
        len_list.append(len(sequence_list[x]))
        m_list.append(sequence_list[x].count('M')/len(sequence_list[x]))
        a_list.append(sequence_list[x].count('A')/len(sequence_list[x]))
        c_list.append(sequence_list[x].count('C')/len(sequence_list[x]))
        d_list.append(sequence_list[x].count('D')/len(sequence_list[x]))
        e_list.append(sequence_list[x].count('E')/len(sequence_list[x]))
        f_list.append(sequence_list[x].count('F')/len(sequence_list[x]))
        g_list.append(sequence_list[x].count('G')/len(sequence_list[x]))
        h_list.append(sequence_list[x].count('H')/len(sequence_list[x]))
        i_list.append(sequence_list[x].count('I')/len(sequence_list[x]))
        k_list.append(sequence_list[x].count('K')/len(sequence_list[x]))
        l_list.append(sequence_list[x].count('L')/len(sequence_list[x]))
        n_list.append(sequence_list[x].count('N')/len(sequence_list[x]))
        p_list.append(sequence_list[x].count('P')/len(sequence_list[x]))
        q_list.append(sequence_list[x].count('Q')/len(sequence_list[x]))
        r_list.append(sequence_list[x].count('R')/len(sequence_list[x]))
        s_list.append(sequence_list[x].count('S')/len(sequence_list[x]))
        t_list.append(sequence_list[x].count('T')/len(sequence_list[x]))
        v_list.append(sequence_list[x].count('V')/len(sequence_list[x]))
        w_list.append(sequence_list[x].count('W')/len(sequence_list[x]))
        y_list.append(sequence_list[x].count('Y')/len(sequence_list[x]))
    list_of_res = [gc_list, a_list, f_list, g_list, i_list, l_list, v_list]
    return list_of_res
list_of_files = ['ATP_Synthase_File_1.txt', 'amtb_file_1.txt', 'clc_file.txt', 'mraY_file.txt', 'mscs_file_test.txt', 'secd_file.txt', 'secf_file.txt', 'secy_file.txt']
    
atpB_list = make_frequency_lists('ATP_Synthase_File_1.txt')
amtb_list = make_frequency_lists('amtb_file_1.txt')
clc_list = make_frequency_lists('clc_file.txt')
mraY_list = make_frequency_lists('mraY_file.txt')
mscS_list = make_frequency_lists('mscs_file_test.txt')
secd_list = make_frequency_lists('secd_file.txt')
secf_list = make_frequency_lists('secf_file.txt')
secy_list = make_frequency_lists('secy_file.txt')

# len_plot = bokeh.plotting.figure(
#     width=600,
#     height=600,
#     x_axis_label = "Length",
#     y_axis_label = "GC Content"
#     )
    
# len_plot.circle(
#     x=len_list,
#     y=gc_list
#     )
    
# len_plot.title.text = 'amtB length'
# show(len_plot)
    
    
def regression(aa_list, gc_list_1):
    aa_array = np.array(aa_list)
    gc_array = np.array(gc_list_1)
    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(aa_array, gc_array)
    print("R^2 = " + str(r_value))
    print('P = ' + str(p_value))
    y_regress = slope * aa_array + intercept
    return y_regress
# print(all_palettes['Viridis'][8])

# ('#440154', '#46317E', '#365A8C', '#277E8E', '#1EA087', '#49C16D', '#9DD93A', '#FDE724')

a_plot = bokeh.plotting.figure(
    width=600,
    height=600,
    title='Ala Frequency',
    x_axis_label = "A Frequency",
    y_axis_label = "GC Content"
    )
a_plot.circle(atpB_list[1], atpB_list[0], size = 3, fill_color = '#440154', line_color=None)
a_plot.circle(amtb_list[1], amtb_list[0], size = 3, fill_color = '#46317E', line_color=None)
a_plot.circle(clc_list[1], clc_list[0], size = 3, fill_color = '#365A8C', line_color=None)
a_plot.circle(mraY_list[1], mraY_list[0], size = 3, fill_color = '#277E8E', line_color=None)
a_plot.circle(mscS_list[1], mscS_list[0], size = 3, fill_color = '#1EA087', line_color=None)
a_plot.circle(secd_list[1], secd_list[0], size = 3, fill_color = '#49C16D', line_color=None)
a_plot.circle(secf_list[1], secf_list[0], size = 3, fill_color = '#9DD93A', line_color=None)
a_plot.circle(secy_list[1], secy_list[0], size = 3, fill_color = '#FDE724', line_color=None)
a_plot.line(x=np.array(atpB_list[1]), y = regression(atpB_list[1], atpB_list[0]), line_color = '#440154', line_width = 0.1)
a_plot.line(x=np.array(amtb_list[1]), y = regression(amtb_list[1], amtb_list[0]), line_color = '#46317E', line_width = 0.1)
a_plot.line(x=np.array(clc_list[1]), y = regression(clc_list[1], clc_list[0]), line_color = '#365A8C', line_width = 0.1)
a_plot.line(x=np.array(mraY_list[1]), y = regression(mraY_list[1], mraY_list[0]), line_color = '#277E8E', line_width = 0.1)
a_plot.line(x=np.array(mscS_list[1]), y = regression(mscS_list[1], mscS_list[0]), line_color = '#1EA087', line_width = 0.1)
a_plot.line(x=np.array(secd_list[1]), y = regression(secd_list[1], secd_list[0]), line_color = '#49C16D', line_width = 0.1)
a_plot.line(x=np.array(secf_list[1]), y = regression(secf_list[1], secf_list[0]), line_color = '#9DD93A', line_width = 0.1)
a_plot.line(x=np.array(secy_list[1]), y = regression(secy_list[1], secy_list[0]), line_color = '#FDE724', line_width = 0.1)
# a_plot.legend.location = 'bottom_right'
a_plot.y_range = Range1d(25, 75)
show(a_plot)

f_plot = bokeh.plotting.figure(
    width=600,
    height=600,
    title='Phe Frequency',
    x_axis_label = "F Frequency",
    y_axis_label = "GC Content"
    )
f_plot.circle(atpB_list[2], atpB_list[0], size = 3, fill_color = '#440154', line_color=None)
f_plot.circle(amtb_list[2], amtb_list[0], size = 3, fill_color = '#46317E', line_color=None)
f_plot.circle(clc_list[2], clc_list[0], size = 3, fill_color = '#365A8C', line_color=None)
f_plot.circle(mraY_list[2], mraY_list[0], size = 3, fill_color = '#277E8E', line_color=None)
f_plot.circle(mscS_list[2], mscS_list[0], size = 3, fill_color = '#1EA087', line_color=None)
f_plot.circle(secd_list[2], secd_list[0], size = 3, fill_color = '#49C16D', line_color=None)
f_plot.circle(secf_list[2], secf_list[0], size = 3, fill_color = '#9DD93A', line_color=None)
f_plot.circle(secy_list[2], secy_list[0], size = 3, fill_color = '#FDE724', line_color=None)
f_plot.line(x=np.array(atpB_list[2]), y = regression(atpB_list[2], atpB_list[0]), line_color = '#440154', line_width = 0.1)
f_plot.line(x=np.array(amtb_list[2]), y = regression(amtb_list[2], amtb_list[0]), line_color = '#46317E', line_width = 0.1)
f_plot.line(x=np.array(clc_list[2]), y = regression(clc_list[2], clc_list[0]), line_color = '#365A8C', line_width = 0.1)
f_plot.line(x=np.array(mraY_list[2]), y = regression(mraY_list[2], mraY_list[0]), line_color = '#277E8E', line_width = 0.1)
f_plot.line(x=np.array(mscS_list[2]), y = regression(mscS_list[2], mscS_list[0]), line_color = '#1EA087', line_width = 0.1)
f_plot.line(x=np.array(secd_list[2]), y = regression(secd_list[2], secd_list[0]), line_color = '#49C16D', line_width = 0.1)
f_plot.line(x=np.array(secf_list[2]), y = regression(secf_list[2], secf_list[0]), line_color = '#9DD93A', line_width = 0.1)
f_plot.line(x=np.array(secy_list[2]), y = regression(secy_list[2], secy_list[0]), line_color = '#FDE724', line_width = 0.1)
# f_plot.legend.location = 'bottom_right'
f_plot.y_range = Range1d(25, 75)
show(f_plot)

g_plot = bokeh.plotting.figure(
    width=600,
    height=600,
    title='Gly Frequency',
    x_axis_label = "G Frequency",
    y_axis_label = "GC Content"
    )
g_plot.circle(atpB_list[3], atpB_list[0], size = 3, fill_color = '#440154', line_color=None)
g_plot.circle(amtb_list[3], amtb_list[0], size = 3, fill_color = '#46317E', line_color=None)
g_plot.circle(clc_list[3], clc_list[0], size = 3, fill_color = '#365A8C', line_color=None)
g_plot.circle(mraY_list[3], mraY_list[0], size = 3, fill_color = '#277E8E', line_color=None)
g_plot.circle(mscS_list[3], mscS_list[0], size = 3, fill_color = '#1EA087', line_color=None)
g_plot.circle(secd_list[3], secd_list[0], size = 3, fill_color = '#49C16D', line_color=None)
g_plot.circle(secf_list[3], secf_list[0], size = 3, fill_color = '#9DD93A', line_color=None)
g_plot.circle(secy_list[3], secy_list[0], size = 3, fill_color = '#FDE724', line_color=None)
g_plot.line(x=np.array(atpB_list[3]), y = regression(atpB_list[3], atpB_list[0]), line_color = '#440154', line_width = 0.1)
g_plot.line(x=np.array(amtb_list[3]), y = regression(amtb_list[3], amtb_list[0]), line_color = '#46317E', line_width = 0.1)
g_plot.line(x=np.array(clc_list[3]), y = regression(clc_list[3], clc_list[0]), line_color = '#365A8C', line_width = 0.1)
g_plot.line(x=np.array(mraY_list[3]), y = regression(mraY_list[3], mraY_list[0]), line_color = '#277E8E', line_width = 0.1)
g_plot.line(x=np.array(mscS_list[3]), y = regression(mscS_list[3], mscS_list[0]), line_color = '#1EA087', line_width = 0.1)
g_plot.line(x=np.array(secd_list[3]), y = regression(secd_list[3], secd_list[0]), line_color = '#49C16D', line_width = 0.1)
g_plot.line(x=np.array(secf_list[3]), y = regression(secf_list[3], secf_list[0]), line_color = '#9DD93A', line_width = 0.1)
g_plot.line(x=np.array(secy_list[3]), y = regression(secy_list[3], secy_list[0]), line_color = '#FDE724', line_width = 0.1)
# g_plot.legend.location = 'bottom_right'
g_plot.y_range = Range1d(25, 75)
show(g_plot)

i_plot = bokeh.plotting.figure(
    width=600,
    height=600,
    title='Ile Frequency',
    x_axis_label = "I Frequency",
    y_axis_label = "GC Content"
    )
i_plot.circle(atpB_list[4], atpB_list[0], size = 3, fill_color = '#440154', line_color=None)
i_plot.circle(amtb_list[4], amtb_list[0], size = 3, fill_color = '#46317E', line_color=None)
i_plot.circle(clc_list[4], clc_list[0], size = 3, fill_color = '#365A8C', line_color=None)
i_plot.circle(mraY_list[4], mraY_list[0], size = 3, fill_color = '#277E8E', line_color=None)
i_plot.circle(mscS_list[4], mscS_list[0], size = 3, fill_color = '#1EA087', line_color=None)
i_plot.circle(secd_list[4], secd_list[0], size = 3, fill_color = '#49C16D', line_color=None)
i_plot.circle(secf_list[4], secf_list[0], size = 3, fill_color = '#9DD93A', line_color=None)
i_plot.circle(secy_list[4], secy_list[0], size = 3, fill_color = '#FDE724', line_color=None)
i_plot.line(x=np.array(atpB_list[4]), y = regression(atpB_list[4], atpB_list[0]), line_color = '#440154', line_width = 0.1)
i_plot.line(x=np.array(amtb_list[4]), y = regression(amtb_list[4], amtb_list[0]), line_color = '#46317E', line_width = 0.1)
i_plot.line(x=np.array(clc_list[4]), y = regression(clc_list[4], clc_list[0]), line_color = '#365A8C', line_width = 0.1)
i_plot.line(x=np.array(mraY_list[4]), y = regression(mraY_list[4], mraY_list[0]), line_color = '#277E8E', line_width = 0.1)
i_plot.line(x=np.array(mscS_list[4]), y = regression(mscS_list[4], mscS_list[0]), line_color = '#1EA087', line_width = 0.1)
i_plot.line(x=np.array(secd_list[4]), y = regression(secd_list[4], secd_list[0]), line_color = '#49C16D', line_width = 0.1)
i_plot.line(x=np.array(secf_list[4]), y = regression(secf_list[4], secf_list[0]), line_color = '#9DD93A', line_width = 0.1)
i_plot.line(x=np.array(secy_list[4]), y = regression(secy_list[4], secy_list[0]), line_color = '#FDE724', line_width = 0.1)
# i_plot.legend.location = 'bottom_right'
i_plot.y_range = Range1d(25, 75)
show(i_plot)

l_plot = bokeh.plotting.figure(
    width=600,
    height=600,
    title='Leu Frequency',
    x_axis_label = "A Frequency",
    y_axis_label = "GC Content"
    )
l_plot.circle(atpB_list[5], atpB_list[0], size = 3, fill_color = '#440154', line_color=None)
l_plot.circle(amtb_list[5], amtb_list[0], size = 3, fill_color = '#46317E', line_color=None)
l_plot.circle(clc_list[5], clc_list[0], size = 3, fill_color = '#365A8C', line_color=None)
l_plot.circle(mraY_list[5], mraY_list[0], size = 3, fill_color = '#277E8E', line_color=None)
l_plot.circle(mscS_list[5], mscS_list[0], size = 3, fill_color = '#1EA087', line_color=None)
l_plot.circle(secd_list[5], secd_list[0], size = 3, fill_color = '#49C16D', line_color=None)
l_plot.circle(secf_list[5], secf_list[0], size = 3, fill_color = '#9DD93A', line_color=None)
l_plot.circle(secy_list[5], secy_list[0], size = 3, fill_color = '#FDE724', line_color=None)
l_plot.line(x=np.array(atpB_list[5]), y = regression(atpB_list[5], atpB_list[0]), line_color = '#440154', line_width = 0.1)
l_plot.line(x=np.array(amtb_list[5]), y = regression(amtb_list[5], amtb_list[0]), line_color = '#46317E', line_width = 0.1)
l_plot.line(x=np.array(clc_list[5]), y = regression(clc_list[5], clc_list[0]), line_color = '#365A8C', line_width = 0.1)
l_plot.line(x=np.array(mraY_list[5]), y = regression(mraY_list[5], mraY_list[0]), line_color = '#277E8E', line_width = 0.1)
l_plot.line(x=np.array(mscS_list[5]), y = regression(mscS_list[5], mscS_list[0]), line_color = '#1EA087', line_width = 0.1)
l_plot.line(x=np.array(secd_list[5]), y = regression(secd_list[5], secd_list[0]), line_color = '#49C16D', line_width = 0.1)
l_plot.line(x=np.array(secf_list[5]), y = regression(secf_list[5], secf_list[0]), line_color = '#9DD93A', line_width = 0.1)
l_plot.line(x=np.array(secy_list[5]), y = regression(secy_list[5], secy_list[0]), line_color = '#FDE724', line_width = 0.1)
# l_plot.legend.location = 'bottom_right'
l_plot.y_range = Range1d(25, 75)
show(l_plot)

v_plot = bokeh.plotting.figure(
    width=600,
    height=600,
    title='Val Frequency',
    x_axis_label = "V Frequency",
    y_axis_label = "GC Content"
    )
v_plot.circle(atpB_list[6], atpB_list[0], size = 3, fill_color = '#440154', line_color=None)
v_plot.circle(amtb_list[6], amtb_list[0], size = 3, fill_color = '#46317E', line_color=None)
v_plot.circle(clc_list[6], clc_list[0], size = 3, fill_color = '#365A8C', line_color=None)
v_plot.circle(mraY_list[6], mraY_list[0], size = 3, fill_color = '#277E8E', line_color=None)
v_plot.circle(mscS_list[6], mscS_list[0], size = 3, fill_color = '#1EA087', line_color=None)
v_plot.circle(secd_list[6], secd_list[0], size = 3, fill_color = '#49C16D', line_color=None)
v_plot.circle(secf_list[6], secf_list[0], size = 3, fill_color = '#9DD93A', line_color=None)
v_plot.circle(secy_list[6], secy_list[0], size = 3, fill_color = '#FDE724', line_color=None)
v_plot.line(x=np.array(atpB_list[6]), y = regression(atpB_list[6], atpB_list[0]), line_color = '#440154', line_width = 0.1)
v_plot.line(x=np.array(amtb_list[6]), y = regression(amtb_list[6], amtb_list[0]), line_color = '#46317E', line_width = 0.1)
v_plot.line(x=np.array(clc_list[6]), y = regression(clc_list[6], clc_list[0]), line_color = '#365A8C', line_width = 0.1)
v_plot.line(x=np.array(mraY_list[6]), y = regression(mraY_list[6], mraY_list[0]), line_color = '#277E8E', line_width = 0.1)
v_plot.line(x=np.array(mscS_list[6]), y = regression(mscS_list[6], mscS_list[0]), line_color = '#1EA087', line_width = 0.1)
v_plot.line(x=np.array(secd_list[6]), y = regression(secd_list[6], secd_list[0]), line_color = '#49C16D', line_width = 0.1)
v_plot.line(x=np.array(secf_list[6]), y = regression(secf_list[6], secf_list[0]), line_color = '#9DD93A', line_width = 0.1)
v_plot.line(x=np.array(secy_list[6]), y = regression(secy_list[6], secy_list[0]), line_color = '#FDE724', line_width = 0.1)
# v_plot.legend.location = 'bottom_right'
v_plot.y_range = Range1d(25, 75)
show(v_plot)





# f_plot = bokeh.plotting.figure(
#     width=600,
#     height=600,
#     x_axis_label = "F Frequency",
#     y_axis_label = "GC Content"
#     )
# f_plot.line(x=np.array(atpB_list[2]), y = regression(atpB_list[2], atpB_list[0]), color = '#440154FF', legend_label = 'atpB')
# f_plot.line(x=np.array(amtb_list[2]), y = regression(amtb_list[2], amtb_list[0]), color = '#404788FF', legend_label = 'amtB')
# f_plot.line(x=np.array(clc_list[2]), y = regression(clc_list[2], clc_list[0]), color = '#33638DFF', legend_label = 'clc')
# f_plot.line(x=np.array(mraY_list[2]), y = regression(mraY_list[2], mraY_list[0]), color = '#287D8EFF', legend_label = 'mraY')
# f_plot.line(x=np.array(mscS_list[2]), y = regression(mscS_list[2], mscS_list[0]), color = '#20A387FF', legend_label = 'mscS')
# f_plot.line(x=np.array(secd_list[2]), y = regression(secd_list[2], secd_list[0]), color = '#3CBB75FF', legend_label = 'secD')
# f_plot.line(x=np.array(secf_list[2]), y = regression(secf_list[2], secf_list[0]), color = '#73D055FF', legend_label = 'secF')
# f_plot.line(x=np.array(secy_list[2]), y = regression(secy_list[2], secy_list[0]), color = '#DCE319FF', legend_label = 'secY')
# f_plot.legend.location = 'bottom_right'
# show(f_plot)

# g_plot = bokeh.plotting.figure(
#     width=600,
#     height=600,
#     x_axis_label = "G Frequency",
#     y_axis_label = "GC Content"
#     )
# g_plot.line(x=np.array(atpB_list[3]), y = regression(atpB_list[3], atpB_list[0]), color = '#440154FF', legend_label = 'atpB')
# g_plot.line(x=np.array(amtb_list[3]), y = regression(amtb_list[3], amtb_list[0]), color = '#404788FF', legend_label = 'amtB')
# g_plot.line(x=np.array(clc_list[3]), y = regression(clc_list[3], clc_list[0]), color = '#33638DFF', legend_label = 'clc')
# g_plot.line(x=np.array(mraY_list[3]), y = regression(mraY_list[3], mraY_list[0]), color = '#287D8EFF', legend_label = 'mraY')
# g_plot.line(x=np.array(mscS_list[3]), y = regression(mscS_list[3], mscS_list[0]), color = '#20A387FF', legend_label = 'mscS')
# g_plot.line(x=np.array(secd_list[3]), y = regression(secd_list[3], secd_list[0]), color = '#3CBB75FF', legend_label = 'secD')
# g_plot.line(x=np.array(secf_list[3]), y = regression(secf_list[3], secf_list[0]), color = '#73D055FF', legend_label = 'secF')
# g_plot.line(x=np.array(secy_list[3]), y = regression(secy_list[3], secy_list[0]), color = '#DCE319FF', legend_label = 'secY')
# g_plot.legend.location = 'bottom_right'
# g_plot.y_range = Range1d(25, 75)
# show(g_plot)

# i_plot = bokeh.plotting.figure(
#     width=600,
#     height=600,
#     x_axis_label = "I Frequency",
#     y_axis_label = "GC Content"
#     )
# i_plot.line(x=np.array(atpB_list[4]), y = regression(atpB_list[4], atpB_list[0]), color = '#440154FF', alpha=0.5, legend_label = 'atpB')
# i_plot.line(x=np.array(amtb_list[4]), y = regression(amtb_list[4], amtb_list[0]), color = '#404788FF', legend_label = 'amtB')
# i_plot.line(x=np.array(clc_list[4]), y = regression(clc_list[4], clc_list[0]), color = '#33638DFF', legend_label = 'clc')
# i_plot.line(x=np.array(mraY_list[4]), y = regression(mraY_list[4], mraY_list[0]), color = '#287D8EFF', legend_label = 'mraY')
# i_plot.line(x=np.array(mscS_list[4]), y = regression(mscS_list[4], mscS_list[0]), color = '#20A387FF', legend_label = 'mscS')
# i_plot.line(x=np.array(secd_list[4]), y = regression(secd_list[4], secd_list[0]), color = '#3CBB75FF', legend_label = 'secD')
# i_plot.line(x=np.array(secf_list[4]), y = regression(secf_list[4], secf_list[0]), color = '#73D055FF', legend_label = 'secF')
# i_plot.line(x=np.array(secy_list[4]), y = regression(secy_list[4], secy_list[0]), color = '#DCE319FF', legend_label = 'secY')
# i_plot.legend.location = 'top_right'
# i_plot.y_range = Range1d(25, 75)
# show(i_plot)

# l_plot = bokeh.plotting.figure(
#     width=600,
#     height=600,
#     x_axis_label = "L Frequency",
#     y_axis_label = "GC Content"
#     )
# l_plot.line(x=np.array(atpB_list[5]), y = regression(atpB_list[5], atpB_list[0]), color = '#440154FF', legend_label = 'atpB')
# l_plot.line(x=np.array(amtb_list[5]), y = regression(amtb_list[5], amtb_list[0]), color = '#404788FF', legend_label = 'amtB')
# l_plot.line(x=np.array(clc_list[5]), y = regression(clc_list[5], clc_list[0]), color = '#33638DFF', legend_label = 'clc')
# l_plot.line(x=np.array(mraY_list[5]), y = regression(mraY_list[5], mraY_list[0]), color = '#287D8EFF', legend_label = 'mraY')
# l_plot.line(x=np.array(mscS_list[5]), y = regression(mscS_list[5], mscS_list[0]), color = '#20A387FF', legend_label = 'mscS')
# l_plot.line(x=np.array(secd_list[5]), y = regression(secd_list[5], secd_list[0]), color = '#3CBB75FF', legend_label = 'secD')
# l_plot.line(x=np.array(secf_list[5]), y = regression(secf_list[5], secf_list[0]), color = '#73D055FF', legend_label = 'secF')
# l_plot.line(x=np.array(secy_list[5]), y = regression(secy_list[5], secy_list[0]), color = '#DCE319FF', legend_label = 'secY')
# l_plot.legend.location = 'top_left'
# l_plot.y_range = Range1d(25, 75)
# show(l_plot)

# v_plot = bokeh.plotting.figure(
#     width=600,
#     height=600,
#     x_axis_label = "V Frequency",
#     y_axis_label = "GC Content"
#     )
# v_plot.line(x=np.array(atpB_list[6]), y = regression(atpB_list[6], atpB_list[0]), color = '#440154FF', legend_label = 'atpB')
# v_plot.line(x=np.array(amtb_list[6]), y = regression(amtb_list[6], amtb_list[0]), color = '#404788FF', legend_label = 'amtB')
# v_plot.line(x=np.array(clc_list[6]), y = regression(clc_list[6], clc_list[0]), color = '#33638DFF', legend_label = 'clc')
# v_plot.line(x=np.array(mraY_list[6]), y = regression(mraY_list[6], mraY_list[0]), color = '#287D8EFF', legend_label = 'mraY')
# v_plot.line(x=np.array(mscS_list[6]), y = regression(mscS_list[6], mscS_list[0]), color = '#20A387FF', legend_label = 'mscS')
# v_plot.line(x=np.array(secd_list[6]), y = regression(secd_list[6], secd_list[0]), color = '#3CBB75FF', legend_label = 'secD')
# v_plot.line(x=np.array(secf_list[6]), y = regression(secf_list[6], secf_list[0]), color = '#73D055FF', legend_label = 'secF')
# v_plot.line(x=np.array(secy_list[6]), y = regression(secy_list[6], secy_list[0]), color = '#DCE319FF', legend_label = 'secY')
# v_plot.legend.location = 'bottom_right'
# v_plot.y_range = Range1d(25, 75)
# show(v_plot)







# i_plot = bokeh.plotting.figure(
#     width=600,
#     height=600,
#     x_axis_label = "I Frequency",
#     y_axis_label = "GC Content"
#     )
    
# i_plot.circle(
#     x=i_list,
#     y=gc_list
#     )
    
# i_plot.title.text = 'I'
# i_array = np.array(i_list)
# i_plot.line(x=i_array, y = regression(i_list, gc_list))
# show(i_plot)

# l_plot = bokeh.plotting.figure(
#     width=600,
#     height=600,
#     x_axis_label = "L Frequency",
#     y_axis_label = "GC Content"
#     )
    
# l_plot.circle(
#     x=l_list,
#     y=gc_list
#     )
    
# l_plot.title.text = 'L'
# l_array = np.array(l_list)
# l_plot.line(x=l_array, y = regression(l_list, gc_list))
# show(l_plot)

# v_plot = bokeh.plotting.figure(
#     width=600,
#     height=600,
#     x_axis_label = "V Frequency",
#     y_axis_label = "GC Content"
#     )
    
# v_plot.circle(
#     x=v_list,
#     y=gc_list
#     )
    
# v_plot.title.text = 'V'
# v_array = np.array(v_list)
# v_plot.line(x=v_array, y = regression(v_list, gc_list))
# show(v_plot)


        

R^2 = 0.7284923198479859
P = 1.907452613411229e-12
R^2 = 0.7544307350908042
P = 8.007083485609734e-12
R^2 = 0.8453377696713078
P = 4.104018920004397e-14
R^2 = 0.6241212940199315
P = 7.80234525263519e-09
R^2 = 0.8804156936334376
P = 3.880430180425e-19
R^2 = 0.8488077868150866
P = 6.935010633455455e-15
R^2 = 0.7885562811293583
P = 1.0515388868838108e-11
R^2 = 0.4681441678735912
P = 2.5986493166812375e-05


R^2 = 0.04749034737225447
P = 0.7005544942889157
R^2 = -0.6700751547994157
P = 8.775071687763241e-09
R^2 = -0.8079592741830707
P = 3.8860277137958265e-12
R^2 = -0.6323213879076233
P = 4.3211158473146905e-09
R^2 = -0.6808337301845979
P = 7.758014143802819e-09
R^2 = -0.3573610514609142
P = 0.01084187417079805
R^2 = -0.5408600085590277
P = 5.011607077162991e-05
R^2 = -0.3592326442785871
P = 0.0016707714774102523


R^2 = 0.11268928025740547
P = 0.36021777037544556
R^2 = 0.5252675216498945
P = 2.3010279866195626e-05
R^2 = 0.4840092750004492
P = 0.000491269114529656
R^2 = 0.6892087178278661
P = 4.2101566910494794e-11
R^2 = 0.4546299299050941
P = 0.0004316119933370014
R^2 = 0.209021224633776
P = 0.14518081338940655
R^2 = 0.7571352218172791
P = 1.984754726781355e-10
R^2 = 0.7652060219059261
P = 2.0572654462678947e-15


R^2 = -0.44055823386769616
P = 0.00017003288428877553
R^2 = -0.8804085366085985
P = 8.585057946475242e-20
R^2 = -0.7806394750681935
P = 6.025847776276139e-11
R^2 = -0.758869825638846
P = 2.716202824998351e-14
R^2 = -0.883201908943479
P = 2.1305547485529595e-19
R^2 = -0.8591940420695118
P = 1.4211329617737346e-15
R^2 = -0.8525161646936961
P = 3.9930130241555055e-15
R^2 = -0.5482143014422968
P = 4.2811218551930984e-07


R^2 = -0.15651038421214256
P = 0.20246461796481202
R^2 = 0.3451754617724814
P = 0.007962539519324664
R^2 = 0.16150861071480951
P = 0.27277772811070244
R^2 = 0.21575719845748031
P = 0.07283810738351089
R^2 = 0.31724578433113415
P = 0.017199977026643106
R^2 = -0.2978782658150596
P = 0.0356411436411616
R^2 = 0.1291434362276181
P = 0.37140960615210505
R^2 = 0.23591392503812808
P = 0.04301987392737738


R^2 = 0.40037503869676044
P = 0.0007168039016631464
R^2 = 0.4724020906590754
P = 0.00018103061159716448
R^2 = 0.6357069879915944
P = 1.2052039169827595e-06
R^2 = 0.21489383975187623
P = 0.07401952098100371
R^2 = 0.5058995035412684
P = 6.965062259304869e-05
R^2 = -0.35236698433644137
P = 0.01208484366254494
R^2 = 0.5358532428989882
P = 6.0628855281710095e-05
R^2 = 0.331199001333057
P = 0.003946526218945435
