In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import numpy as np
import os

# Default matplotlib style; several later cells switch to other styles explicitly.
plt.style.use('bmh')

# Directory that receives the correlation figures saved by the cells below.
odir = './correlation_results'
# exist_ok=True makes this safe to re-run and avoids the check-then-create
# race of a separate os.path.exists() / os.mkdir() pair.
os.makedirs(odir, exist_ok=True)

In [None]:
# Demultiplexed read table; the code below relies on its `pbid`, `file`
# and `fl_count` columns.
demux_reads = pd.read_table('/Volumes/sheynkman/projects/smc_proteogenomics/full_pacbio_analysis/isoseq/results/demux/demuxed_reads_by_barcode_and_file.tsv')

# Short labels for the raw input file names (reused by later cells).
file_rename = {
    'ST94_GS211122_hifi_reads' : 'run1',
    'ST94b_cell1-1523-m64152e_220305_042004' : 'run2_cell1',
    'ST94b_cell2-1525-m64152e_220306_103206' : 'run2_cell2',
    'ST94b_cell3-1527-m64152e_220307_164404' : 'run2_cell3',
    }

# Relabel known files but keep any name that is missing from `file_rename`.
# A plain `.map(file_rename)` would turn unknown names into NaN, and the
# groupby below silently drops NaN keys, losing those rows without notice.
demux_reads['file'] = demux_reads['file'].map(lambda name: file_rename.get(name, name))

# Sum `fl_count` per (pbid, file) and add a log2(count + 1) column for plotting.
demux_file = demux_reads.groupby(['pbid', 'file'])['fl_count'].sum().reset_index(name='fl_count')
demux_file['log2(FL+1)'] = np.log2(demux_file['fl_count'] + 1)



In [None]:
# Inspect the read table; the notebook renders the cell's last expression.
demux_reads

In [None]:

# One horizontal violin per input file, showing the spread of log2(FL+1) values.
sns.violinplot(
    data=demux_file,
    x='log2(FL+1)',
    y='file',
    orient='h',
)
# Note: this figure is written to the current working directory.
plt.savefig(
    'log2_violinplot_by_file.png',
    bbox_inches='tight',
)


In [None]:
# Count matrix with one column per input file; the first column of the TSV
# becomes the row index.
demux_pivot_file = pd.read_table('./results/demux/demuxed_pivot_by_file.tsv', index_col=0)

# Relabel the columns with the short run names and move to log2(x + 1) scale.
# rename() returns a new frame, so no in-place mutation is needed.
demux_pivot_file = np.log2(demux_pivot_file.rename(columns=file_rename) + 1)

# Pairwise correlation between the file columns.
corr_file = demux_pivot_file.corr()

# A bare `round(corr_file, 2)` statement discards its result and has no effect,
# so the two-decimal rounding is applied to the annotations through `fmt`.
# The plotted values (and therefore the colours) stay at full precision.
ax = sns.heatmap(corr_file, vmin=0, annot=True, fmt='.2f')
ax.set_title('log2(FL+1) correlation of files')
plt.savefig(os.path.join(odir, 'file_corr.png'), bbox_inches='tight')




In [None]:
# List the style names this matplotlib installation accepts in plt.style.use().
plt.style.available

In [None]:
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases removed the old names; use whichever this install provides.
paper_style = (
    'seaborn-v0_8-paper'
    if 'seaborn-v0_8-paper' in plt.style.available
    else 'seaborn-paper'
)
plt.style.use(paper_style)

# Scatter matrix of the log2(FL+1) values for every pair of files, with a
# kernel density estimate on the diagonal.
sns.pairplot(demux_pivot_file, diag_kind='kde')
plt.savefig(os.path.join(odir, 'pairplot_file_log2.png'), bbox_inches='tight')


In [None]:
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases removed the old names; use whichever this install provides.
talk_style = (
    'seaborn-v0_8-talk'
    if 'seaborn-v0_8-talk' in plt.style.available
    else 'seaborn-talk'
)
plt.style.use(talk_style)

# Count matrix with one column per barcode, moved to log2(x + 1) scale.
demux_pivot_barcode = pd.read_table('./results/demux/demuxed_pivot_by_barcode.tsv', index_col=0)
demux_pivot_barcode = np.log2(demux_pivot_barcode + 1)

# Pairwise correlation between the barcode columns.
corr_barcode = demux_pivot_barcode.corr()

# A bare `round(corr_barcode, 2)` statement discards its result and has no
# effect, so the two-decimal rounding is applied to the annotations via `fmt`.
sns.heatmap(corr_barcode, vmin=0, annot=True, fmt='.2f')
# Title spelling corrected ("abbundance" -> "abundance").
plt.title('Correlation heatmap of log2(FL+1) abundance by barcode')
plt.savefig(os.path.join(odir, 'barcode_corr_log2.png'), bbox_inches='tight')
plt.show()

In [None]:
# Inspect the log2(x + 1) transformed per-barcode matrix.
demux_pivot_barcode

In [None]:
# The TSV carries a two-row column header, read as a two-level column index
# (the per-barcode loop later treats level 0 as the barcode). Values are
# moved to log2(x + 1) scale right after loading.
demux_pivot = np.log2(
    pd.read_table(
        './results/demux/demuxed_pivot_by_barcode_and_file.tsv',
        index_col=0,
        header=[0, 1],
    )
    + 1
)


In [None]:
# Inspect the log2(x + 1) transformed matrix with its two-level column header.
demux_pivot

In [None]:
# Fixed row/column order for the per-barcode file correlation heatmaps.
file_order = [
    'run1',
    'run2_cell1',
    'run2_cell2',
    'run2_cell3',
]

In [None]:

# get_level_values(0) yields one label per column, i.e. every barcode is
# repeated once per file. Iterate over the unique labels so each heatmap is
# computed, shown and saved exactly once.
for barcode in demux_pivot.columns.get_level_values(0).unique():
    # Columns belonging to this barcode, with the barcode level dropped so the
    # remaining column labels are the second header level.
    # NOTE(review): assumes those labels match the names in `file_order` — confirm.
    barcode_pivot = demux_pivot.xs(barcode, axis=1, level=0)

    # Correlation between files, rows and columns arranged in `file_order`.
    corr = barcode_pivot.corr().loc[file_order, file_order]

    # A bare `round(corr, 2)` statement has no effect; format the annotations
    # to two decimals with `fmt` instead.
    fig, ax = plt.subplots()
    sns.heatmap(corr, vmin=0, annot=True, fmt='.2f', ax=ax)
    ax.set_title(f'Correlation heatplot of log2(FL+1) abundance of files\n{barcode}')

    fig.savefig(os.path.join(odir, f'{barcode}_file_corr_log2.png'), bbox_inches='tight')
    plt.show()
    # Release the figure so figures do not accumulate across iterations.
    plt.close(fig)



In [None]:
# Reload the read table, this time from the `demux_X` results directory.
demux_reads = pd.read_table('/Volumes/sheynkman/projects/smc_proteogenomics/full_pacbio_analysis/isoseq/results/demux_X/demuxed_reads_by_barcode_and_file.tsv')

# Relabel known files but keep any name that is missing from `file_rename`;
# a plain `.map(file_rename)` would turn unknown names into NaN and the
# groupby below would silently drop those rows.
demux_reads['file'] = demux_reads['file'].map(lambda name: file_rename.get(name, name))

# Total `fl_count` per input file.
read_count_file = demux_reads.groupby('file')['fl_count'].sum().reset_index(name='FL reads')

# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases removed the old names; use whichever this install provides.
poster_style = (
    'seaborn-v0_8-poster'
    if 'seaborn-v0_8-poster' in plt.style.available
    else 'seaborn-poster'
)
plt.style.use(poster_style)

ax = sns.barplot(y='file', x='FL reads', data=read_count_file, color='salmon')
# Show the counts as plain integers with thousands separators.
ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))

# './corr_results' is never created by this notebook, so saving there fails on
# a fresh run; write into `odir`, which the setup cell creates.
plt.savefig(os.path.join(odir, 'total_fl_reads_file.png'), bbox_inches='tight')

In [None]:
# Total `fl_count` per barcode, computed from the `demux_reads` table that is
# currently loaded.
read_count_barcode = demux_reads.groupby('barcode')['fl_count'].sum().reset_index(name='FL reads')

# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases removed the old names; use whichever this install provides.
poster_style = (
    'seaborn-v0_8-poster'
    if 'seaborn-v0_8-poster' in plt.style.available
    else 'seaborn-poster'
)
plt.style.use(poster_style)

ax = sns.barplot(y='barcode', x='FL reads', data=read_count_barcode, color='salmon', orient='h')
# Show the counts as plain integers with thousands separators.
ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))

# './corr_results' is never created by this notebook, so saving there fails on
# a fresh run; write into `odir`, which the setup cell creates.
plt.savefig(os.path.join(odir, 'total_fl_reads_barcode.png'), bbox_inches='tight')

In [None]:
# Inspect the read table that is currently loaded.
demux_reads