In [1]:
import numpy as np
import pandas as pd
from scipy import stats
import math
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from sklearn.preprocessing import MinMaxScaler

import re
import os
from scipy.stats import skew, kurtosis
plt.ioff()

In [2]:
# Three-letter amino-acid residue codes mapped to their one-letter codes.
# The pair order below is the dict's insertion order; keep it stable, since
# find_replace_multi_ordered walks the keys with a stable sort.
seq = dict([
    ('ALA', 'A'), ('ARG', 'R'), ('ASN', 'N'), ('ASP', 'D'),
    ('CYS', 'C'), ('GLU', 'E'), ('GLN', 'Q'), ('GLY', 'G'),
    ('HIS', 'H'), ('ILE', 'I'), ('LEU', 'L'), ('LYS', 'K'),
    ('MET', 'M'), ('PHE', 'F'), ('PRO', 'P'), ('SER', 'S'),
    ('THR', 'T'), ('TRP', 'W'), ('TYR', 'Y'), ('VAL', 'V'),
])

In [3]:
def find_replace_multi_ordered(string):
    """Replace every key of ``seq`` found in ``string`` with its mapped value.

    Keys are applied one at a time, longest first (ties keep the dict's
    insertion order), each via ``re.sub`` over the result of the previous
    substitution. Keys are used as regular-expression patterns.

    Args:
        string: Text to translate.

    Returns:
        The text after all substitutions have been applied.
    """
    codes_longest_first = sorted(seq, key=len, reverse=True)
    translated = string
    for code in codes_longest_first:
        translated = re.sub(code, seq[code], translated)
    return translated

In [4]:
# For each data file 3..41: load at most 3000 rows, then save one t-score
# histogram per distinct `fragment_one` value under ./plots_unfiltered/<i>/.
for i in range(3, 42):
    file_name = './unfiltered/data_' + str(i) + '-unfiltered.csv'
    # Chain fillna rather than mutating the head() slice in place.
    df = pd.read_csv(file_name).head(3000).fillna(0)
    out_dir = './plots_unfiltered/' + str(i)
    os.makedirs(out_dir, exist_ok=True)

    # Progress indicator: one number per processed file.
    print(i, end=' ')

    plt.close('all')

    for j in df['fragment_one'].unique():
        # Filter once per fragment and reuse the slice for every statistic.
        rows = df[df['fragment_one'] == j]
        tscores = rows['tscore']
        std = tscores.std()

        # Skip fragments that cannot be binned: a mean of exactly 0
        # (presumably zero-filled missing scores -- TODO confirm), a NaN std
        # (single observation) or a zero std (all values identical).
        if tscores.mean() == 0.0 or not np.isfinite(std) or std == 0:
            continue

        # Create the figure only after the guard so skipped fragments do not
        # leave unused figures open.
        fig, ax = plt.subplots(figsize=(20, 10))

        # Integer multiples of std give exactly nine edges (-4*std .. 4*std);
        # a float-step np.arange can emit an extra edge through rounding.
        bin_edges = std * np.arange(-4, 5)
        ax.hist(tscores, bins=bin_edges, edgecolor='black', linewidth=1.2, align='mid', color="grey")
        ax.set_title(find_replace_multi_ordered(j), fontsize=20)

        # Patches are used only as carriers for the legend text.
        std_patch = mpatches.Patch(label='T-Score Std: ' + str(std.round(3)))
        skew_patch = mpatches.Patch(label='Skewness: ' + str(rows['skew'].iloc[0].round(3)))
        kurtosis_patch = mpatches.Patch(label='Excess Kurtosis: ' + str(rows['kurtosis'].iloc[0].round(3)))
        count_patch = mpatches.Patch(label='Count: ' + str(rows['n'].iloc[0]))
        ax.legend(handles=[skew_patch, kurtosis_patch, std_patch, count_patch], prop={'size': 12}, loc=(0.7, 1.001))
        ax.set_xlabel('t-score (mean + std)', fontsize=18)
        ax.set_ylabel('Frequency', fontsize=18)

        fig.savefig(out_dir + '/' + j)
        plt.close(fig)

3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

  interactivity=interactivity, compiler=compiler, result=result)


40 41 

In [5]:
# For each data file 3..41: load at most 3000 rows, then save one t-score
# histogram per distinct `fragment_one` value under
# ./plots_unfiltered_invariant/<i>/.
for i in range(3, 42):
    file_name = './unfiltered/data_' + str(i) + '-unfiltered-invariant.csv'
    # Chain fillna rather than mutating the head() slice in place.
    df = pd.read_csv(file_name).head(3000).fillna(0)
    out_dir = './plots_unfiltered_invariant/' + str(i)
    os.makedirs(out_dir, exist_ok=True)

    # Progress indicator: one number per processed file.
    print(i, end=' ')

    plt.close('all')

    for j in df['fragment_one'].unique():
        # Filter once per fragment and reuse the slice for every statistic.
        rows = df[df['fragment_one'] == j]
        tscores = rows['tscore']
        std = tscores.std()

        # Skip fragments that cannot be binned: a mean of exactly 0
        # (presumably zero-filled missing scores -- TODO confirm), a NaN std
        # (single observation) or a zero std (all values identical).
        if tscores.mean() == 0.0 or not np.isfinite(std) or std == 0:
            continue

        # Create the figure only after the guard so skipped fragments do not
        # leave unused figures open.
        fig, ax = plt.subplots(figsize=(20, 10))

        # Integer multiples of std give exactly nine edges (-4*std .. 4*std);
        # a float-step np.arange can emit an extra edge through rounding.
        bin_edges = std * np.arange(-4, 5)
        ax.hist(tscores, bins=bin_edges, edgecolor='black', linewidth=1.2, align='mid', color="grey")
        ax.set_title(find_replace_multi_ordered(j), fontsize=20)

        # Patches are used only as carriers for the legend text.
        std_patch = mpatches.Patch(label='T-Score Std: ' + str(std.round(3)))
        skew_patch = mpatches.Patch(label='Skewness: ' + str(rows['skew'].iloc[0].round(3)))
        kurtosis_patch = mpatches.Patch(label='Excess Kurtosis: ' + str(rows['kurtosis'].iloc[0].round(3)))
        count_patch = mpatches.Patch(label='Count: ' + str(rows['n'].iloc[0]))
        ax.legend(handles=[skew_patch, kurtosis_patch, std_patch, count_patch], prop={'size': 12}, loc=(0.7, 1.001))
        ax.set_xlabel('t-score (mean + std)', fontsize=18)
        ax.set_ylabel('Frequency', fontsize=18)

        fig.savefig(out_dir + '/' + j)
        plt.close(fig)

3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 

In [6]:
# For each data file 3..41: load at most 3000 rows, then save one t-score
# histogram per distinct `fragment_one` value under
# ./plots_unfiltered_variant/<i>/.
for i in range(3, 42):
    file_name = './unfiltered/data_' + str(i) + '-unfiltered-variant.csv'
    # Chain fillna rather than mutating the head() slice in place.
    df = pd.read_csv(file_name).head(3000).fillna(0)
    out_dir = './plots_unfiltered_variant/' + str(i)
    os.makedirs(out_dir, exist_ok=True)

    # Progress indicator: one number per processed file.
    print(i, end=' ')

    plt.close('all')

    for j in df['fragment_one'].unique():
        # Filter once per fragment and reuse the slice for every statistic.
        rows = df[df['fragment_one'] == j]
        tscores = rows['tscore']
        std = tscores.std()

        # Skip fragments that cannot be binned: a mean of exactly 0
        # (presumably zero-filled missing scores -- TODO confirm), a NaN std
        # (single observation) or a zero std (all values identical).
        if tscores.mean() == 0.0 or not np.isfinite(std) or std == 0:
            continue

        # Create the figure only after the guard so skipped fragments do not
        # leave unused figures open.
        fig, ax = plt.subplots(figsize=(20, 10))

        # Integer multiples of std give exactly nine edges (-4*std .. 4*std);
        # a float-step np.arange can emit an extra edge through rounding.
        bin_edges = std * np.arange(-4, 5)
        ax.hist(tscores, bins=bin_edges, edgecolor='black', linewidth=1.2, align='mid', color="grey")
        ax.set_title(find_replace_multi_ordered(j), fontsize=20)

        # Patches are used only as carriers for the legend text.
        std_patch = mpatches.Patch(label='T-Score Std: ' + str(std.round(3)))
        skew_patch = mpatches.Patch(label='Skewness: ' + str(rows['skew'].iloc[0].round(3)))
        kurtosis_patch = mpatches.Patch(label='Excess Kurtosis: ' + str(rows['kurtosis'].iloc[0].round(3)))
        count_patch = mpatches.Patch(label='Count: ' + str(rows['n'].iloc[0]))
        ax.legend(handles=[skew_patch, kurtosis_patch, std_patch, count_patch], prop={'size': 12}, loc=(0.7, 1.001))
        ax.set_xlabel('t-score (mean + std)', fontsize=18)
        ax.set_ylabel('Frequency', fontsize=18)

        fig.savefig(out_dir + '/' + j)
        plt.close(fig)

3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 

In [8]:
# For each data file 3..41: load at most 3000 rows, then save one t-score
# histogram per distinct `fragment_one` value under ./plots_filtered/<i>/.
for i in range(3, 42):
    file_name = './filtered/data_' + str(i) + '-filtered.csv'
    # Chain fillna rather than mutating the head() slice in place.
    df = pd.read_csv(file_name).head(3000).fillna(0)
    out_dir = './plots_filtered/' + str(i)
    os.makedirs(out_dir, exist_ok=True)

    # Progress indicator: one number per processed file.
    print(i, end=' ')

    plt.close('all')

    for j in df['fragment_one'].unique():
        # Filter once per fragment and reuse the slice for every statistic.
        rows = df[df['fragment_one'] == j]
        tscores = rows['tscore']
        std = tscores.std()

        # Skip fragments that cannot be binned: a mean of exactly 0
        # (presumably zero-filled missing scores -- TODO confirm), a NaN std
        # (single observation, previously checked via count() == 1) or a
        # zero std (all values identical).
        if tscores.mean() == 0.0 or not np.isfinite(std) or std == 0:
            continue

        # Create the figure only after the guard so skipped fragments do not
        # leave unused figures open.
        fig, ax = plt.subplots(figsize=(20, 10))

        # Integer multiples of std give exactly nine edges (-4*std .. 4*std);
        # a float-step np.arange can emit an extra edge through rounding.
        bin_edges = std * np.arange(-4, 5)
        ax.hist(tscores, bins=bin_edges, edgecolor='black', linewidth=1.2, align='mid', color="grey")
        ax.set_title(find_replace_multi_ordered(j), fontsize=20)

        # Patches are used only as carriers for the legend text.
        std_patch = mpatches.Patch(label='T-Score Std: ' + str(std.round(3)))
        skew_patch = mpatches.Patch(label='Skewness: ' + str(rows['skew'].iloc[0].round(3)))
        kurtosis_patch = mpatches.Patch(label='Excess Kurtosis: ' + str(rows['kurtosis'].iloc[0].round(3)))
        count_patch = mpatches.Patch(label='Count: ' + str(rows['n'].iloc[0]))
        ax.legend(handles=[skew_patch, kurtosis_patch, std_patch, count_patch], prop={'size': 12}, loc=(0.7, 1.001))
        ax.set_xlabel('t-score (mean + std)', fontsize=18)
        ax.set_ylabel('Frequency', fontsize=18)

        fig.savefig(out_dir + '/' + j)
        plt.close(fig)

3 

  


4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 

In [9]:
# For each data file 3..41: load at most 3000 rows, then save one t-score
# histogram per distinct `fragment_one` value under
# ./plots_filtered_variant/<i>/.
for i in range(3, 42):
    file_name = './filtered/data_' + str(i) + '-filtered-variant.csv'
    # Chain fillna rather than mutating the head() slice in place.
    df = pd.read_csv(file_name).head(3000).fillna(0)
    out_dir = './plots_filtered_variant/' + str(i)
    os.makedirs(out_dir, exist_ok=True)

    # Progress indicator: one number per processed file.
    print(i, end=' ')

    plt.close('all')

    for j in df['fragment_one'].unique():
        # Filter once per fragment and reuse the slice for every statistic.
        rows = df[df['fragment_one'] == j]
        tscores = rows['tscore']
        std = tscores.std()

        # Skip fragments that cannot be binned: a mean of exactly 0
        # (presumably zero-filled missing scores -- TODO confirm), a NaN std
        # (single observation, previously checked via count() == 1) or a
        # zero std (all values identical).
        if tscores.mean() == 0.0 or not np.isfinite(std) or std == 0:
            continue

        # Create the figure only after the guard so skipped fragments do not
        # leave unused figures open.
        fig, ax = plt.subplots(figsize=(20, 10))

        # Integer multiples of std give exactly nine edges (-4*std .. 4*std);
        # a float-step np.arange can emit an extra edge through rounding.
        bin_edges = std * np.arange(-4, 5)
        ax.hist(tscores, bins=bin_edges, edgecolor='black', linewidth=1.2, align='mid', color="grey")
        ax.set_title(find_replace_multi_ordered(j), fontsize=20)

        # Patches are used only as carriers for the legend text.
        std_patch = mpatches.Patch(label='T-Score Std: ' + str(std.round(3)))
        skew_patch = mpatches.Patch(label='Skewness: ' + str(rows['skew'].iloc[0].round(3)))
        kurtosis_patch = mpatches.Patch(label='Excess Kurtosis: ' + str(rows['kurtosis'].iloc[0].round(3)))
        count_patch = mpatches.Patch(label='Count: ' + str(rows['n'].iloc[0]))
        ax.legend(handles=[skew_patch, kurtosis_patch, std_patch, count_patch], prop={'size': 12}, loc=(0.7, 1.001))
        ax.set_xlabel('t-score (mean + std)', fontsize=18)
        ax.set_ylabel('Frequency', fontsize=18)

        fig.savefig(out_dir + '/' + j)
        plt.close(fig)

3 

  


4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 

In [10]:
# For each data file 3..41: load at most 3000 rows, then save one t-score
# histogram per distinct `fragment_one` value under
# ./plots_filtered_invariant/<i>/.
for i in range(3, 42):
    file_name = './filtered/data_' + str(i) + '-filtered-invariant.csv'
    # Chain fillna rather than mutating the head() slice in place.
    df = pd.read_csv(file_name).head(3000).fillna(0)
    out_dir = './plots_filtered_invariant/' + str(i)
    os.makedirs(out_dir, exist_ok=True)

    # Progress indicator: one number per processed file.
    print(i, end=' ')

    plt.close('all')

    for j in df['fragment_one'].unique():
        # Filter once per fragment and reuse the slice for every statistic.
        rows = df[df['fragment_one'] == j]
        tscores = rows['tscore']
        std = tscores.std()

        # Skip fragments that cannot be binned: a mean of exactly 0
        # (presumably zero-filled missing scores -- TODO confirm), a NaN std
        # (single observation, previously checked via count() == 1) or a
        # zero std (all values identical).
        if tscores.mean() == 0.0 or not np.isfinite(std) or std == 0:
            continue

        # Create the figure only after the guard so skipped fragments do not
        # leave unused figures open.
        fig, ax = plt.subplots(figsize=(20, 10))

        # Integer multiples of std give exactly nine edges (-4*std .. 4*std);
        # a float-step np.arange can emit an extra edge through rounding.
        bin_edges = std * np.arange(-4, 5)
        ax.hist(tscores, bins=bin_edges, edgecolor='black', linewidth=1.2, align='mid', color="grey")
        ax.set_title(find_replace_multi_ordered(j), fontsize=20)

        # Patches are used only as carriers for the legend text.
        std_patch = mpatches.Patch(label='T-Score Std: ' + str(std.round(3)))
        skew_patch = mpatches.Patch(label='Skewness: ' + str(rows['skew'].iloc[0].round(3)))
        kurtosis_patch = mpatches.Patch(label='Excess Kurtosis: ' + str(rows['kurtosis'].iloc[0].round(3)))
        count_patch = mpatches.Patch(label='Count: ' + str(rows['n'].iloc[0]))
        ax.legend(handles=[skew_patch, kurtosis_patch, std_patch, count_patch], prop={'size': 12}, loc=(0.7, 1.001))
        ax.set_xlabel('t-score (mean + std)', fontsize=18)
        ax.set_ylabel('Frequency', fontsize=18)

        fig.savefig(out_dir + '/' + j)
        plt.close(fig)

3 

  


4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 