# Correlation between the fission yeast transcriptome and proteome

In [2]:
%matplotlib inline
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
plt.style.use('ggplot')

%load_ext autoreload
%autoreload 2

# Loading data

In [3]:
# locations of the two input files
path_data_mRNA = "data/pat1_average_modified.txt"
path_data_prot = "data/wtratioall.csv"

# mRNA table: tab-delimited, no header row (columns get integer labels)
raw_data_mRNA = pd.read_csv(path_data_mRNA, header=None, delimiter="\t")
# protein table: first column becomes the row index
raw_data_prot = pd.read_csv(path_data_prot, index_col=0)

In [4]:
# first five rows of the mRNA table exactly as read from disk
raw_data_mRNA.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,aap1,AAP1,SPBC1652.02 SPBC16A3.20C,1.0,22.703531,0.96036,0.54446,0.896232,1.160251,1.252143,2.309153,2.31075,2.449327,2.706837,3.064384
1,abc1: C2D10.18,ABC1,SPBC2D10.18,1.0,1.043491,0.622283,0.663191,0.619409,0.648403,0.662236,0.655953,0.580761,0.606205,0.752322,0.798264
2,abc1: C9E9.12c,ABC1,SPAC9E9.12C,1.0,2.112546,0.906822,0.565746,0.53522,1.224398,10.170195,7.380023,4.455168,2.804587,1.657471,1.31063
3,abp1,CBP1 ABP1,SPBC1105.04C,1.0,1.351277,0.884985,0.705397,0.401055,0.202862,0.17536,0.82992,0.959749,1.121036,1.122949,1.103435
4,abp2,ABP2,SPBC1861.02,1.0,0.816301,1.969788,1.736116,1.057102,0.494822,0.372226,0.533438,0.851486,0.989702,1.115665,1.159631


In [5]:
# first five rows of the protein table exactly as read from disk
raw_data_prot.head(n=5)

Unnamed: 0,A_00,A_01,A_02,A_03,A_04,A_05,A_06,A_07,A_08,A_09,...,C_01,C_02,C_03,C_04,C_05,C_06,C_07,C_08,C_09,C_10
SPAC1002.02,1,1.451188,3.734827,6.181905,7.607555,5.215707,3.439898,2.598756,2.18323,1.81129,...,1.555395,3.67654,6.030714,6.63578,5.153358,3.210663,2.511956,1.853247,1.525539,1.33268
SPAC1002.03c,1,0.922745,0.763064,0.74945,0.742595,0.771641,0.819817,0.859544,0.849914,0.838128,...,0.901151,0.828585,0.779982,0.767321,0.813157,0.830826,0.875965,0.938763,0.819587,0.850455
SPAC1002.04c,1,1.063774,1.109541,1.105289,1.256196,1.193269,0.834514,0.698355,0.71428,0.716728,...,0.834014,0.82909,0.911089,1.179868,1.106116,0.8248,0.652223,0.654474,0.618405,0.651751
SPAC1002.07c,1,0.946289,1.270676,1.577819,2.059521,2.07945,2.026755,1.934475,1.886784,1.820797,...,1.145764,1.396319,1.819543,2.141494,2.219369,2.134959,2.000173,1.98348,1.959289,1.90266
SPAC1002.09c,1,0.909058,0.871595,0.90703,0.941867,0.963464,0.963832,0.978117,1.026633,1.043288,...,0.917147,0.879789,0.881237,0.897882,0.903383,0.907655,0.899867,0.909359,0.945886,0.975173


Processing protein data

In [183]:
# split protein data into 3 sets of 11 columns each, selected by position.
# .iloc replaces the .ix indexer (deprecated in pandas 0.20, removed in 1.0);
# .values strips the labels so the three sets align positionally when summed.
data_prot_A = pd.DataFrame(raw_data_prot.iloc[:, 0:11].values)
data_prot_B = pd.DataFrame(raw_data_prot.iloc[:, 11:22].values)
data_prot_C = pd.DataFrame(raw_data_prot.iloc[:, 22:33].values)

# compute the element-wise average of the 3 data sets
# (the arithmetic already produces a new frame, so no explicit copy is needed)
data_prot = (data_prot_A + data_prot_B + data_prot_C) / 3.0

# set columns and rows indexes
data_prot.columns = ['plt'+str(k) for k in range(0,11)]
data_prot.index = raw_data_prot.index

# show beginning of dataframe
data_prot.head()

Unnamed: 0,plt0,plt1,plt2,plt3,plt4,plt5,plt6,plt7,plt8,plt9,plt10
SPAC1002.02,1.0,1.393371,3.257203,5.591645,6.932151,5.479337,3.574659,2.726113,2.114409,1.764191,1.439884
SPAC1002.03c,1.0,0.910152,0.785974,0.750362,0.746,0.770242,0.800074,0.843706,0.872157,0.819914,0.839864
SPAC1002.04c,1.0,0.990121,0.980455,1.050549,1.248047,1.217455,0.880529,0.704999,0.712103,0.701598,0.795477
SPAC1002.07c,1.0,1.046894,1.307253,1.640936,2.070811,2.12554,2.078482,1.9979,1.900363,1.857979,1.780422
SPAC1002.09c,1.0,0.904997,0.85532,0.872414,0.904201,0.923575,0.93211,0.93983,0.961271,0.991512,1.018891


Processing mRNA data

In [193]:
# discard columns 0 and 1, then promote column 2 to the row index
# (TODO: need to check if it removes useful names)
data_mRNA = raw_data_mRNA.drop([0, 1], axis=1).set_index(2)

# leave the row index unnamed and label the 12 remaining columns mlt0..mlt11
data_mRNA.index.name = None
data_mRNA.columns = ['mlt{}'.format(k) for k in range(12)]

# display the first rows of the result
data_mRNA.head()

Unnamed: 0,mlt0,mlt1,mlt2,mlt3,mlt4,mlt5,mlt6,mlt7,mlt8,mlt9,mlt10,mlt11
SPBC1652.02 SPBC16A3.20C,1.0,22.703531,0.96036,0.54446,0.896232,1.160251,1.252143,2.309153,2.31075,2.449327,2.706837,3.064384
SPBC2D10.18,1.0,1.043491,0.622283,0.663191,0.619409,0.648403,0.662236,0.655953,0.580761,0.606205,0.752322,0.798264
SPAC9E9.12C,1.0,2.112546,0.906822,0.565746,0.53522,1.224398,10.170195,7.380023,4.455168,2.804587,1.657471,1.31063
SPBC1105.04C,1.0,1.351277,0.884985,0.705397,0.401055,0.202862,0.17536,0.82992,0.959749,1.121036,1.122949,1.103435
SPBC1861.02,1.0,0.816301,1.969788,1.736116,1.057102,0.494822,0.372226,0.533438,0.851486,0.989702,1.115665,1.159631
