# Correlation between the fission yeast transcriptome and proteome

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
plt.style.use('ggplot')

%load_ext autoreload
%autoreload 2

# Loading data

In [106]:
# --- transcriptome (mRNA) ---
# Tab-separated text file read without a header row, so the columns receive
# default integer labels (0, 1, 2, ...).
path_data_mRNA = "data/pat1_average_modified.txt"
raw_data_mRNA = pd.read_csv(path_data_mRNA, header=None, sep='\t')

# --- proteome ---
# Comma-separated file; its first column is used as the row index.
path_data_prot = "data/wtratioall.csv"
raw_data_prot = pd.read_csv(path_data_prot, index_col=0)

In [107]:
# preview the first five rows of the raw mRNA table
raw_data_mRNA.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,aap1,AAP1,SPBC1652.02 SPBC16A3.20C,1.0,22.703531,0.96036,0.54446,0.896232,1.160251,1.252143,2.309153,2.31075,2.449327,2.706837,3.064384
1,abc1: C2D10.18,ABC1,SPBC2D10.18,1.0,1.043491,0.622283,0.663191,0.619409,0.648403,0.662236,0.655953,0.580761,0.606205,0.752322,0.798264
2,abc1: C9E9.12c,ABC1,SPAC9E9.12C,1.0,2.112546,0.906822,0.565746,0.53522,1.224398,10.170195,7.380023,4.455168,2.804587,1.657471,1.31063
3,abp1,CBP1 ABP1,SPBC1105.04C,1.0,1.351277,0.884985,0.705397,0.401055,0.202862,0.17536,0.82992,0.959749,1.121036,1.122949,1.103435
4,abp2,ABP2,SPBC1861.02,1.0,0.816301,1.969788,1.736116,1.057102,0.494822,0.372226,0.533438,0.851486,0.989702,1.115665,1.159631


# Processing protein data

In [183]:
# Split the protein table into 3 sets of 11 consecutive columns each.
# `.iloc` selects strictly by position; the `.ix` indexer used here before was
# deprecated in pandas 0.20 and removed in pandas 1.0.
# NOTE(review): the sets look like three repeated series of the same 11
# points -- confirm against the layout of the input file.

# Guard against a narrower file: out-of-range positional slices are silently
# truncated, which would otherwise produce NaN columns in the average.
if raw_data_prot.shape[1] < 33:
    raise ValueError(
        "expected at least 33 protein data columns (3 sets of 11), got %d"
        % raw_data_prot.shape[1]
    )

# `.values` strips the original labels, so the three frames share the same
# default integer labels and are combined position by position below.
data_prot_A = pd.DataFrame(raw_data_prot.iloc[:, 0:11].values)
data_prot_B = pd.DataFrame(raw_data_prot.iloc[:, 11:22].values)
data_prot_C = pd.DataFrame(raw_data_prot.iloc[:, 22:33].values)

# Element-wise average of the 3 sets (the arithmetic already returns a new
# frame, so no explicit copy is needed). A missing value in any one set makes
# the corresponding average NaN.
data_prot = (data_prot_A + data_prot_B + data_prot_C) / 3.0

# Label the columns plt0..plt10 and restore the row index of the raw table.
data_prot.columns = ['plt' + str(k) for k in range(0, 11)]
data_prot.index = raw_data_prot.index

# show beginning of dataframe
data_prot.head()

Unnamed: 0,plt0,plt1,plt2,plt3,plt4,plt5,plt6,plt7,plt8,plt9,plt10
SPAC1002.02,1.0,1.393371,3.257203,5.591645,6.932151,5.479337,3.574659,2.726113,2.114409,1.764191,1.439884
SPAC1002.03c,1.0,0.910152,0.785974,0.750362,0.746,0.770242,0.800074,0.843706,0.872157,0.819914,0.839864
SPAC1002.04c,1.0,0.990121,0.980455,1.050549,1.248047,1.217455,0.880529,0.704999,0.712103,0.701598,0.795477
SPAC1002.07c,1.0,1.046894,1.307253,1.640936,2.070811,2.12554,2.078482,1.9979,1.900363,1.857979,1.780422
SPAC1002.09c,1.0,0.904997,0.85532,0.872414,0.904201,0.923575,0.93211,0.93983,0.961271,0.991512,1.018891


# Processing mRNA data

In [193]:
# Build the mRNA table from the raw frame in one non-mutating chain:
#   * drop the first 2 columns (TODO: need to check if it removes useful names)
#   * use column 2 as the row index
data_mRNA = (
    raw_data_mRNA
    .drop([0, 1], axis=1)
    .set_index([2])
)

# The index inherits the label of column 2 as its name; clear it.
data_mRNA.index.name = None

# Name the remaining 12 columns mlt0..mlt11.
data_mRNA.columns = ['mlt' + str(k) for k in range(0, 12)]

# NOTE(review): in the displayed rows some index entries hold several
# space-separated IDs and use an upper-case suffix (e.g. "SPAC9E9.12C"), while
# the protein table's index shows lower-case suffixes (e.g. "SPAC1002.03c").
# This probably needs normalising before the two tables are matched -- confirm.

# show beginning of dataframe
data_mRNA.head()

Unnamed: 0,mlt0,mlt1,mlt2,mlt3,mlt4,mlt5,mlt6,mlt7,mlt8,mlt9,mlt10,mlt11
SPBC1652.02 SPBC16A3.20C,1.0,22.703531,0.96036,0.54446,0.896232,1.160251,1.252143,2.309153,2.31075,2.449327,2.706837,3.064384
SPBC2D10.18,1.0,1.043491,0.622283,0.663191,0.619409,0.648403,0.662236,0.655953,0.580761,0.606205,0.752322,0.798264
SPAC9E9.12C,1.0,2.112546,0.906822,0.565746,0.53522,1.224398,10.170195,7.380023,4.455168,2.804587,1.657471,1.31063
SPBC1105.04C,1.0,1.351277,0.884985,0.705397,0.401055,0.202862,0.17536,0.82992,0.959749,1.121036,1.122949,1.103435
SPBC1861.02,1.0,0.816301,1.969788,1.736116,1.057102,0.494822,0.372226,0.533438,0.851486,0.989702,1.115665,1.159631
