# Approach 4


## Code notes:

- Since we deal with pairs of one variable (organ-organ or strain-strain) while keeping the other variable constant, the following terms apply:
    - `var_1`: the variable whose pairs we want to consider
    - `var_2`: the variable that is held constant, over which the likelihood is computed

        e.g., for the organ-organ likelihood, `var_1` will be the organs and `var_2` will be the strains

- `var_idx` represents the position of the variable's name within a column name once the column name is split on '_'. E.g., if the variable is organ, the index is 0, because the organ name is the first part of the split column name (`bone_AJ_r1` → `bone`). Similarly, the strain name is the second part (index 1).


In [31]:
import matplotlib.pyplot as plt
from src import utils
import pandas as pd
import numpy as np

%matplotlib inline
plt.rcParams['font.size'] = 12

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Read Data

In [55]:
# Load the matrix from CSV; the first CSV column supplies the row labels.
# (Judging by the file name, these are Pearson correlations between organ/strain samples.)
data = pd.read_csv('../data/raw/pearson_organpair_4 strains_corrected.csv', index_col=0)

# Project helper; presumably restricts/reorders the frame so rows and columns
# carry the same labels — confirm against src/utils.py.
data = utils.get_same_row_col_labels(data)
cols = data.columns.values
index = data.index.values

# Sanity check: row labels and column labels must be identical, in the same order.
# np.array_equal returns a plain False when the two axes differ in length, whereas
# `cols == index` would warn or raise (depending on the NumPy version) in that case.
print(np.array_equal(cols, index))



True


### Organ-Organ-lh

In [62]:
# Organ-organ likelihood table.
# Per the code notes at the top of this notebook, 0 is the position of the organ
# name and 1 the position of the strain name in a '_'-split column label;
# presumably the helper takes them as (var_1 index, var_2 index) — confirm.
org_org_df = utils.get_organ_var_likelihood(
    0,
    1,
    data,
    geom_normalize=True,
    use_common_var_2=True,
    debug=False,
)
org_org_df

Unnamed: 0,eye,kidney,liver,lung,muscle,pancreas,small.intestine,spleen
eye,3.121798,10.409211,23.191776,9.149386,7.497321,21.29413,8.720758,7.919263
kidney,,2.852896,3.333876,2.404913,9.961102,2.680285,3.94959,2.888752
liver,,,6.100181,3.447076,95.981707,3.686459,7.98004,4.744911
lung,,,,2.055504,6.271727,5.07831,1.823408,1.161422
muscle,,,,,1.055759,13.898901,4.381725,12.506341
pancreas,,,,,,5.631879,4.513222,5.518157
small.intestine,,,,,,,1.448935,3.693332
spleen,,,,,,,,1.725389


### Strain-Strain-lh

In [7]:
# Strain-strain likelihood table: the index arguments are swapped relative to the
# organ-organ call (1 = strain position, 0 = organ position in a column label).
# NOTE(review): the recorded output of this cell includes per-pair "mu ... std"
# lines, and the organ-organ call passes debug=False — check whether this helper
# accepts the same flag if those diagnostics are unwanted.
str_str_df = utils.get_strain_var_likelihood(
    1,
    0,
    data,
    geom_normalize=True,
    use_common_var_2=True,
)
str_str_df

mu for ('AJ', 'BL') and ('bone', 'bone') = 0.9082195356666666 and std = 0.15896842736759656
mu for ('AJ', 'BL') and ('bone', 'brain') = 0.16141347974999998 and std = 0.020104080473092537
mu for ('BL', 'BL') and ('bone', 'brain') = 0.66211574825 and std = 0.030285584329499
mu for ('BL', 'BL') and ('bone', 'bone') = 0.9475546393333333 and std = 0.09083802929594105


Unnamed: 0,AJ,BL
AJ,6.020103,1.333347
BL,0.936176,5.492355


### Likelihood

In [8]:
# Recompute the organ-organ likelihood and write it to an Excel workbook.
# The helper is reached through `utils` because the notebook only imports
# `from src import utils`; the bare name raises NameError on a fresh kernel.
# NOTE(review): this run uses use_common_var_2=False, while the organ-organ table
# displayed earlier in the notebook uses True, and it rebinds org_org_df —
# confirm that the difference is intended.
org_org_df = utils.get_organ_var_likelihood(0, 1, data, geom_normalize=True, use_common_var_2=False)
with pd.ExcelWriter('../../result/organpair_4 strains_corrected/combined-likelihood/1-org-org-lh-app4.xlsx') as writer:
    org_org_df.to_excel(writer)

In [9]:
# Recompute the strain-strain likelihood and write it to an Excel workbook.
# The helper is reached through `utils` because the notebook only imports
# `from src import utils`; the bare name raises NameError on a fresh kernel.
strain_strain_df = utils.get_strain_var_likelihood(1, 0, data, geom_normalize=True, use_common_var_2=True)
with pd.ExcelWriter('../../result/organpair_4 strains_corrected/combined-likelihood/2-strain-strain-lh-app4.xlsx') as writer:
    strain_strain_df.to_excel(writer)

### Ranking

In [10]:
# Ranking for the organ-organ likelihoods. raw_df_path is the same workbook path
# that the organ-organ export cell writes; save_path is a sibling "-rank" file.
# Presumably get_ranking reads raw_df_path and writes its result to save_path —
# confirm against its definition.
# NOTE(review): get_ranking is called unqualified, but the only project import
# visible in this notebook is `from src import utils`; on a fresh kernel this
# raises NameError unless the name is defined somewhere not shown here.
raw_df_path = '../../result/organpair_4 strains_corrected/combined-likelihood/1-org-org-lh-app4.xlsx'
save_path = '../../result/organpair_4 strains_corrected/combined-likelihood/1-org-org-lh-rank-app4.xlsx'
get_ranking(raw_df_path, save_path)

In [11]:
# Ranking for the strain-strain likelihoods. raw_df_path is the same workbook path
# that the strain-strain export cell writes; save_path is a sibling "-rank" file.
# Presumably get_ranking reads raw_df_path and writes its result to save_path —
# confirm against its definition.
# NOTE(review): get_ranking is called unqualified, but the only project import
# visible in this notebook is `from src import utils`; on a fresh kernel this
# raises NameError unless the name is defined somewhere not shown here.
raw_df_path = '../../result/organpair_4 strains_corrected/combined-likelihood/2-strain-strain-lh-app4.xlsx'
save_path = '../../result/organpair_4 strains_corrected/combined-likelihood/2-strain-strain-lh-rank-app4.xlsx'
get_ranking(raw_df_path, save_path)

## Debugging

### Organ pair

In [12]:
# Two-step slice: first select the ('bone', 'bone') pair from the full matrix,
# then select the ('AJ', 'AJ') pair from that result and display it.
# NOTE(review): get_var_pair_df is called unqualified; only `utils` is imported
# in this notebook, so confirm where this name comes from on a fresh kernel.
a = get_var_pair_df(('bone', 'bone'), data=data)
b = get_var_pair_df(('AJ', 'AJ'), data=a)
b

Unnamed: 0,bone_AJ_r1,bone_AJ_r2
bone_AJ_r1,1.0,0.724659
bone_AJ_r2,0.724659,1.0


In [13]:
# Two-step slice: first select the ('bone', 'bone') pair from the full matrix,
# then select the ('BL', 'BL') pair from that result and display it.
# NOTE(review): get_var_pair_df is called unqualified; only `utils` is imported
# in this notebook, so confirm where this name comes from on a fresh kernel.
a = get_var_pair_df(('bone', 'bone'), data=data)
b = get_var_pair_df(('BL', 'BL'), data=a)
b

Unnamed: 0,bone_BL_r6,bone_BL_r7
bone_BL_r6,1.0,0.842664
bone_BL_r7,0.842664,1.0


### Strain pair

In [14]:
# Two-step slice: first select the ('AJ', 'AJ') pair from the full matrix,
# then select the ('bone', 'brain') pair from that result and display it.
# NOTE(review): get_var_pair_df is called unqualified; only `utils` is imported
# in this notebook, so confirm where this name comes from on a fresh kernel.
a = get_var_pair_df(('AJ', 'AJ'), data=data)
b = get_var_pair_df(('bone', 'brain'), data=a)
b

Unnamed: 0,brain_AJ_r1,brain_AJ_r2
bone_AJ_r1,0.185343,0.153868
bone_AJ_r2,0.138242,0.1682


In [15]:
# Two-step slice: first select the ('BL', 'BL') pair from the full matrix,
# then select the ('bone', 'brain') pair from that result and display it.
# NOTE(review): get_var_pair_df is called unqualified; only `utils` is imported
# in this notebook, so confirm where this name comes from on a fresh kernel.
a = get_var_pair_df(('BL', 'BL'), data=data)
b = get_var_pair_df(('bone', 'brain'), data=a)
b

Unnamed: 0,brain_BL_r1,brain_BL_r2,brain_BL_r3,brain_BL_r4,brain_BL_r5,brain_BL_r6,brain_BL_r7,brain_BL_r8,brain_BL_r9
bone_BL_r6,0.634599,0.702376,0.648401,0.63335,0.659042,0.65178,0.646761,0.719912,0.720934
bone_BL_r7,0.667836,0.643652,0.721478,0.704047,0.611087,0.649846,0.703551,0.706947,0.772451
