# LD calculation between top eQTL in GTEx V6 data
This version is based on the simple squared correlation coefficient $r^2$; the complete procedure is demonstrated in this notebook.

## Load genotype matrix

In [None]:
import pandas as pd
import numpy as np
# Load the genotype table and transpose it, so that the values of the file's
# first column become the column labels of `geno`.
genotype_path = "Whole_Blood_Analysis.snps.txt.gz"

# Read a single row first: only the header is needed to build the dtype map.
head = pd.read_csv(genotype_path, sep='\t', nrows=1)

# The 'Id' column is parsed as text; every other column as half-precision
# float (presumably to keep the full matrix small in memory).
dtype = {'Id': str}
for column in head.columns:
    if column != 'Id':
        dtype[column] = np.float16

# Full read, using the first column as the index, then transpose.
geno = pd.read_csv(genotype_path, sep='\t', index_col=0, header=0, dtype=dtype)
geno = geno.T

## Load SNPs of interest

In [None]:
# Build {chrom: [name, ...]}: for every chromosome, the unique SNP names
# ordered by (position, name).
#
# Each non-blank line of the list is split on '_': field 1 is taken as the
# chromosome, field 2 as the integer position, and fields 1.. re-joined with
# '_' form the SNP name (the leading field 0 is dropped).
snps_by_chrom = {}
with open('SNPList.maxz.txt') as snp_file:  # the handle is closed on exit
    for line in snp_file:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        fields = line.split('_')
        # A set removes duplicate (pos, name) records within a chromosome.
        snps_by_chrom.setdefault(fields[1], set()).add(
            (int(fields[2]), "_".join(fields[1:])))

# Sorting the (pos, name) tuples orders the names by position; grouping in a
# single pass also keeps the chromosome keys in first-seen order (on Python
# 3.7+) instead of the arbitrary order of a set.
snps = {chrom: [name for _, name in sorted(found)]
        for chrom, found in snps_by_chrom.items()}

## Extract genotype of interest by chromosome

In [None]:
# Per chromosome, keep only the SNPs of interest that are actually present as
# columns of the genotype matrix; chromosomes with no match are left out.
data = {}
for key, wanted in snps.items():
    names = [snp for snp in wanted if snp in geno.columns]
    if not names:
        continue
    data[key] = geno[names]

## Calculate $r^2$ for SNPs per chromosome

In [None]:
# r^2 per chromosome: square the pairwise Pearson correlation matrix of the
# selected SNP columns.
LD = {}
for chrom, genotypes in data.items():
    pearson_r = genotypes.corr(method='pearson')
    LD[chrom] = np.power(pearson_r, 2)

## Do the same analysis for combined data

In [None]:
# Put the per-chromosome genotype tables side by side (column-wise) and compute
# one r^2 matrix across all selected SNPs.
per_chrom_tables = list(data.values())
all_data = pd.concat(per_chrom_tables, axis=1)
all_r = all_data.corr(method='pearson')
all_LD = np.power(all_r, 2)

## Save result as R data frame via `feather`

In [None]:
import feather
# Write one feather file per chromosome, with the chromosome key embedded in
# the file name. A copy of each matrix is what gets passed to the writer.
# NOTE(review): confirm whether feather keeps the row labels; the matrices are
# square with rows in the same order as columns, so the column names suffice.
for key in LD:
    feather.write_dataframe(LD[key].copy(), "LD_chrom_{}.feather".format(key))
# The combined matrix goes to a fixed file name: the previous `.format(key)`
# on this literal had no placeholder to fill and only tied this line to the
# loop variable left over from above.
feather.write_dataframe(all_LD.copy(), "LD_all.feather")