# Get CoExpression Data for Browser
- **Author(s)** - Teresa Perinan, Kajsa Brolin, Frank Grenn
- **Date Started** - June 2020
- **Quick Description:** Filter the coexpression data down to the genes shown in the browser and collapse the per-tissue columns into one column per data type (expression, adjacency, module membership).

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Root folder of the app data; '$PATH' looks like a placeholder to be
# replaced with the real location before running (TODO confirm).
DATADIR = '$PATH/AppDataProcessing'
# Sub-folder holding the coexpression inputs.
WRKDIR = DATADIR + "/coexpression"


In [None]:
# Load the evidence-gene table (one CSV under DATADIR) and show its
# dimensions plus the first rows as a quick sanity check.
genes_path = f"{DATADIR}/genes_by_locus.csv"
genes = pd.read_csv(genes_path, sep=",")
for summary in (genes.shape, genes.head()):
    print(summary)


In [None]:
# Load the coexpression table from the working folder and preview it.
# (An earlier version of this cell read G2PMLData4IPDGC.csv instead.)
g2pml_path = f"{WRKDIR}/Genes_g2pml_present_genes_by_locus.csv"
g2pml = pd.read_csv(g2pml_path, sep=",")
for summary in (g2pml.shape, g2pml.head()):
    print(summary)

In [None]:

# Join the evidence genes with the coexpression table on the three shared
# key columns (pandas' default inner join), then report the resulting size.
join_keys = ['GENE', 'GWAS', 'LOC_NUM']
merged = genes.merge(g2pml, on=join_keys)
print(merged.shape)

In [None]:
# Show the first five rows of the joined table.
merged.head(n=5)

### Group the columns by data type

In [None]:
# Substrings that identify each family of columns in the merged table; the
# same labels are reused later as the names of the combined output columns.
expstr, adjstr, mmstr = 'Expression', 'Adjacency', 'Module Membership'

In [None]:
# Every column whose name contains the expression label, in table order.
expcols = list(filter(lambda column: expstr in column, merged.columns))
print(len(expcols))

In [None]:
# Peek at the first four expression column names.
expcols[:4]

In [None]:
# Every column whose name contains the adjacency label, in table order.
adjcols = list(filter(lambda column: adjstr in column, merged.columns))
print(len(adjcols))

In [None]:
# Every column whose name contains the module-membership label, in table order.
mmcols = list(filter(lambda column: mmstr in column, merged.columns))
print(len(mmcols))

### Replace each 1 with the tissue name taken from its column header

In [None]:
# Preview rows with a non-zero value in one expression column (sanity check
# before the 1s are relabelled). The legacy 'ExprSpecificAdiposeSub' column is
# used when it exists; otherwise fall back to the first column selected via
# `expstr`, because that hard-coded name does not contain `expstr` and would
# raise a KeyError on tables that only carry the `expstr`-style column names.
preview_col = (
    'ExprSpecificAdiposeSub'
    if 'ExprSpecificAdiposeSub' in merged.columns
    else expcols[0]
)
merged.loc[merged[preview_col] != 0].head()

In [None]:
# Work on a copy so `merged` keeps its original values.
merged_named = merged.copy()
# For each expression column, the label is its column name with the
# " Expression" text removed; every 1 in that column becomes that label.
exp_labels = {column: column.replace(" " + expstr, "") for column in expcols}
merged_named[expcols] = merged_named[expcols].replace(1, exp_labels)


In [None]:
# Preview rows with a non-zero entry in one expression column after the 1s
# were relabelled. The legacy 'ExprSpecificAdiposeSub' column is used when it
# exists; otherwise fall back to the first column selected via `expstr`,
# because that hard-coded name does not contain `expstr` and would raise a
# KeyError on tables that only carry the `expstr`-style column names.
preview_col = (
    'ExprSpecificAdiposeSub'
    if 'ExprSpecificAdiposeSub' in merged_named.columns
    else expcols[0]
)
merged_named.loc[merged_named[preview_col] != 0].head()

In [None]:
# Same relabelling for the adjacency columns: each 1 becomes the column's
# name with the " Adjacency" text removed.
adj_labels = {column: column.replace(" " + adjstr, "") for column in adjcols}
merged_named[adjcols] = merged_named[adjcols].replace(1, adj_labels)


In [None]:
# Same relabelling for the module-membership columns: each 1 becomes the
# column's name with the " Module Membership" text removed.
mm_labels = {column: column.replace(" " + mmstr, "") for column in mmcols}
merged_named[mmcols] = merged_named[mmcols].replace(1, mm_labels)


### start combining the data into three columns (expression, adjacency, module membership)

In [None]:
# Start the output table from an independent copy of the relabelled data,
# then print its dimensions and first rows.
coexp_form = merged_named.copy(deep=True)
for summary in (coexp_form.shape, coexp_form.head()):
    print(summary)

In [None]:
# Collapse the expression columns into one ';'-separated string per row.
# Zeros are compared numerically and missing values are skipped, so a column
# stored as float (0.0) or containing NaN no longer leaks '0.0' / 'nan' into
# the list; a literal '0' string is still dropped, as before.
coexp_form['Expression'] = coexp_form[expcols].apply(
    lambda row: ';'.join(
        str(value)
        for value in row
        if pd.notna(value) and value != 0 and value != '0'
    ),
    axis=1,
)

In [None]:
# Display the combined expression column.
coexp_form.loc[:, 'Expression']

In [None]:
# Collapse the adjacency columns into one ';'-separated string per row.
# Zeros are compared numerically and missing values are skipped, so a column
# stored as float (0.0) or containing NaN no longer leaks '0.0' / 'nan' into
# the list; a literal '0' string is still dropped, as before.
coexp_form['Adjacency'] = coexp_form[adjcols].apply(
    lambda row: ';'.join(
        str(value)
        for value in row
        if pd.notna(value) and value != 0 and value != '0'
    ),
    axis=1,
)

In [None]:
# Collapse the module-membership columns into one ';'-separated string per
# row. Zeros are compared numerically and missing values are skipped, so a
# column stored as float (0.0) or containing NaN no longer leaks '0.0' / 'nan'
# into the list; a literal '0' string is still dropped, as before.
coexp_form['Module Membership'] = coexp_form[mmcols].apply(
    lambda row: ';'.join(
        str(value)
        for value in row
        if pd.notna(value) and value != 0 and value != '0'
    ),
    axis=1,
)

In [None]:
# Keep only the three key columns and the three combined columns, in this order.
output_columns = [
    'GENE',
    'GWAS',
    'LOC_NUM',
    'Expression',
    'Adjacency',
    'Module Membership',
]
coexp_form = coexp_form.loc[:, output_columns]

In [None]:
# Final check of the output table: dimensions, then the first rows.
for summary in (coexp_form.shape, coexp_form.head()):
    print(summary)



In [None]:
# Write the combined table as a comma-separated file without the row index.
output_path = f"{DATADIR}/results/coExpressionData.csv"
coexp_form.to_csv(output_path, sep=",", index=False)