# Create trio pedigrees in the FarGen cohort tree

## Setup

Connect to the Neo4j DB.

In [73]:
from neo4j import GraphDatabase
import logging
# Configure the root logger so that records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)

In [9]:
# Bolt address of the Neo4j instance that stores the genealogy.
neo4j_url = 'bolt://aebs-db:7687'

# Create the driver, then open the session on which the queries in this notebook are run.
driver = GraphDatabase.driver(neo4j_url)
session = driver.session()

## Label FarGen participants

Read the RIN (genealogy ID) and sample name of each participant from a CSV file.

In [86]:
# CSV file with one "RIN,samplename" record per line, preceded by a single header line.
samplenames_fn = '/home/olavur/experiments/2020-11-13_fargen1_exome_analysis/data/genealogy/fargen_rin_samplename.csv'

sample_list = []  # List of [RIN, sample name] pairs, in file order.
rin_list = []  # List of RIN (genealogy) IDs, in file order.
output_data = dict()  # Data to write to CSV will be stored in this dictionary, keyed by RIN.

with open(samplenames_fn) as fid:
    _ = fid.readline()  # Discard header.
    # Numbering starts at 2 because the header occupies line 1 of the file.
    for line_number, line in enumerate(fid, start=2):
        line = line.strip()
        if not line:
            # A blank line (e.g. a trailing newline at the end of the file) holds no record.
            continue
        # Strip each field so stray whitespace around the comma cannot end up in the IDs
        # that are later used as exact-match keys in the database.
        fields = [field.strip() for field in line.split(',')]
        if len(fields) != 2:
            raise ValueError(
                '{fn}, line {n}: expected "RIN,samplename" but got {line!r}'.format(
                    fn=samplenames_fn, n=line_number, line=line
                )
            )
        # Get the RIN and samplename from this line.
        rin, sample = fields
        rin_list.append(rin)
        output_data[rin] = {'sample': sample}
        sample_list.append([rin, sample])

In [87]:
# Report how many RINs were read from the CSV file.
sample_count = len(rin_list)
print('Number of samples: {n}'.format(n=sample_count))

Number of samples: 1534


Look up all probands in the database and label them as FarGen participants.

In [35]:
# Label all FarGen participants and, in the same query, count the distinct nodes that
# were matched from rin_list.
# Counting inside the labelling query (instead of counting every :FarGen node afterwards)
# keeps the check accurate even if :FarGen labels from an earlier run with a different
# list are still present in the database.
result = session.run(
    'UNWIND $inds AS x MATCH (proband:Person {ind: x}) SET proband:FarGen RETURN count(DISTINCT proband)',
    inds=rin_list,
)
# The aggregation yields a single row, so the first value is the number of probands found.
print('Found {n} out of {m} probands.'.format(n=result.value()[0], m=len(rin_list)))

Found 1534 out of 1534 probands.


Store the FN number (sample name) of each proband as a property on its node.

In [124]:
# Store the sample name of every [RIN, sample name] pair as the FN property of the
# Person node with that RIN, and report how many rows were updated.
set_fn_query = 'UNWIND $sample AS x MATCH (proband:Person {ind: x[0]}) SET proband.FN = x[1] RETURN count(*)'
result = session.run(set_fn_query, sample=sample_list)
value = result.value()
print('Set FN number of {n} probands.'.format(n=value[0]))

Set FN number of 1534 probands.


## Find trios

In [126]:
# A trio is a FarGen child whose mother and father are both FarGen participants.
# Every returned row holds [child FN, father FN, mother FN, child sex].
trio_query = 'MATCH path = (mother:FarGen)-[:is_mother]->(child:FarGen)<-[:is_father]-(father:FarGen) RETURN child.FN, father.FN, mother.FN, child.sex'
result = session.run(trio_query)
trio_ind_list = result.values()

In [127]:
# Unique FN numbers per role. Duplicates are removed because the same individual
# can occur in more than one trio row.
child_inds = list({trio[0] for trio in trio_ind_list})
father_inds = list({trio[1] for trio in trio_ind_list})
mother_inds = list({trio[2] for trio in trio_ind_list})

In [128]:
# Summarise the number of trios and of unique fathers, mothers and children.
trio_summary = '{t} trios with {f} fathers, {m} mothers and {c} children.'
print(trio_summary.format(
    t=len(trio_ind_list),
    f=len(father_inds),
    m=len(mother_inds),
    c=len(child_inds),
))

101 trios with 73 fathers, 73 mothers and 101 children.


## Write fam file

In [129]:
# Numeric sex codes written to the fam file: 1 for male ('M'), 2 for female ('F').
sex_codes = {'M': 1, 'F': 2}

# Build and validate every line BEFORE opening the output file, so that an invalid sex
# value cannot leave a truncated or partially written trios.fam behind.
# Each line is tab-separated: a constant family ID of 1, the child's FN, the father's FN,
# the mother's FN, the child's sex code, and a constant 0 in the last column
# (the phenotype column in the PLINK .fam layout).
fam_lines = []
for child, father, mother, sex in trio_ind_list:
    if sex not in ('M', 'F'):
        # Raise explicitly rather than assert, so the check also runs under `python -O`.
        raise ValueError("Sex must be one of 'F' or 'M'. Offending value: {s}".format(s=sex))
    fam_lines.append(
        '1\t{c}\t{f}\t{m}\t{s}\t0\n'.format(c=child, f=father, m=mother, s=sex_codes[sex])
    )

with open('/home/olavur/experiments/2020-11-13_fargen1_exome_analysis/data/genealogy/trios.fam', 'w') as fid:
    fid.writelines(fam_lines)