```
                             rs7412 
                             NC_000019.10:g.44908822
                             C          T
rs429358                 C   APOE-ε4    APOE-ε1
NC_000019.10:g.44908684  T   APOE-ε3    APOE-ε2
http://snpedia.com/index.php/APOE
```

# Setup

In [1]:
import json
import pprint

from vmc.richmodels import Identifier, Interval, Locus, Allele, Haplotype, Genotype
import vmc.codecs.json

def to_json(o):
    """Return ``o`` rendered as pretty-printed JSON text.

    Serialization is delegated to ``json.dumps`` using the
    ``vmc.codecs.json.JSONEncoder`` encoder class. The output is indented by
    two spaces, has its keys sorted, and leaves non-ASCII characters
    (for example "ε") unescaped.
    """
    dump_options = {
        "indent": 2,
        "sort_keys": True,
        "ensure_ascii": False,
        "cls": vmc.codecs.json.JSONEncoder,
    }
    return json.dumps(o, **dump_options)

In [2]:
# Reference sequence that every interval, locus and allele in this notebook is placed on.
sr = "NCBI:NC_000019.10"

# Each SNP covers a single base. The intervals are written as (position - 1, position),
# where position is the 1-based g. coordinate shown in the table at the top of the notebook.
intervals = {
    rsid: Interval(position - 1, position)
    for rsid, position in (
        ("rs429358", 44908684),
        ("rs7412", 44908822),
    )
}

# Show one interval as repr, str and JSON.
o = intervals["rs429358"]
print("r={o!r}\ns={o}\nj={j}".format(o=o, j=to_json(o)))

r=Interval(start=44908683, end=44908684)
s=44908683:44908684
j={
  "end": 44908684,
  "start": 44908683
}


In [3]:
# One Locus per SNP: the shared sequence reference paired with that SNP's interval.
loci = {rsid: Locus(sr, interval) for rsid, interval in intervals.items()}

# Show one locus as repr, str, identifier and JSON.
o = loci["rs429358"]
print("r={o!r}\ns={o}\nid={o.identifier}\nj={j}".format(o=o, j=to_json(o)))

r=Locus(seqref='NCBI:NC_000019.10', location=Interval(start=44908683, end=44908684), id=None)
s=NCBI:NC_000019.10:44908683:44908684
id=GL:4QXqkmv3lDN8BE_LmzgDwiCm8hPvw3pi
j={
  "id": null,
  "location": {
    "end": 44908684,
    "start": 44908683
  },
  "seqref": "NCBI:NC_000019.10"
}


# Alleles

In [4]:
# Both bases ("T" and "C") at each SNP; every key is the rsID followed by the base.
alleles = {
    rsid + base: Allele(sr, intervals[rsid], base)
    for rsid in ("rs429358", "rs7412")
    for base in ("T", "C")
}

# Show one allele as repr, str, identifier and JSON.
o = alleles["rs429358C"]
print("r={o!r}\ns={o}\nid={o.identifier}\nj={j}".format(o=o, j=to_json(o)))

r=Allele(seqref='NCBI:NC_000019.10', location=Interval(start=44908683, end=44908684), replacement='C', id=None)
s=NCBI:NC_000019.10:44908683:44908684:C
id=GA:HW17jzWbPWQyIcRdwwyeKvuBocJSAilo
j={
  "id": null,
  "location": {
    "end": 44908684,
    "start": 44908683
  },
  "replacement": "C",
  "seqref": "NCBI:NC_000019.10"
}


# Haplotypes

In [5]:
# APOE haplotypes as pairs of alleles, following the table at the top of the notebook.
# "ε4r" lists the same two alleles as "ε4" in the opposite order.
haplotypes = {
    name: Haplotype([alleles[key] for key in allele_keys])
    for name, allele_keys in (
        ("ε1", ("rs429358C", "rs7412T")),
        ("ε2", ("rs429358T", "rs7412T")),
        ("ε3", ("rs429358T", "rs7412C")),
        ("ε4", ("rs429358C", "rs7412C")),
        ("ε4r", ("rs7412C", "rs429358C")),
    )
}

# Show one haplotype as repr, str, identifier and JSON.
o = haplotypes["ε1"]
print("r={o!r}\ns={o}\nid={o.identifier}\nj={j}".format(o=o, j=to_json(o)))

r=Haplotype(alleles=[Allele(seqref='NCBI:NC_000019.10', location=Interval(start=44908683, end=44908684), replacement='C', id='GA:HW17jzWbPWQyIcRdwwyeKvuBocJSAilo'), Allele(seqref='NCBI:NC_000019.10', location=Interval(start=44908821, end=44908822), replacement='T', id='GA:uynDR_lCUsmDMN0LWAbpQZMY3zDtCFN8')], id='GH:ZK__hBg12xTo-H7s_60s2nAD3WYxSnoW')
s=Haplotype(alleles=[Allele(seqref='NCBI:NC_000019.10', location=Interval(start=44908683, end=44908684), replacement='C', id='GA:HW17jzWbPWQyIcRdwwyeKvuBocJSAilo'), Allele(seqref='NCBI:NC_000019.10', location=Interval(start=44908821, end=44908822), replacement='T', id='GA:uynDR_lCUsmDMN0LWAbpQZMY3zDtCFN8')], id='GH:ZK__hBg12xTo-H7s_60s2nAD3WYxSnoW')
id=GH:ZK__hBg12xTo-H7s_60s2nAD3WYxSnoW
j={
  "allele_ids": [
    "GA:HW17jzWbPWQyIcRdwwyeKvuBocJSAilo",
    "GA:uynDR_lCUsmDMN0LWAbpQZMY3zDtCFN8"
  ],
  "id": "GH:ZK__hBg12xTo-H7s_60s2nAD3WYxSnoW"
}


In [6]:
# Map each haplotype identifier to a display name.
#
# "ε4" and "ε4r" contain the same alleles and resolve to a single identifier, so two
# names compete for one key. A dict comprehension keeps the LAST name written for a key,
# which would make the winner depend on the iteration order of `haplotypes`. Walking the
# names in reverse sorted order makes the result deterministic: for any shared
# identifier the name that sorts first ("ε4" rather than "ε4r") is written last and kept.
haplotype_id_name_map = {
    h.identifier: n
    for n, h in sorted(haplotypes.items(), key=lambda kv: kv[0], reverse=True)
}
print(haplotype_id_name_map)

{'GH:2gEuysBG_-0WpJgLxp0eTnsP48bZ-xgs': 'ε4', 'GH:XmtlBZ0GGIQ8kp6v_rR8M8bsSUx9T3xx': 'ε3', 'GH:ZK__hBg12xTo-H7s_60s2nAD3WYxSnoW': 'ε1', 'GH:rVxlCAFnc-TBK0hlW99Z35Ewu7LatGlX': 'ε2'}


# Genotypes/Diplotypes

In [7]:
# Every ordered pairing of the named haplotypes (including each haplotype with itself),
# keyed as "<first name>/<second name>".
genotypes = {
    "{}/{}".format(first_name, second_name): Genotype([first_hap, second_hap])
    for first_name, first_hap in haplotypes.items()
    for second_name, second_hap in haplotypes.items()
}

# Show one genotype as repr, str, identifier and JSON.
o = genotypes["ε1/ε1"]
print("r={o!r}\ns={o}\nid={o.identifier}\nj={j}".format(o=o, j=to_json(o)))

r=Genotype(haplotypes=[Haplotype(alleles=[Allele(seqref='NCBI:NC_000019.10', location=Interval(start=44908683, end=44908684), replacement='C', id='GA:HW17jzWbPWQyIcRdwwyeKvuBocJSAilo'), Allele(seqref='NCBI:NC_000019.10', location=Interval(start=44908821, end=44908822), replacement='T', id='GA:uynDR_lCUsmDMN0LWAbpQZMY3zDtCFN8')], id='GH:ZK__hBg12xTo-H7s_60s2nAD3WYxSnoW'), Haplotype(alleles=[Allele(seqref='NCBI:NC_000019.10', location=Interval(start=44908683, end=44908684), replacement='C', id='GA:HW17jzWbPWQyIcRdwwyeKvuBocJSAilo'), Allele(seqref='NCBI:NC_000019.10', location=Interval(start=44908821, end=44908822), replacement='T', id='GA:uynDR_lCUsmDMN0LWAbpQZMY3zDtCFN8')], id='GH:ZK__hBg12xTo-H7s_60s2nAD3WYxSnoW')], id='GG:ZVRWU2xPwJkExwxT1JZsJ9tuxjmMOx8h')
s=Genotype(haplotypes=[Haplotype(alleles=[Allele(seqref='NCBI:NC_000019.10', location=Interval(start=44908683, end=44908684), replacement='C', id='GA:HW17jzWbPWQyIcRdwwyeKvuBocJSAilo'), Allele(seqref='NCBI:NC_000019.10', location=In

In [10]:
# Print every genotype name with its identifier, ordered by name. In the recorded
# output, names that differ only in haplotype order (or in "ε4" versus "ε4r") show the
# same identifier.
for k in sorted(genotypes):
    o = genotypes[k]
    print(k, o.identifier)

ε1/ε1 GG:ZVRWU2xPwJkExwxT1JZsJ9tuxjmMOx8h
ε1/ε2 GG:VTvW9slHtWtiRy60jxUT7NKP8LcCSzNA
ε1/ε3 GG:VsizGjqNIRdqc9tXPG8C6xbdy75gTa1r
ε1/ε4 GG:uwjP68rM32tnfUzyEI0C3UYJuHzSgbKy
ε1/ε4r GG:uwjP68rM32tnfUzyEI0C3UYJuHzSgbKy
ε2/ε1 GG:VTvW9slHtWtiRy60jxUT7NKP8LcCSzNA
ε2/ε2 GG:RBJA8Q81fsI3dD6_YjD0QPfH98c-k-Wf
ε2/ε3 GG:F3Da37ph0Y50PlJqqEEt6xCWfXJc_9aP
ε2/ε4 GG:MkuyYK5K2k_YirfZPRegkKkHPqCIp_Cj
ε2/ε4r GG:MkuyYK5K2k_YirfZPRegkKkHPqCIp_Cj
ε3/ε1 GG:VsizGjqNIRdqc9tXPG8C6xbdy75gTa1r
ε3/ε2 GG:F3Da37ph0Y50PlJqqEEt6xCWfXJc_9aP
ε3/ε3 GG:q2cOY8JoJjYXk5Rfew6184zlV56mj8JO
ε3/ε4 GG:Ggpm08lTnnBhcq3Jgk8-igB1Yjl4sxG9
ε3/ε4r GG:Ggpm08lTnnBhcq3Jgk8-igB1Yjl4sxG9
ε4/ε1 GG:uwjP68rM32tnfUzyEI0C3UYJuHzSgbKy
ε4/ε2 GG:MkuyYK5K2k_YirfZPRegkKkHPqCIp_Cj
ε4/ε3 GG:Ggpm08lTnnBhcq3Jgk8-igB1Yjl4sxG9
ε4/ε4 GG:4BmAxQ_6o1RTb6JIqNPT752nmoYNTR4C
ε4/ε4r GG:4BmAxQ_6o1RTb6JIqNPT752nmoYNTR4C
ε4r/ε1 GG:uwjP68rM32tnfUzyEI0C3UYJuHzSgbKy
ε4r/ε2 GG:MkuyYK5K2k_YirfZPRegkKkHPqCIp_Cj
ε4r/ε3 GG:Ggpm08lTnnBhcq3Jgk8-igB1Yjl4sxG9
ε4r/ε4 GG:4BmAxQ_6o1RTb6JIq

# Associating data

## Using Haplotype Names

In [11]:
# Pick one genotype to use in the lookups that follow.
gt = genotypes["ε1/ε2"]

In [13]:
# Identifiers of the haplotypes that make up the selected genotype.
gt.haplotype_ids()

['GH:ZK__hBg12xTo-H7s_60s2nAD3WYxSnoW', 'GH:rVxlCAFnc-TBK0hlW99Z35Ewu7LatGlX']

In [16]:
# Translate each haplotype identifier of the genotype back to its display name.
[haplotype_id_name_map[hid] for hid in gt.haplotype_ids()]

['ε1', 'ε2']

# Document Example

In [20]:
# Example patient document: sample metadata first, then the VMC objects built above,
# then annotations keyed by identifier.
patient_data = {
    "sample-id": "e89c387a-b539-11e6-9d82-fb96077e5724",
    "date": "2016-11-27T00:00:00",
}

patient_data["vmc:alleles"] = alleles
patient_data["vmc:haplotypes"] = haplotypes
patient_data["vmc:genotypes"] = genotypes

patient_data["haplotype_names"] = haplotype_id_name_map

# NOTE(review): this key starts with "VH/", unlike the "GH:" haplotype identifiers
# printed earlier in the notebook, so it may be left over from an older identifier
# format — confirm which object it is meant to reference.
patient_data["clinical significance"] = {
    "VH/q8_JMk85MxhmFXOAGYsf4aFoHuOyfAJE": "increased risk",
}

print(to_json(patient_data))

{
  "clinical significance": {
    "VH/q8_JMk85MxhmFXOAGYsf4aFoHuOyfAJE": "increased risk"
  },
  "date": "2016-11-27T00:00:00",
  "haplotype_names": {
    "GH:2gEuysBG_-0WpJgLxp0eTnsP48bZ-xgs": "ε4",
    "GH:XmtlBZ0GGIQ8kp6v_rR8M8bsSUx9T3xx": "ε3",
    "GH:ZK__hBg12xTo-H7s_60s2nAD3WYxSnoW": "ε1",
    "GH:rVxlCAFnc-TBK0hlW99Z35Ewu7LatGlX": "ε2"
  },
  "sample-id": "e89c387a-b539-11e6-9d82-fb96077e5724",
  "vmc:alleles": {
    "rs429358C": {
      "id": "GA:HW17jzWbPWQyIcRdwwyeKvuBocJSAilo",
      "location": {
        "end": 44908684,
        "start": 44908683
      },
      "replacement": "C",
      "seqref": "NCBI:NC_000019.10"
    },
    "rs429358T": {
      "id": "GA:s9484RoL0-BQlf1sppO7HmDriWL4GHjx",
      "location": {
        "end": 44908684,
        "start": 44908683
      },
      "replacement": "T",
      "seqref": "NCBI:NC_000019.10"
    },
    "rs7412C": {
      "id": "GA:5Kz4eJfHVW2mKcGjY3rgjQAUbGAr-aBT",
      "location": {
        "end": 44908822,
        "start": 449088