```
                             rs7412 
                             NC_000019.10:g.44908822
                             C          T
rs429358                 C   APOE-ε4    APOE-ε1
NC_000019.10:g.44908684  T   APOE-ε3    APOE-ε2
http://snpedia.com/index.php/APOE
```

# Setup

In [1]:
import json
import pprint

from vmc.models import ObjectReference, Interval, Allele, Haplotype, Genotype
import vmc.codecs.json

def to_json(o):
    """Return ``o`` rendered as a pretty-printed JSON string.

    Encoding is delegated to ``vmc.codecs.json.JSONEncoder``. The text is
    indented by two spaces, keys are emitted in sorted order, and non-ASCII
    characters (such as the "ε" in haplotype names) are written as-is
    instead of being escaped.
    """
    dump_options = dict(
        indent=2,
        sort_keys=True,
        cls=vmc.codecs.json.JSONEncoder,
        ensure_ascii=False,
    )
    return json.dumps(o, **dump_options)

In [2]:
# Reference sequence on which both APOE SNPs are located.
sr = ObjectReference(namespace="NCBI", accession="NC_000019.10")

# Each SNP is a single-base interval ending at the 1-based g. position given
# in the table at the top of the notebook, i.e. (position - 1, position).
# NOTE(review): presumably interbase (0-based, half-open) coordinates --
# confirm against the vmc documentation.
intervals = {
    rsid: Interval(g_pos - 1, g_pos)
    for rsid, g_pos in (
        ("rs429358", 44908684),
        ("rs7412", 44908822),
    )
}

# Alleles

In [3]:
# One Allele per (SNP, base) combination, keyed by rsid immediately followed
# by the replacement base, e.g. "rs429358C".
alleles = {
    rsid + base: Allele(sr, intervals[rsid], base)
    for rsid in ("rs429358", "rs7412")
    for base in ("T", "C")
}

In [4]:
# Display the repr of a single allele.
alleles["rs429358C"]

Allele(seqref=ObjectReference(namespace='NCBI', accession='NC_000019.10'), interval=Interval(start=44908683, end=44908684), replacement='C')

In [5]:
# str() gives the compact one-line form of the same allele.
str(alleles["rs429358C"])

'NCBI/NC_000019.10:<44908683,44908684>:C'

In [6]:
# Serialize every allele to JSON; each entry in the output carries an "id".
print(to_json(alleles))

{
  "rs429358C": {
    "id": "VA/ZgOnWTvuRsLGoqTp6WAgJuFvrPVulgY2",
    "interval": {
      "end": 44908684,
      "start": 44908683
    },
    "replacement": "C",
    "seqref": {
      "accession": "NC_000019.10",
      "namespace": "NCBI"
    }
  },
  "rs429358T": {
    "id": "VA/SmtH5_lBB_j8bUg-UnjEijMhM8YLqkP8",
    "interval": {
      "end": 44908684,
      "start": 44908683
    },
    "replacement": "T",
    "seqref": {
      "accession": "NC_000019.10",
      "namespace": "NCBI"
    }
  },
  "rs7412C": {
    "id": "VA/fnwGfGdUbdLRbIIkEx1lIzO77o8pgFzv",
    "interval": {
      "end": 44908822,
      "start": 44908821
    },
    "replacement": "C",
    "seqref": {
      "accession": "NC_000019.10",
      "namespace": "NCBI"
    }
  },
  "rs7412T": {
    "id": "VA/f0l64_B4mOriqevzpV7ip5rwUCo7oKmA",
    "interval": {
      "end": 44908822,
      "start": 44908821
    },
    "replacement": "T",
    "seqref": {
      "accession": "NC_000019.10",
      "namespace": "NCBI"
    }
  }
}


# Haplotypes

In [7]:
# Each APOE ε haplotype combines one rs429358 allele with one rs7412 allele,
# following the table at the top of the notebook.
haplotypes = {
    name: Haplotype([alleles["rs429358" + base_429358], alleles["rs7412" + base_7412]])
    for name, base_429358, base_7412 in (
        ("ε1", "C", "T"),
        ("ε2", "T", "T"),
        ("ε3", "T", "C"),
        ("ε4", "C", "C"),
    )
}

In [8]:
# Serialize the haplotypes; each is emitted as its allele ids plus its own id.
print(to_json(haplotypes))

{
  "ε1": {
    "allele_ids": [
      "VA/ZgOnWTvuRsLGoqTp6WAgJuFvrPVulgY2",
      "VA/f0l64_B4mOriqevzpV7ip5rwUCo7oKmA"
    ],
    "id": "VH/Jdha5Mgdx-zt16AKbDPDRBhWzaa1DMMZ"
  },
  "ε2": {
    "allele_ids": [
      "VA/SmtH5_lBB_j8bUg-UnjEijMhM8YLqkP8",
      "VA/f0l64_B4mOriqevzpV7ip5rwUCo7oKmA"
    ],
    "id": "VH/6MjG72eg8qL83EyaKD7SoGQz-t1YyqsQ"
  },
  "ε3": {
    "allele_ids": [
      "VA/SmtH5_lBB_j8bUg-UnjEijMhM8YLqkP8",
      "VA/fnwGfGdUbdLRbIIkEx1lIzO77o8pgFzv"
    ],
    "id": "VH/hg4D9sgompp7-aZdW1QfvvL1di88jlbo"
  },
  "ε4": {
    "allele_ids": [
      "VA/ZgOnWTvuRsLGoqTp6WAgJuFvrPVulgY2",
      "VA/fnwGfGdUbdLRbIIkEx1lIzO77o8pgFzv"
    ],
    "id": "VH/q8_JMk85MxhmFXOAGYsf4aFoHuOyfAJE"
  }
}


In [9]:
# Reverse lookup: stringified haplotype digest -> ε name.
haplotype_names = dict(
    (str(haplotype.digest), name) for name, haplotype in haplotypes.items()
)
print(haplotype_names)

{'VH/q8_JMk85MxhmFXOAGYsf4aFoHuOyfAJE': 'ε4', 'VH/6MjG72eg8qL83EyaKD7SoGQz-t1YyqsQ': 'ε2', 'VH/hg4D9sgompp7-aZdW1QfvvL1di88jlbo': 'ε3', 'VH/Jdha5Mgdx-zt16AKbDPDRBhWzaa1DMMZ': 'ε1'}


# Genotypes

In [10]:
# Build a Genotype for every ordered pair of haplotype names, keyed "a/b".
# Both orderings of a pair are generated; as the outputs below show, e.g.
# "ε1/ε2" and "ε2/ε1" end up with the same id.
genotypes = {
    "/".join((first_name, second_name)): Genotype([first_hap, second_hap])
    for first_name, first_hap in haplotypes.items()
    for second_name, second_hap in haplotypes.items()
}

In [11]:
# Serialize one genotype; it is emitted as its haplotype ids plus its own id.
print(to_json(genotypes["ε1/ε2"]))

{
  "haplotype_ids": [
    "VH/6MjG72eg8qL83EyaKD7SoGQz-t1YyqsQ",
    "VH/Jdha5Mgdx-zt16AKbDPDRBhWzaa1DMMZ"
  ],
  "id": "VG/iTyb-uti8xInAThWtb_eswICKFwyLxTB"
}


In [12]:
# Serialize every genotype built above.
print(to_json(genotypes))

{
  "ε1/ε1": {
    "haplotype_ids": [
      "VH/Jdha5Mgdx-zt16AKbDPDRBhWzaa1DMMZ",
      "VH/Jdha5Mgdx-zt16AKbDPDRBhWzaa1DMMZ"
    ],
    "id": "VG/5oG2t7wmQwnqnuaT284Z06eB4Ngg7clr"
  },
  "ε1/ε2": {
    "haplotype_ids": [
      "VH/6MjG72eg8qL83EyaKD7SoGQz-t1YyqsQ",
      "VH/Jdha5Mgdx-zt16AKbDPDRBhWzaa1DMMZ"
    ],
    "id": "VG/iTyb-uti8xInAThWtb_eswICKFwyLxTB"
  },
  "ε1/ε3": {
    "haplotype_ids": [
      "VH/Jdha5Mgdx-zt16AKbDPDRBhWzaa1DMMZ",
      "VH/hg4D9sgompp7-aZdW1QfvvL1di88jlbo"
    ],
    "id": "VG/_rdkwQ9Z4t_FB9pDq0KjgwnPnKCFgyNU"
  },
  "ε1/ε4": {
    "haplotype_ids": [
      "VH/Jdha5Mgdx-zt16AKbDPDRBhWzaa1DMMZ",
      "VH/q8_JMk85MxhmFXOAGYsf4aFoHuOyfAJE"
    ],
    "id": "VG/RFghM2gyvmY9Ny5n9rlYDLb6t6PjL1Iu"
  },
  "ε2/ε1": {
    "haplotype_ids": [
      "VH/6MjG72eg8qL83EyaKD7SoGQz-t1YyqsQ",
      "VH/Jdha5Mgdx-zt16AKbDPDRBhWzaa1DMMZ"
    ],
    "id": "VG/iTyb-uti8xInAThWtb_eswICKFwyLxTB"
  },
  "ε2/ε2": {
    "haplotype_ids": [
      "VH/6MjG72eg8qL83EyaKD7SoGQz-t1

In [13]:
# List each genotype name next to its digest, in name order.
for k in sorted(genotypes):
    o = genotypes[k]
    print(k, o.digest)

ε1/ε1 VG/5oG2t7wmQwnqnuaT284Z06eB4Ngg7clr
ε1/ε2 VG/iTyb-uti8xInAThWtb_eswICKFwyLxTB
ε1/ε3 VG/_rdkwQ9Z4t_FB9pDq0KjgwnPnKCFgyNU
ε1/ε4 VG/RFghM2gyvmY9Ny5n9rlYDLb6t6PjL1Iu
ε2/ε1 VG/iTyb-uti8xInAThWtb_eswICKFwyLxTB
ε2/ε2 VG/ypqEeToHiSSDFs0em9sUBaSkzYMu3-Kz
ε2/ε3 VG/Faiqz1FbhxCa9o0UhbqLATOLr5E1G0c_
ε2/ε4 VG/Pt0NsYV0B72VrO-PPfRjq2YdcWlh9aPN
ε3/ε1 VG/_rdkwQ9Z4t_FB9pDq0KjgwnPnKCFgyNU
ε3/ε2 VG/Faiqz1FbhxCa9o0UhbqLATOLr5E1G0c_
ε3/ε3 VG/MZ9o8m6mizY4tl1YFTauWuR9Rmly4CjA
ε3/ε4 VG/ebDtZ7MAG1J-W2Ajkg00GHqcgY_7vFWl
ε4/ε1 VG/RFghM2gyvmY9Ny5n9rlYDLb6t6PjL1Iu
ε4/ε2 VG/Pt0NsYV0B72VrO-PPfRjq2YdcWlh9aPN
ε4/ε3 VG/ebDtZ7MAG1J-W2Ajkg00GHqcgY_7vFWl
ε4/ε4 VG/q83AVv0AxCyGpS00Ysq_8rxIyip8mf9S


# Associating data

## Using Haplotype Names

In [14]:
# Pick one genotype to use in the following cells.
gt = genotypes["ε1/ε2"]

In [15]:
# The genotype refers to its haplotypes by id only.
gt.haplotype_ids

['VH/6MjG72eg8qL83EyaKD7SoGQz-t1YyqsQ', 'VH/Jdha5Mgdx-zt16AKbDPDRBhWzaa1DMMZ']

In [16]:
# Translate the haplotype ids back to ε names. The result follows the order
# of gt.haplotype_ids, which need not match the order in the "ε1/ε2" key.
[haplotype_names[hid] for hid in gt.haplotype_ids]

['ε2', 'ε1']

# Document Example

In [17]:
# Example patient document: sample metadata, the VMC objects themselves, and
# application data keyed by VMC haplotype identifiers.
patient_data = {
    "sample-id": "e89c387a-b539-11e6-9d82-fb96077e5724",
    "date": "2016-11-27T00:00:00",

    "vmc:alleles": alleles,
    "vmc:haplotypes": haplotypes,
    "vmc:genotypes": genotypes,

    "haplotype_names": haplotype_names,

    "clinical significance": {
        # Derive the key from the ε4 haplotype (same expression used to build
        # haplotype_names) instead of pasting its identifier as a literal, so
        # the annotation cannot drift from the computed identifier.
        str(haplotypes["ε4"].digest): "increased risk",
        },
    }

In [18]:
# Serialize the whole record, including the embedded VMC objects.
print(to_json(patient_data))

{
  "clinical significance": {
    "VH/q8_JMk85MxhmFXOAGYsf4aFoHuOyfAJE": "increased risk"
  },
  "date": "2016-11-27T00:00:00",
  "haplotype_names": {
    "VH/6MjG72eg8qL83EyaKD7SoGQz-t1YyqsQ": "ε2",
    "VH/Jdha5Mgdx-zt16AKbDPDRBhWzaa1DMMZ": "ε1",
    "VH/hg4D9sgompp7-aZdW1QfvvL1di88jlbo": "ε3",
    "VH/q8_JMk85MxhmFXOAGYsf4aFoHuOyfAJE": "ε4"
  },
  "sample-id": "e89c387a-b539-11e6-9d82-fb96077e5724",
  "vmc:alleles": {
    "rs429358C": {
      "id": "VA/ZgOnWTvuRsLGoqTp6WAgJuFvrPVulgY2",
      "interval": {
        "end": 44908684,
        "start": 44908683
      },
      "replacement": "C",
      "seqref": {
        "accession": "NC_000019.10",
        "namespace": "NCBI"
      }
    },
    "rs429358T": {
      "id": "VA/SmtH5_lBB_j8bUg-UnjEijMhM8YLqkP8",
      "interval": {
        "end": 44908684,
        "start": 44908683
      },
      "replacement": "T",
      "seqref": {
        "accession": "NC_000019.10",
        "namespace": "NCBI"
      }
    },
    "rs7412C": {
      "id