# Initializing

In [None]:
# %load_ext autoreload
# %autoreload 2

import sys
import os
import re
import shutil
import random
import pprint
import itertools
import functools
import collections

import pysam
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [1]:
# Directory added to the module search path before the handygenome imports
# below (presumably the root of the local handygenome checkout — confirm).
PROJECT_PATH = '/home/users/pjh/scripts/python_genome_packages'

# Guarded so the cell is idempotent: re-running it in a live kernel does not
# stack duplicate entries onto sys.path.
if PROJECT_PATH not in sys.path:
    sys.path.append(PROJECT_PATH)

import handygenome.common

from handygenome.common import ChromDict, Vcfspec, Interval
from handygenome.variantplus.breakends import Breakends
from handygenome.variantplus.variantplus import VariantPlus, VariantPlusList
from handygenome.variantplus.vcfplus import VcfPlus
from handygenome.igvhandle import IGVHandle

from handygenome.variantplus import vpfilter as libvpfilter

# Loading vcf

In [2]:
# Annotated VCF to load (the filename suggests filtered Mutect2 SNV calls for
# sample LU-87 — confirm against the pipeline that produced it).
vcf_path = '/home/users/pjh/practice/pipeline_test/handygenome/filtered_mutect2_output/LU-87.snv.filtered.clean.annot.vcf.gz'

# vcfp: shorthand for VcfPlus; built directly from the VCF path above.
vcfp = VcfPlus(vcf_path)

# VcfPlus object

In [3]:
# Display the VcfPlus repr: the reference version followed by the list of
# loaded VariantPlus objects.
vcfp

<VcfPlus object>
- refver: hg19
- vplist:
    [<VariantPlus object (1	813227	.	G	T)>,
     <VariantPlus object (1	3679492	.	A	T)>,
     <VariantPlus object (1	4394080	.	T	C)>,
     <VariantPlus object (1	4923985	.	A	G)>,
     <VariantPlus object (1	5359197	.	A	G)>,
     <VariantPlus object (1	6016996	.	C	T)>,
     <VariantPlus object (1	7260937	.	T	G)>,
     <VariantPlus object (1	9319114	.	G	C)>,
     <VariantPlus object (1	14248637	.	C	A)>,
     <VariantPlus object (1	14421214	.	G	A)>,
     <VariantPlus object (1	16050377	.	C	T)>,
     <VariantPlus object (1	16666963	.	C	T)>,
     <VariantPlus object (1	19398033	.	T	A)>,
     <VariantPlus object (1	21582674	.	C	A)>,
     <VariantPlus object (1	21760630	.	G	T)>,
     <VariantPlus object (1	23445427	.	C	T)>,
     <VariantPlus object (1	25667676	.	C	A)>,
     <VariantPlus object (1	28498132	.	G	A)>,
     <VariantPlus object (1	30152796	.	C	T)>,
     <VariantPlus object (1	30401429	.	A	G)>,
     <VariantPlus object (1	32110307	.	C	T)>,
 

In [4]:
# Reference version recorded on the VcfPlus object.
vcfp.refver

'hg19'

In [5]:
# Value of the `length` attribute (presumably the number of variants loaded
# into vcfp — confirm).
vcfp.length

2695

# Filtering

In [6]:
# Build a gene-name filter for TP53 and EGFR. The filter module is imported
# in the setup cell under the alias `libvpfilter`, so that is the name that
# must be used here (`vpfilters` is never bound and raised NameError on a
# fresh kernel).
filt = libvpfilter.get_genename_filter(['TP53', 'EGFR'])

In [7]:
# Apply the gene-name filter to the loaded variants. The next cell reads the
# result from vcfp.vplist_filtered (presumably populated by this call — confirm).
vcfp.filter_vplist(filt)

In [8]:
# Inspect the filtered variant list after filter_vplist has been called.
vcfp.vplist_filtered

[<VariantPlus object (7	55122265	.	G	C)>,
 <VariantPlus object (7	55259515	.	T	G)>]

In [17]:
# Look up 'max_occur_count' in the `cosmic` annotation of the second filtered
# variant (index 1).
vcfp.vplist_filtered[1].annotdb.cosmic['max_occur_count']

2647

In [20]:
# Canonical-transcript annotation of the second filtered variant, keyed by
# transcript ID.
# NOTE(review): the VariantPlus section of this notebook reads
# `annotdb.transcript_canon_ovlp` and shows the same output; the attribute may
# have been renamed between runs — confirm which name the current API exposes.
vcfp.vplist_filtered[1].annotdb.transcript_canonical

{'ENST00000275493': {'aa_change': ['L', 'R'],
                     'biotype': 'protein_coding',
                     'ccds_id': 'CCDS5514.1',
                     'chrom': 7,
                     'codon_change': ['cTg', 'cGg'],
                     'codon_frame0': 1,
                     'consequence_flags': {'is_3pUTR_involved': [38;5;196mFalse[0m,
                                           'is_5pUTR_involved': [38;5;196mFalse[0m,
                                           'is_SV_consequence': [38;5;196mFalse[0m,
                                           'is_frameshift': [38;5;196mFalse[0m,
                                           'is_missense': [38;5;40mTrue[0m,
                                           'is_not_protein_altering': [38;5;196mFalse[0m,
                                           'is_protein_altering': [38;5;40mTrue[0m,
                                           'is_splice_acceptor_involved': [38;5;196mFalse[0m,
                                        

# VariantPlus object

In [11]:
# Bind the second filtered variant to a short name; the cells that follow
# inspect it through `vp`.
vp = vcfp.vplist_filtered[1]

In [12]:
# List the gene names reported for this variant.
vp.get_gene_names()

['EGFR']

In [14]:
# Transcript annotation for this variant, keyed by transcript ID (the name
# suggests canonical transcripts overlapping the variant — confirm).
vp.annotdb.transcript_canon_ovlp

{'ENST00000275493': {'aa_change': ['L', 'R'],
                     'biotype': 'protein_coding',
                     'ccds_id': 'CCDS5514.1',
                     'chrom': 7,
                     'codon_change': ['cTg', 'cGg'],
                     'codon_frame0': 1,
                     'consequence_flags': {'is_3pUTR_involved': [38;5;196mFalse[0m,
                                           'is_5pUTR_involved': [38;5;196mFalse[0m,
                                           'is_SV_consequence': [38;5;196mFalse[0m,
                                           'is_frameshift': [38;5;196mFalse[0m,
                                           'is_missense': [38;5;40mTrue[0m,
                                           'is_not_protein_altering': [38;5;196mFalse[0m,
                                           'is_protein_altering': [38;5;40mTrue[0m,
                                           'is_splice_acceptor_involved': [38;5;196mFalse[0m,
                                        

In [15]:
# Regulatory annotation for this variant: keyed by regulatory-feature ID, each
# entry carrying an 'activity' mapping of label -> state.
vp.annotdb.regulatory

{'ENSR00001397155': {'activity': {'A549': 'ACTIVE',
                                  'A673': 'ACTIVE',
                                  'B': 'REPRESSED',
                                  'B_PB': 'NA',
                                  'CD14_monocyte_1': 'REPRESSED',
                                  'CD14_monocyte_PB': 'NA',
                                  'CD4_CD25_ab_Treg_PB': 'NA',
                                  'CD4_ab_T': 'NA',
                                  'CD4_ab_T_PB_1': 'NA',
                                  'CD4_ab_T_PB_2': 'NA',
                                  'CD4_ab_T_Th': 'NA',
                                  'CD4_ab_T_VB': 'NA',
                                  'CD8_ab_T_CB': 'NA',
                                  'CD8_ab_T_PB': 'NA',
                                  'CMP_CD4_1': 'NA',
                                  'CMP_CD4_2': 'NA',
                                  'CMP_CD4_3': 'NA',
                                  'CM_CD4_ab_T_VB': 'NA',
    