# Mutations to Structure
This tutorial shows how to retrieve the locations of mutations on the human genome (GRCh37), map them onto a 3D structure, and visualize them.

In [None]:
from pyspark.sql import SparkSession
from mmtfPyspark.datasets import g2sDataset, myVariantDataset
import py3Dmol

#### Configure Spark

In [None]:
# Create (or reuse) a Spark session with the "local[*]" master for this notebook.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("MutationsToStructure")
    .getOrCreate()
)

## Find missense mutations for BRAF
We use the [MyVariant.info web service](https://github.com/sbl-sdsc/mmtf-pyspark/blob/master/mmtfPyspark/datasets/myVariantDataset.py) to retrieve missense mutations that have been annotated as pathogenic in the ClinVar database.

Here we retrieve mutations for BRAF using its UniProt ID.

In [None]:
# UniProt accession(s) to look up; P15056 is BRAF.
uniprotIds = ['P15056']

# Query filter: ClinVar clinical significance "pathogenic".
# Broader alternative kept from the original notebook (not verified here):
# query = "clinvar.rcv.clinical_significance:pathogenic OR clinvar.rcv.clinical_significance:likely pathogenic"
query = "clinvar.rcv.clinical_significance:pathogenic"

# Retrieve the variations and cache the dataset; it is reused by later cells.
variants = (
    myVariantDataset
    .get_variations(uniprotIds, query)
    .cache()
)

# Show the first 10 rows as a pandas table.
variants.toPandas().head(10)

#### Extract list of variant ids from dataset

In [None]:
# Flatten the single-column "variationId" rows into a plain Python list.
variant_ids = (
    variants.select("variationId")
    .rdd
    .flatMap(lambda row: row)
    .collect()
)

## Map variant positions onto 3D structure
Here we use the [G2S web service](https://github.com/sbl-sdsc/mmtf-pyspark/blob/master/mmtfPyspark/datasets/g2sDataset.py) to map from genomic location to a position on a PDB structure.

In [None]:
# PDB entry and chain that the variants are mapped onto.
pdb_id, chain_id = "3TV4", "A"

# Request the position dataset for the collected variant ids on that chain;
# cached because the next cell derives from it.
positions = (
    g2sDataset
    .get_position_dataset(variant_ids, pdb_id, chain_id)
    .cache()
)

# Show the first 10 rows as a pandas table.
positions.toPandas().head(10)

In [None]:
# Keep only the four structure-related columns, drop duplicate rows,
# and sort by "pdbPosition"; the result replaces the previous dataset.
positions = (
    positions
    .select("structureId", "chainId", "pdbPosition", "pdbAminoAcid")
    .distinct()
    .sort("pdbPosition")
    .cache()
)

# Show the first 10 rows as a pandas table.
positions.toPandas().head(10)

#### Extract list of mutated positions

In [None]:
# Flatten the single-column "pdbPosition" rows into a plain Python list.
pdb_positions = (
    positions.select("pdbPosition")
    .rdd
    .flatMap(lambda row: row)
    .collect()
)

## View location of mutations

In [None]:
def view_structure_with_mutations(pdb_id, chain_id, group_numbers):
    """Render a PDB entry in py3Dmol with selected residues of one chain highlighted.

    Args:
        pdb_id: PDB identifier; appended to 'pdb:' to form the viewer query.
        chain_id: Chain to display; all other atoms get an empty style.
        group_numbers: Residue numbers passed as the 'resi' selector for the
            highlighted residues.

    Returns:
        Whatever ``viewer.show()`` returns.
    """
    # Selections reused by the styling calls below.
    whole_chain = {'chain': chain_id}
    hetero_atoms = {'chain': chain_id, 'hetflag': True}
    mutated_residues = {'chain': chain_id, 'resi': group_numbers}

    view = py3Dmol.view(query='pdb:' + pdb_id)

    # Reset every atom to an empty style first, then style the chain of interest.
    view.setStyle({})
    view.setStyle(whole_chain, {'line': {}})
    view.setStyle(hetero_atoms, {'sphere': {}})

    # Highlight the requested residues as sticks, label them, and zoom in on them.
    view.setStyle(mutated_residues, {'stick': {'colorscheme': 'orangeCarbon'}})
    view.addResLabels(mutated_residues, {'fontColor': 'black', 'showBackground': False})
    view.zoomTo(mutated_residues)

    return view.show()

In [None]:
# Display the chosen chain with the mapped mutation positions highlighted.
view_structure_with_mutations(
    pdb_id=pdb_id,
    chain_id=chain_id,
    group_numbers=pdb_positions,
)

In [None]:
# Stop the Spark session that was created at the start of the notebook.
spark.stop()