# Simple Zinc Interaction Analysis Example

<img src="./figures/zinc_interaction.png" style="width: 300px;"/>

In [1]:
from pyspark.sql import SparkSession
from mmtfPyspark.datasets import groupInteractionExtractor
from mmtfPyspark.io import mmtfReader
from mmtfPyspark.webfilters import Pisces

## Configure Spark

In [2]:
# Build (or reuse) the Spark session for this notebook: local master with 4 threads.
spark = (
    SparkSession.builder
    .master("local[4]")
    .appName("InteractionAnalysisSimple")
    .getOrCreate()
)

## Read PDB in MMTF format

In [3]:
# Location of the sample MMTF data set, relative to this notebook's directory.
path = "../../resources/mmtf_full_sample/"

# Load the structures found under `path`; `pdb` is the input to all later steps.
pdb = mmtfReader.read_sequence_file(path)

## Create a non-redundant subset
Create a Pisces (Protein Sequence Culling Server, http://dunbrack.fccc.edu/PISCES.php) non-redundant subset for this analysis. Select protein chains with less than 40% sequence identity to each other and a resolution of 2.0 Å or better.

In [4]:
# Pisces cut-offs: 40 (% sequence identity) and 2.0 (resolution).
pisces_filter = Pisces(sequenceIdentity=40, resolution=2.0)

# Rebind `pdb` to the non-redundant subset; later cells read this filtered set.
pdb = pdb.filter(pisces_filter)

## Extract proteins with Zn interactions

In [5]:
# Group (residue) name whose interactions are extracted: zinc.
target_group = "ZN"
# Second extractor argument; presumably the distance cutoff in Å (TODO confirm
# against the groupInteractionExtractor documentation).
cutoff = 3

finder = groupInteractionExtractor(target_group, cutoff)

# Cache the resulting dataset: it is queried several times in the cells below
# (schema/rows/count, then the group-by summary).
interactions = finder.get_dataset(pdb).cache()

## Inspect the Zn interactions: schema, first 20 rows, and total count

In [6]:
# Column names and types of the interaction dataset.
interactions.printSchema()

# Preview the first 20 interaction rows.
interactions.show(20)

# Total number of interaction rows in the dataset.
n_interactions = interactions.count()
print(f"Number of interactions: {n_interactions}")

root
 |-- structureId: string (nullable = false)
 |-- residue1: string (nullable = false)
 |-- atom1: string (nullable = false)
 |-- element1: string (nullable = false)
 |-- index1: integer (nullable = false)
 |-- residue2: string (nullable = false)
 |-- atom2: string (nullable = false)
 |-- element2: string (nullable = false)
 |-- index2: integer (nullable = false)
 |-- distance: float (nullable = false)

+-----------+--------+-----+--------+------+--------+-----+--------+------+---------+
|structureId|residue1|atom1|element1|index1|residue2|atom2|element2|index2| distance|
+-----------+--------+-----+--------+------+--------+-----+--------+------+---------+
|       1LBU|      ZN|   ZN|      Zn|   213|     HIS|  NE2|       N|   153|2.1519165|
|       1LBU|      ZN|   ZN|      Zn|   213|     ASP|   CG|       C|   160|2.8048582|
|       1LBU|      ZN|   ZN|      Zn|   213|     ASP|  OD1|       O|   160|1.9849179|
|       1LBU|      ZN|   ZN|      Zn|   213|     HIS|  ND1|       N|   196

## Show the top 10 interacting groups

In [7]:
# Count interactions per partner group (`residue2`), most frequent first.
residue_counts = (
    interactions
    .groupBy("residue2")
    .count()
    .sort("count", ascending=False)
)

# Display only the 10 most frequent partner groups.
residue_counts.show(10)

+--------+-----+
|residue2|count|
+--------+-----+
|     HIS| 2396|
|     CYS| 1721|
|     GLU| 1273|
|     HOH| 1198|
|     ASP| 1190|
|     ACT|  139|
|      CL|   56|
|     CAC|   44|
|      ZN|   44|
|     SER|   41|
+--------+-----+
only showing top 10 rows



## Terminate Spark

In [8]:
# Stop the Spark session created in the configuration cell; run this last.
spark.stop()