# Example of using PySpark to find metal interactions

## Imports and variables

In [5]:
import os
import time

import py3Dmol
from pyspark import SparkConf, SparkContext
from mmtfPyspark.filters import containsLProteinChain, resolution
from mmtfPyspark.interactions import InteractionFilter, GroupInteractionExtractor
from mmtfPyspark.io import MmtfReader
from mmtfPyspark.webFilters import Pisces
                                                               
# Create variables
APP_NAME = "MMTF_Spark"

# Directory that is handed to MmtfReader.read_sequence_file below.
# Override it without editing the notebook by setting the MMTF_FULL
# environment variable; when unset, the original hard-coded location is used.
# Alternative path previously used here: "./resources/mmtf_full_sample/"
path = os.environ.get("MMTF_FULL", "/home/marshuang80/PDB/full")

# Configure Spark
conf = SparkConf().setAppName(APP_NAME).setMaster("local[*]")

# getOrCreate() returns the already-running SparkContext if there is one, so
# re-running this cell no longer fails with
# "Cannot run multiple SparkContexts at once".
sc = SparkContext.getOrCreate(conf=conf)

## Define Variables

In [6]:
# --- Input parameters -------------------------------------------------------
# Values passed to the PISCES filter when the non-redundant set is built.
sequenceIdentityCutoff = 30
# NOTE(review): this rebinds `resolution`, which the import cell also imports
# from mmtfPyspark.filters; after this line the name refers to the number.
resolution = 2.5

# Values passed to the InteractionFilter setters.
minInteractions, maxInteractions = 4, 6
distanceCutoff = 3.0

# Chemical component codes of metals in different oxidation states,
# grouped one element per line.
metals = {
    "V",
    "CR",
    "MN", "MN3",
    "FE", "FE2",
    "CO", "3CO",
    "NI", "3NI",
    "CU", "CU1", "CU3",
    "ZN",
    "MO", "4MO", "6MO",
}

## Read PDB and create PISCES non-redundant set

In [7]:
# Read the sequence file(s) at `path`, then keep only the entries accepted by
# the Pisces filter configured with the cutoffs from the parameters cell.
pdb = (
    MmtfReader
    .read_sequence_file(path, sc)
    .filter(Pisces(sequenceIdentity=sequenceIdentityCutoff, resolution=resolution))
)

## Setup criteria for metal interactions

In [8]:
# Build the filter that is later passed to
# GroupInteractionExtractor.get_interactions together with `pdb`.
interactions_filter = InteractionFilter()
# Distance cutoff and min/max interaction counts come from the parameters cell.
interactions_filter.set_distance_cutoff(distanceCutoff)
interactions_filter.set_min_interactions(minInteractions)
interactions_filter.set_max_interactions(maxInteractions)
# Query groups are the metal chemical component codes; the leading True
# presumably means "include these groups" -- TODO confirm against the
# InteractionFilter documentation.
interactions_filter.set_query_groups(True, metals)

# Exclude non-polar interactions: the leading False presumably means
# "exclude" target atoms of elements H, C and P -- TODO confirm.
interactions_filter.set_target_elements(False, ['H','C','P'])

## Tabulate interactions in a DataFrame

In [9]:
# Run the extractor over the filtered structures and cache the result, since
# it is reused by the count below and by the later cells.
interactions = (
    GroupInteractionExtractor()
    .get_interactions(pdb, interactions_filter)
    .cache()
)

# count() is the first action on the cached result.
print(f"Metal interactions: {interactions.count()}")

Metal interactions: 3346


## Select interacting atoms and orientational order parameters (q4-q6)

In [10]:
# Narrow the table to the PDB ID, the q4-q6 order parameters, the
# element/group number/chain of atom 0, and element/group number/chain/distance
# for atoms 1 through 6. Line continuations are implicit inside the parentheses.
interactions = interactions.select(
    "pdbId",
    "q4", "q5", "q6",
    "element0", "groupNum0", "chain0",
    "element1", "groupNum1", "chain1", "distance1",
    "element2", "groupNum2", "chain2", "distance2",
    "element3", "groupNum3", "chain3", "distance3",
    "element4", "groupNum4", "chain4", "distance4",
    "element5", "groupNum5", "chain5", "distance5",
    "element6", "groupNum6", "chain6", "distance6",
).cache()

# Show some example interactions: at most 10 rows, one row per pdbId.
interactions.dropDuplicates(["pdbId"]).show(10)

+-----+----------+----------+----+--------+---------+------+--------+---------+------+---------+--------+---------+------+---------+--------+---------+------+---------+--------+---------+------+---------+--------+---------+------+---------+--------+---------+------+---------+
|pdbId|        q4|        q5|  q6|element0|groupNum0|chain0|element1|groupNum1|chain1|distance1|element2|groupNum2|chain2|distance2|element3|groupNum3|chain3|distance3|element4|groupNum4|chain4|distance4|element5|groupNum5|chain5|distance5|element6|groupNum6|chain6|distance6|
+-----+----------+----------+----+--------+---------+------+--------+---------+------+---------+--------+---------+------+---------+--------+---------+------+---------+--------+---------+------+---------+--------+---------+------+---------+--------+---------+------+---------+
| 1M4L|0.81025696|0.42569426|null|      Zn|     1308|     A|       N|      196|     A|  2.04484|       O|     1324|     A| 1.960923|       O|       72|     A|2.2696981| 

## Count unique interactions by metal

In [11]:
print("Unique interactions by metal: ")

# Group rows by the element0 column, count each group, and print the groups
# ordered by ascending count.
(
    interactions
    .groupBy(['element0'])
    .count()
    .sort("count")
    .show()
)

Unique interactions by metal: 
+--------+-----+
|element0|count|
+--------+-----+
|      Mo|   23|
|      Co|   86|
|      Cu|   88|
|      Ni|  167|
|      Fe|  408|
|      Mn|  435|
|      Zn| 2139|
+--------+-----+



## Terminate Spark

In [12]:
# Stop the SparkContext `sc` that was created in the setup cell.
sc.stop()