# Example of using PySpark to find metal interactions

## Imports and variables

In [1]:
from pyspark import SparkConf, SparkContext                    
from mmtfPyspark.io import mmtfReader
from mmtfPyspark.interactions import InteractionFilter, GroupInteractionExtractor
from mmtfPyspark.filters import ContainsLProteinChain, Resolution
from mmtfPyspark.webFilters import Pisces
import py3Dmol
import time
                                                               
# Application name registered with Spark, and the directory holding the
# sample MMTF data that is read later in the notebook.
APP_NAME = "MMTF_Spark"
path = "./resources/mmtf_full_sample/"

# Configure Spark to run in local mode ("local[*]" master).
conf = SparkConf().setAppName(APP_NAME).setMaster("local[*]")
# getOrCreate() returns the already-running context if there is one, so this
# cell can be re-executed in a live kernel without raising
# "Cannot run multiple SparkContexts at once".
sc = SparkContext.getOrCreate(conf=conf)

## Define Variables

In [2]:
# --- Input parameters ---------------------------------------------------

# Cutoffs handed to the Pisces filter when building the non-redundant set.
sequenceIdentityCutoff = 30
resolution = 2.5

# Bounds on the number of interactions and the distance cutoff handed to the
# InteractionFilter.
# NOTE(review): distance/resolution are presumably in Angstrom - confirm.
minInteractions, maxInteractions = 4, 6
distanceCutoff = 3.0

# Chemical component codes of metals in different oxidation states.
metals = {
    "V",
    "CR",
    "MN", "MN3",
    "FE", "FE2",
    "CO", "3CO",
    "NI", "3NI",
    "CU", "CU1", "CU3",
    "ZN",
    "MO", "4MO", "6MO",
}

## Read PDB and create PISCES non-redundant set

In [5]:
# Read the MMTF sequence files found under `path`, then keep only the entries
# accepted by the Pisces filter at the configured sequence-identity and
# resolution cutoffs.
# NOTE(review): Pisces lives in mmtfPyspark.webFilters, so it presumably needs
# network access - confirm before running offline.
pdb = (
    mmtfReader
    .read_sequence_file(path, sc)
    .filter(Pisces(sequenceIdentity=sequenceIdentityCutoff, resolution=resolution))
)

## Set up criteria for metal interactions

In [6]:
# Build the filter describing which interactions to look for.
interactions_filter = InteractionFilter()

# Distance cutoff and the allowed range for the number of interactions.
interactions_filter.set_distance_cutoff(distanceCutoff)
interactions_filter.set_min_interactions(minInteractions)
interactions_filter.set_max_interactions(maxInteractions)

# Query groups are the metal chemical components.
# NOTE(review): the leading boolean looks like an include (True) / exclude
# (False) switch - confirm against the InteractionFilter API.
interactions_filter.set_query_groups(True, metals)

# Exclude non-polar interactions by ruling out H, C and P as target elements.
non_polar_elements = ['H', 'C', 'P']
interactions_filter.set_target_elements(False, non_polar_elements)

## Tabulate interactions in a DataFrame

In [7]:
# Extract the interactions that satisfy the filter. The result is cached
# because it is counted right away and used again by the following cells.
interactions = (
    GroupInteractionExtractor()
    .get_interactions(pdb, interactions_filter)
    .cache()
)
print(f"Metal interactions: {interactions.count()}")

Metal interactions: 52


## Select interacting atoms and orientational order parameters (q4-q6)

In [8]:
# Columns to keep: structure id, the order parameters q4-q6, the metal
# (index 0), and interaction partners 1-6 with their distances.
# NOTE(review): the upper bound 6 mirrors maxInteractions defined earlier;
# presumably the two need to stay in sync - confirm.
selected_columns = ["pdbId", "q4", "q5", "q6", "element0", "groupNum0", "chain0"]
selected_columns += [
    f"{field}{partner}"
    for partner in range(1, 7)
    for field in ("element", "groupNum", "chain", "distance")
]

interactions = interactions.select(*selected_columns).cache()

# Show some example interactions (one row per PDB entry, first 10 rows).
interactions.dropDuplicates(["pdbId"]).show(10)

+-----+----------+----------+----------+--------+---------+------+--------+---------+------+---------+--------+---------+------+---------+--------+---------+------+---------+--------+---------+------+---------+--------+---------+------+---------+--------+---------+------+---------+
|pdbId|        q4|        q5|        q6|element0|groupNum0|chain0|element1|groupNum1|chain1|distance1|element2|groupNum2|chain2|distance2|element3|groupNum3|chain3|distance3|element4|groupNum4|chain4|distance4|element5|groupNum5|chain5|distance5|element6|groupNum6|chain6|distance6|
+-----+----------+----------+----------+--------+---------+------+--------+---------+------+---------+--------+---------+------+---------+--------+---------+------+---------+--------+---------+------+---------+--------+---------+------+---------+--------+---------+------+---------+
| 1A2P|0.54589266|      null|      null|      Zn|      112|     C|       O|      139|     C|1.9944315|       N|       62|     C| 2.084901|       O|    

## Count unique interactions by metal

In [9]:
# Tally the interaction rows per metal element (column element0) and list
# them in ascending order of count.
print("Unique interactions by metal: ")
counts_by_metal = interactions.groupBy("element0").count()
counts_by_metal.sort("count").show()

Unique interactions by metal: 
+--------+-----+
|element0|count|
+--------+-----+
|      Ni|    1|
|      Cu|    2|
|      Fe|    9|
|      Mn|   12|
|      Zn|   28|
+--------+-----+



## Terminate Spark

In [10]:
# Shut down the SparkContext created at the top of the notebook; cells that
# use `sc` cannot be run again until the setup cell is re-executed.
sc.stop()