# Map To List Demo

This example shows how to filter PDB structures by the X-ray diffraction experimental method and collect information about each result (PDB ID, resolution, R-free, R-work) into a list.

## Imports

In [1]:
from pyspark.sql import SparkSession
from mmtfPyspark.filters import ExperimentalMethods
from mmtfPyspark.io import mmtfReader

## Configure Spark

In [2]:
# Get (or create) the Spark session for this demo, labelled "MapToListDemo".
spark = (
    SparkSession.builder
    .appName("MapToListDemo")
    .getOrCreate()
)

## Read in MMTF Files and sample a small fraction

In [3]:
# Input location, given relative to this notebook.
path = "../../resources/mmtf_full_sample/"
# Sampling settings forwarded to the reader: only a small fraction of the
# input is kept; the seed is presumably what makes the sample repeatable
# (confirm against the mmtfReader documentation).
fraction = 0.001
seed = 123

pdb = mmtfReader.read_sequence_file(
    path,
    fraction=fraction,
    seed=seed,
)

## Filter by X-Ray Diffraction experimental method

In [4]:
# Keep only the entries accepted by the X-ray diffraction filter.
x_ray_only = ExperimentalMethods(ExperimentalMethods.X_RAY_DIFFRACTION)
pdb = pdb.filter(x_ray_only)

## Map each result to a list of information, and collect the lists

In [5]:
# Turn every (key, structure) entry into a flat row
# [key, resolution, r_free, r_work] and bring all rows back to the driver.
# The collected list is the cell's last expression, so it is displayed.
pdb.map(
    lambda entry: [
        entry[0],
        entry[1].resolution,
        entry[1].r_free,
        entry[1].r_work,
    ]
).collect()

[['5GS7', 1.5, 0.23747000098228455, 0.2085999995470047],
 ['4G92', 1.7999999523162842, 0.19008000195026398, 0.1567399948835373],
 ['2YVE', 1.399999976158142, 0.22300000488758087, 0.2029999941587448],
 ['3BHD', 1.5, 0.20116999745368958, 0.1764799952507019],
 ['1Y6Z', 1.8799999952316284, 0.26750001311302185, 0.20604999363422394],
 ['4QKW', 1.7000000476837158, 0.24490000307559967, 0.21230000257492065],
 ['5ET3', 1.6710000038146973, 0.2443999946117401, 0.21809999644756317],
 ['4LLD', 1.190000057220459, 0.16991999745368958, 0.1447100043296814],
 ['3KK4', 1.9500000476837158, 0.1992100030183792, 0.14518000185489655],
 ['2HHC', 1.5399999618530273, 0.19699999690055847, 0.18199999630451202],
 ['1ETE', 2.200000047683716, 0.28700000047683716, 0.23899999260902405],
 ['1WDC', 2.0, 0.2809999883174896, 0.1940000057220459],
 ['1UCD', 1.2999999523162842, 0.2029999941587448, 0.20000000298023224]]

## Terminate Spark

In [6]:
# Shut down the Spark session now that the demo has finished.
spark.stop()