# Transformations
Transformations are lazy: they are not executed until an action is called on top of them. Let’s walk through the most commonly used transformations.

In [3]:
# Locate the local Spark installation so that the pyspark imports below can be resolved
import findspark
findspark.init()

In [4]:
from pyspark.sql import SparkSession
from pyspark.conf import SparkConf
from pyspark import SparkContext

In [5]:
# Build (or reuse) a SparkSession that runs locally on 4 worker threads,
# then keep a handle on its underlying SparkContext for the RDD examples.
# NOTE(review): the session is bound to the name `pyspark`, which is also the
# package name; `spark` would be the conventional choice. Kept as-is because
# other cells may reference it.
pyspark = (
    SparkSession.builder
    .master("local[4]")
    .appName("Basic ")
    .getOrCreate()
)
sc = pyspark.sparkContext

In [6]:
# Sample integers (note the duplicate 2, used later by the distinct example)
numbers = [1, 2, 5, 6, 8, 15, 4, 9, 7, 2]
# Turn the local Python list into an RDD
numbers_rdd = sc.parallelize(numbers)
# take(n) returns the first n elements as a plain Python list
numbers_rdd.take(10)

[1, 2, 5, 6, 8, 15, 4, 9, 7, 2]

In [7]:
# Three sample sentences used by the string-based examples
words = ["Fake it till make it", "Least said, soonest mended.", "Doing is better than saying."]
# Turn the local Python list into an RDD
words_rdd = sc.parallelize(words)
# Only the first two of the three sentences are returned
words_rdd.take(2)

['Fake it till make it', 'Least said, soonest mended.']

## 1) Map
Applies the function passed to `map` to every element of the RDD and returns a new RDD containing the results (exactly one output element per input element).

In [8]:
# Square every number: one output element per input element
numbers_rdd.map(lambda n: n * n).take(10)

[1, 4, 25, 36, 64, 225, 16, 81, 49, 4]

In [9]:
# Upper-case each sentence; every sentence stays a single element
words_rdd.map(lambda sentence: sentence.upper()).take(3)

['FAKE IT TILL MAKE IT',
 'LEAST SAID, SOONEST MENDED.',
 'DOING IS BETTER THAN SAYING.']

## 2) Filter
Keeps only the elements of the RDD that satisfy the given condition.

In [10]:
# Keep only the numbers strictly smaller than 5
numbers_rdd.filter(lambda n: n < 5).take(10)

[1, 2, 4, 2]

## 3) FlatMap
This function is very similar to `map`, but the supplied function can return multiple elements (or none) for each input element of the given RDD; all returned elements are flattened into a single RDD.

In [11]:
# The function returns a string, and flatMap flattens whatever iterable it gets back,
# so each upper-cased sentence is broken up into its individual characters
words_rdd.flatMap(lambda sentence: sentence.upper()).take(7)

['F', 'A', 'K', 'E', ' ', 'I', 'T']

In [12]:
# Split each sentence on spaces; the per-sentence word lists are flattened into one RDD of words
words_rdd.flatMap(lambda sentence: sentence.split(" ")).take(3)

['Fake', 'it', 'till']

In [13]:
# Chain transformations: split sentences into words, then upper-case every word
(
    words_rdd
    .flatMap(lambda sentence: sentence.split(" "))
    .map(lambda word: word.upper())
    .take(10)
)

['FAKE',
 'IT',
 'TILL',
 'MAKE',
 'IT',
 'LEAST',
 'SAID,',
 'SOONEST',
 'MENDED.',
 'DOING']

## 4) Distinct
This will return the distinct elements of an RDD.

In [14]:
# Drop duplicate values; as the output shows, the result does not follow the input order
numbers_rdd.distinct().take(5)

[8, 4, 1, 5, 9]

## 5) Sample
Returns a sampled subset of an RDD.

In [18]:
# Sample with replacement at a fraction of 0.5, using a fixed seed.
# The fraction is not an exact size: here only 3 of the 10 elements are returned.
numbers_rdd.sample(withReplacement=True, fraction=0.5, seed=42).take(10)

[6, 15, 4]