## Spark's core concepts

In [0]:
# Report the executor/driver sizing this Spark session was started with.
# getConf() hands back a fresh copy of the configuration on every call, so
# take a single snapshot and read all keys from it.
spark_conf = spark.sparkContext.getConf()

# NOTE: SparkConf.get() returns None for keys that were never set explicitly,
# so an unset property is printed as "None" below.
executor_instances = spark_conf.get('spark.executor.instances')
executor_cores = spark_conf.get('spark.executor.cores')
executor_memory = spark_conf.get('spark.executor.memory')

driver_cores = spark_conf.get('spark.driver.cores')
driver_memory = spark_conf.get('spark.driver.memory')

print(f'''
----------------------------------------
Executor instances: {executor_instances}
Executor cores: {executor_cores}
Executor memory: {executor_memory}
----------------------------------------
Driver cores: {driver_cores}
Driver memory: {driver_memory}
----------------------------------------
''')

In [0]:
# Show every Spark property set on this session as (key, value) pairs.
spark.sparkContext.getConf().getAll()

In [0]:
import pyspark.sql.functions as F
import pyspark.sql.types as T
import datetime

In [0]:
# Sample records used throughout the notebook: one (name, age) tuple per person.
data = [
    ('Alice', 34),
    ('Bob', 45),
    ('Cathy', 29),
    ('David', 50),
    ('Eve', 28),
    ('Frank', 20),
    ('Grace', 42),
    ('Hank', 21),
    ('Ivy', 26),
    ('Jack', 40),
    ('Karen', 19),
    ('Leo', 29),
    ('Mona', 35),
    ('Nina', 48),
    ('Javier', 38),
]
# Column names, in the same order as the fields of each tuple above.
columns = ['Name', 'Age']

In [0]:
# DataFrames (PySpark API): build a DataFrame from the local rows, naming the
# columns via the schema argument, then print a preview.
df = spark.createDataFrame(data, schema=columns)
df.show()

In [0]:
# Uncomment to inspect how the DataFrame's rows are grouped into partitions:
# df.rdd.glom().collect()

In [0]:
# Keep only the people older than 30 and print them.
df.where(F.col('Age') > 30).show()

In [0]:
# DataFrames SQL
# Register the DataFrame as a temporary view named 'friends' so it can be
# queried with spark.sql().
df.createOrReplaceTempView('friends')

In [None]:
# Select the rows with Age > 30 from the 'friends' temp view using SQL and
# print the result.
spark.sql(
    '''
    SELECT *
    FROM friends
    WHERE Age > 30
    ''').show()

In [None]:
# RDDs
# Distribute the local list of (name, age) tuples as an RDD, then collect it
# back to the driver to display its contents.
rdd = spark.sparkContext.parallelize(data)
rdd.collect()

In [None]:
# Keep the (name, age) tuples whose age (index 1) is above 30.
filtered_rdd = rdd.filter(lambda person: person[1] > 30)

In [None]:
# Return the elements of the filtered RDD to the driver as a list.
filtered_rdd.collect()

### RDDs - Example 1

In [None]:
# Local list holding the integers 0 .. 10**6 - 1. list(range(...)) copies the
# range directly; an element-by-element comprehension adds nothing here.
this_is_a_variable = list(range(10**6))
# Peek at the first ten values.
this_is_a_variable[:10]

In [None]:
# Distribute the local list of integers as an RDD.
rdd2 = spark.sparkContext.parallelize(this_is_a_variable)

In [None]:
# Number of partitions the RDD is split into.
rdd2.getNumPartitions()

In [None]:
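# Uncomment to inspect the per-partition contents of rdd2
# (note: this collects all 10**6 elements to the driver):
# l = rdd2.glom().collect()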

In [None]:
# Keep only the values greater than 10**6 - 100 (rebinds filtered_rdd).
filtered_rdd = rdd2.filter(lambda value: value > 10**6 - 100)

In [None]:
# Return the values that passed the filter to the driver as a list.
filtered_rdd.collect()

In [None]:
# Partition count of the filtered RDD, for comparison with rdd2's count.
filtered_rdd.getNumPartitions()

In [None]:
# Show the remaining values grouped by partition (one list per partition).
filtered_rdd.glom().collect()

### RDDs - Example 2

In [None]:
# Lazy transformation: map() only describes the computation; nothing is
# evaluated until an action such as collect() is called.
# For every (name, age) tuple, compute 67 minus the age.
time_to_retirement_rdd = rdd.map(lambda person: 67 - person[1])

In [None]:
# Action: evaluates the mapped RDD and returns the results to the driver as a list.
time_to_retirement_rdd.collect()