### Create DataFrame from sample data

In [0]:
# Path of the sample customers CSV.
file_location = "/FileStore/tables/customers.csv"

# Read the CSV: first row is the header, fields are comma-separated,
# and column types are inferred from the data.
df = spark.read.csv(
    file_location,
    header=True,
    inferSchema=True,
    sep=",",
)

### Number of Records

In [0]:
# Total number of rows in `df`.
df.count()

Out[3]: 500

### Default Partitions

In [0]:
# Number of partitions of `df`, read through its underlying RDD.
df.rdd.getNumPartitions()

Out[4]: 1

### Get the number of records per Partition

In [0]:
from pyspark.sql import functions as f

# Tag every row with the id of the partition that holds it, then count
# rows per partition id and print the result.
(
    df
    .select(f.spark_partition_id().alias('partition_id'))
    .groupBy('partition_id')
    .count()
    .show()
)

+------------+-----+
|partition_id|count|
+------------+-----+
|           0|  500|
+------------+-----+



### Repartition the DataFrame

In [0]:
# Build a new DataFrame with the rows of `df` spread over 10 partitions;
# `repartition` returns a new DataFrame rather than changing `df`.
df_repart = df.repartition(numPartitions=10)

# Confirm the partition count of the repartitioned DataFrame.
print(df_repart.rdd.getNumPartitions())

10


### Get the number of records per Partition after repartition

In [0]:
# Count rows per partition of the repartitioned DataFrame, sorted by
# partition id so the table reads in partition order.
(
    df_repart
    .select(f.spark_partition_id().alias('partition_id'))
    .groupBy('partition_id')
    .count()
    .orderBy('partition_id')
    .show()
)

+------------+-----+
|partition_id|count|
+------------+-----+
|           0|   50|
|           1|   50|
|           2|   50|
|           3|   50|
|           4|   50|
|           5|   50|
|           6|   50|
|           7|   50|
|           8|   50|
|           9|   50|
+------------+-----+

