### Partition Example

![](Images/71/71 Example.jpg)

### Bucketing

![](Images/71/71 Bucketing.jpg)

### Check if Bucketing Enabled

In [0]:
# Read the session flag that controls whether Spark uses bucketing metadata.
# Per the Spark SQL docs this setting defaults to "true".
# NOTE(review): an earlier note here said "bucketing is not enabled" — confirm against this cell's actual output.
spark.conf.get("spark.sql.sources.bucketing.enabled")  # "true" => bucketing enabled

### Create Sample Data for Demo

In [0]:
from pyspark.sql.functions import col, rand

# Demo dataset: ids 1..9999 (end is exclusive) generated across 10 partitions,
# exposed as "PK", plus an "Attribute" column from rand() seeded with 10.
df = (
    spark.range(start=1, end=10000, step=1, numPartitions=10)
    .select(
        col("id").alias("PK"),
        rand(seed=10).alias("Attribute"),
    )
)
df.display()


In [0]:
# Sanity check on the generated data: range(1, 10000) should yield 9,999 rows.
df.count()

### Create Non Bucketed Table

In [0]:
# Persist the demo data as a plain (non-bucketed) Parquet table.
# mode("overwrite") keeps this cell re-runnable: without it the default save
# mode raises an error when "nonbucketedTable" already exists from an earlier run.
(
    df.write
    .format("parquet")
    .mode("overwrite")
    .saveAsTable("nonbucketedTable")
)

In [0]:
# Persist the same data as a Parquet table bucketed into 10 buckets on "PK".
# mode("overwrite") keeps this cell re-runnable: without it the default save
# mode raises an error when "bucketedTable" already exists from an earlier run.
(
    df.write
    .format("parquet")
    .mode("overwrite")
    .bucketBy(10, "PK")
    .saveAsTable("bucketedTable")
)

### Create Bucketed and Non Bucketed Dataframes for Demo

In [0]:
# df1/df2: two handles on the bucketed table (used for the bucketed-to-bucketed join).
df1, df2 = spark.table("bucketedTable"), spark.table("bucketedTable")

# df3/df4: two handles on the non-bucketed table (used for the non-bucketed joins).
df3, df4 = spark.table("nonbucketedTable"), spark.table("nonbucketedTable")

### Broadcast Join Is Used by Default When a Table Is Smaller Than 10 MB

In [0]:
# Print the physical plan for the non-bucketed self-join under the session's
# current (default) settings, to see which join strategy Spark picks.
(
    df3
    .join(df4, on="PK", how="inner")
    .explain()
)

### Disable Broadcast Join

In [0]:
# A threshold of -1 turns off automatic broadcast joins, so the plans below
# show the non-broadcast join strategy instead.
spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1)
# Adaptive Query Execution is switched off as well.
# NOTE(review): presumably so AQE cannot re-optimize the join at runtime and the
# explain() output matches what actually executes — confirm intent.
spark.conf.set("spark.sql.adaptive.enabled", False)


In [0]:
# Run the non-bucketed to non-bucketed inner join on "PK" and render the result.
display(
    df3.join(df4, on="PK", how="inner")
)

### Non-Bucketed to Non-Bucketed Join: Both Sides Are Shuffled

In [0]:
# Physical plan for the non-bucketed to non-bucketed join on "PK";
# inspect it for shuffle (Exchange) steps on each side.
(
    df3
    .join(df4, on="PK", how="inner")
    .explain()
)

In [0]:
# Run the non-bucketed (df3) to bucketed (df1) join on "PK" and render the result.
(
    df3
    .join(df1, on="PK")
    .display()
)

### Non-Bucketed to Bucketed Join: Only One Side Is Shuffled

In [0]:
# Physical plan for the non-bucketed (df3) to bucketed (df2) join on "PK";
# inspect it to see which side carries a shuffle (Exchange) step.
(
    df3
    .join(df2, on="PK")
    .explain()
)

In [0]:
# Run the bucketed (df1) to bucketed (df2) join on "PK" and render the result.
(
    df1
    .join(df2, on="PK")
    .display()
)

### Bucketed to Bucketed Join: No Shuffle on Either Side

In [0]:
# Physical plan for the bucketed to bucketed join on "PK";
# inspect it to verify that neither side needs a shuffle (Exchange) step.
(
    df1
    .join(df2, on="PK")
    .explain()
)