RDD Basics: ✅ Tasks: Word count example

In [11]:
from pyspark.sql import SparkSession

# Obtain the SparkSession for this notebook: getOrCreate() hands back an
# already-running session if there is one, otherwise it starts a new one.
spark = (
    SparkSession.builder
    .appName("HelloWorld")
    .getOrCreate()
)

# Example 1 – Creating an RDD
# Distribute an ordinary Python list through the session's SparkContext.
data = [1, 2, 3, 4, 5]
rdd = spark.sparkContext.parallelize(data)

# collect() is an action: it brings every element back to the driver as a list.
print("RDD from Python list:", rdd.collect())


RDD from Python list: [1, 2, 3, 4, 5]


⚡ Example 2 – Transformations & Actions

In [12]:
# map() is a transformation: it only describes the new RDD (each element
# squared); nothing is computed yet.
squared = rdd.map(lambda n: n ** 2)

# collect() is the action that actually runs the job and returns the results.
print("Squared:", squared.collect())

Squared: [1, 4, 9, 16, 25]


⚡ Example 3 – Filter Example

In [4]:
# filter() keeps only the elements for which the predicate is true;
# here, the integers that leave no remainder when divided by 2.
evens = rdd.filter(lambda n: not n % 2)

# Run the job and show the surviving elements.
print("Even Numbers:", evens.collect())

Even Numbers: [2, 4]


⚡ Example 4 – Word Count: the Classic Map-and-Reduce Example

In [8]:
# Sample text: each list element is one input line.
text = ["hello world", "hello spark", "pyspark rdd example"]

# Create an RDD of lines.
# NOTE: this rebinds `rdd`, which Example 1 bound to an RDD of integers.
# Re-run Example 1 before re-running the numeric cells (Examples 2 and 3),
# otherwise they will operate on these strings instead.
rdd = spark.sparkContext.parallelize(text)

# Split every line into words and flatten the per-line lists into one RDD.
# str.split() with no argument splits on any run of whitespace and never
# returns empty strings, so repeated, leading or trailing spaces (and tabs)
# do not produce bogus '' "words" the way line.split(" ") would.
words = rdd.flatMap(lambda line: line.split())
print("Words:", words.collect())

# Count words: pair each word with 1, then sum the 1s that share a key.
word_counts = words.map(lambda word: (word, 1)).reduceByKey(lambda a, b: a + b)

# The (word, count) pairs are not returned in input order; sort the collected
# list if a stable order is needed.
print("Word Counts:", word_counts.collect())


Words: ['hello', 'world', 'hello', 'spark', 'pyspark', 'rdd', 'example']
Word Counts: [('world', 1), ('spark', 1), ('pyspark', 1), ('example', 1), ('hello', 2), ('rdd', 1)]
