# Ex-2020 - Data Frame


First Spark object

In [10]:
# 1. Import SparkSession and create a new session
from pyspark.sql import SparkSession

In [11]:
# Obtain the session for this notebook via the builder's getOrCreate(),
# registered under the application name "OfficeWarriors".
spark = (
    SparkSession.builder
    .appName("OfficeWarriors")
    .getOrCreate()
)

In [12]:

# 2. Describe the columns with a DDL schema string and build the character roster
# Adjacent string literals are concatenated into one "name type, ..." schema string.
schema = (
    "Name string, "
    "Immortal boolean, "
    "AttackPower integer, "
    "DefensePower integer, "
    "Price integer"
)

# One tuple per character, with values in the same order as the schema columns.
data = [
    ("Printer Slayer", False, 80, 65, 900),
    ("Meeting Overlord", True, 95, 90, 1500),
    ("Spreadsheet Sorcerer", False, 75, 70, 1000),
    ("Deadline Reaper", True, 85, 80, 1200),
    ("Coffee Alchemist", False, 70, 60, 800),
    ("Boss Whisperer", True, 92, 85, 1400),
    ("Email Phantom", False, 68, 55, 700),
    ("Keyboard Ninja", False, 85, 75, 1100),
    ("Task Juggler", True, 90, 88, 1300),
    ("Office Oracle", True, 97, 89, 1600),
]

# Materialise the rows as a DataFrame and print it as a text table.
df = spark.createDataFrame(data, schema)
df.show()

+--------------------+--------+-----------+------------+-----+
|                Name|Immortal|AttackPower|DefensePower|Price|
+--------------------+--------+-----------+------------+-----+
|      Printer Slayer|   false|         80|          65|  900|
|    Meeting Overlord|    true|         95|          90| 1500|
|Spreadsheet Sorcerer|   false|         75|          70| 1000|
|     Deadline Reaper|    true|         85|          80| 1200|
|    Coffee Alchemist|   false|         70|          60|  800|
|      Boss Whisperer|    true|         92|          85| 1400|
|       Email Phantom|   false|         68|          55|  700|
|      Keyboard Ninja|   false|         85|          75| 1100|
|        Task Juggler|    true|         90|          88| 1300|
|       Office Oracle|    true|         97|          89| 1600|
+--------------------+--------+-----------+------------+-----+



In [13]:
# 3. Partition the roster on the boolean Immortal flag
# The column is already boolean, so it is used directly as the predicate
# (negated with ~ for the mortal group) instead of comparing to True/False.
mortals_df = df.where(~df["Immortal"])
immortals_df = df.where(df["Immortal"])

# Print each group as its own table: mortals first, then immortals.
mortals_df.show()
immortals_df.show()

+--------------------+--------+-----------+------------+-----+
|                Name|Immortal|AttackPower|DefensePower|Price|
+--------------------+--------+-----------+------------+-----+
|      Printer Slayer|   false|         80|          65|  900|
|Spreadsheet Sorcerer|   false|         75|          70| 1000|
|    Coffee Alchemist|   false|         70|          60|  800|
|       Email Phantom|   false|         68|          55|  700|
|      Keyboard Ninja|   false|         85|          75| 1100|
+--------------------+--------+-----------+------------+-----+

+----------------+--------+-----------+------------+-----+
|            Name|Immortal|AttackPower|DefensePower|Price|
+----------------+--------+-----------+------------+-----+
|Meeting Overlord|    true|         95|          90| 1500|
| Deadline Reaper|    true|         85|          80| 1200|
|  Boss Whisperer|    true|         92|          85| 1400|
|    Task Juggler|    true|         90|          88| 1300|
|   Office Oracle|    true|         97|          89| 1600|
+----------------+--------+-----------+------------+-----+

In [14]:
# 5. Report how many characters ended up in each group
# Each count() is evaluated into a name first, then interpolated into the message.
mortal_count = mortals_df.count()
print(f"Number of mortal characters: {mortal_count}")

immortal_count = immortals_df.count()
print(f"Number of immortal characters: {immortal_count}")

Number of mortal characters: 5
Number of immortal characters: 5


In [15]:
# 6. Select every character whose attack power is strictly below the threshold
your_attack_power = 85  # Adjust as needed

# Strict "<": characters whose AttackPower equals the threshold are not included.
weaker_df = df.where(df["AttackPower"] < your_attack_power)

# Show the matching rows, then report how many there are.
weaker_df.show()
weaker_count = weaker_df.count()
print(f"Number of characters weaker than you: {weaker_count}")

+--------------------+--------+-----------+------------+-----+
|                Name|Immortal|AttackPower|DefensePower|Price|
+--------------------+--------+-----------+------------+-----+
|      Printer Slayer|   false|         80|          65|  900|
|Spreadsheet Sorcerer|   false|         75|          70| 1000|
|    Coffee Alchemist|   false|         70|          60|  800|
|       Email Phantom|   false|         68|          55|  700|
+--------------------+--------+-----------+------------+-----+

Number of characters weaker than you: 4


In [16]:
# 7. Check if the original DataFrame remains unchanged
# The filters in the earlier cells assigned their results to new names
# (mortals_df, immortals_df, weaker_df), so df itself should still hold
# exactly the rows it was created from.
df.show()

# Displaying the table only lets a reader eyeball it; the assertion makes the
# check real by comparing the collected rows against the source list `data`.
# Both sides are sorted so the comparison does not depend on row order.
assert sorted(tuple(row) for row in df.collect()) == sorted(data), (
    "df no longer matches the rows it was created from"
)

+--------------------+--------+-----------+------------+-----+
|                Name|Immortal|AttackPower|DefensePower|Price|
+--------------------+--------+-----------+------------+-----+
|      Printer Slayer|   false|         80|          65|  900|
|    Meeting Overlord|    true|         95|          90| 1500|
|Spreadsheet Sorcerer|   false|         75|          70| 1000|
|     Deadline Reaper|    true|         85|          80| 1200|
|    Coffee Alchemist|   false|         70|          60|  800|
|      Boss Whisperer|    true|         92|          85| 1400|
|       Email Phantom|   false|         68|          55|  700|
|      Keyboard Ninja|   false|         85|          75| 1100|
|        Task Juggler|    true|         90|          88| 1300|
|       Office Oracle|    true|         97|          89| 1600|
+--------------------+--------+-----------+------------+-----+

