In [0]:
# Build a small people DataFrame from in-memory rows. Only column names are
# supplied, so Spark infers each column's type from the data.
df = spark.createDataFrame(
    data=[("Sue", 32), ("Li", 3), ("Bob", 75), ("Heo", 13)],
    schema=["first_name", "age"],
)

# Render the result in the notebook
display(df)

In [0]:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Build a DataFrame with an explicit schema rather than relying on type
# inference: `name` is a nullable string, `age` is a nullable integer.
df_children_with_schema = spark.createDataFrame(
    data=[
        ("Mikhail", 15),
        ("Zaky", 13),
        ("Zoya", 8),
    ],
    schema=StructType(
        [
            StructField('name', StringType(), nullable=True),
            StructField('age', IntegerType(), nullable=True),
        ]
    ),
)

# Render the result in the notebook
display(df_children_with_schema)

In [0]:
from pyspark.sql.functions import when, col

# Projection: keep only the named columns
df.select(col("first_name"), col("age")).show()

# Row filter: people whose age is greater than 20
df.where(col("age") > 20).show()

# Derived column: bucket each person by age. The branches are evaluated in
# order, so "teenager" only applies to ages that did not already match "child".
df_with_stage = df.withColumn(
    "life_stage",
    when(df["age"] < 13, "child")
    .when(df["age"] < 18, "teenager")
    .otherwise("adult"),
)

display(df_with_stage)

In [0]:
# Register the DataFrame under a session-scoped name so it can be queried with SQL
df_with_stage.createOrReplaceTempView("some_people")

# Query the view with SQL: only the rows classified as teenagers
spark.sql("SELECT * FROM some_people WHERE life_stage = 'teenager'").show()

# NOTE: `INSERT INTO some_people VALUES ('frank', 4, 'child')` is intentionally
# not run. `some_people` is a temporary view over an in-memory DataFrame, not a
# table, so there is no storage for Spark to write the new row to. To add rows,
# union them onto the DataFrame and re-register the view instead.

# Show every row currently visible through the view
spark.sql("SELECT * FROM some_people").show()