In [0]:
from pyspark.sql import functions as F

# Seed rows, one per student: (student_id, name, subject, score, grade).
data = [
    (1, "Ankit", "Math", 85, "A"),
    (2, "Divya", "Science", 92, "A"),
    (3, "Rahul", "English", 78, "B"),
    (4, "Sneha", "Math", 65, "C"),
    (5, "Aryan", "Science", 55, "D"),
    (6, "Isha", "English", 88, "A"),
    (7, "Tanvi", "Math", 91, "A"),
    (8, "Kunal", "Science", 72, "B"),
    (9, "Megha", "English", 60, "C"),
    (10, "Rohan", "Math", 40, "F")
]
columns = ["student_id", "name", "subject", "score", "grade"]

df = spark.createDataFrame(data, columns)

# (Re)create the Delta table from the seed rows.
# A later cell adds a `pass_status` column to this same path. A plain
# mode("overwrite") keeps the existing table schema, so re-running this cell
# would leave a stale, all-null `pass_status` column behind. `overwriteSchema`
# makes the cell a true reset to the five seed columns on every run.
delta_path = "/tmp/delta/student_scores"
(
    df.write.format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .save(delta_path)
)

# Read the table back from storage and expose it to Spark SQL.
df_delta = spark.read.format("delta").load(delta_path)
df_delta.createOrReplaceTempView("student_scores")

spark.sql("SELECT name, score FROM student_scores").show()


+-----+-----+
| name|score|
+-----+-----+
|Ankit|   85|
|Divya|   92|
|Rahul|   78|
|Sneha|   65|
|Aryan|   55|
| Isha|   88|
|Tanvi|   91|
|Kunal|   72|
|Megha|   60|
|Rohan|   40|
+-----+-----+



In [0]:
# Project just the name and score columns from the registered view.
name_score_df = spark.sql("SELECT name, score FROM student_scores")
name_score_df.show()


+-----+-----+
| name|score|
+-----+-----+
|Ankit|   85|
|Divya|   92|
|Rahul|   78|
|Sneha|   65|
|Aryan|   55|
| Isha|   88|
|Tanvi|   91|
|Kunal|   72|
|Megha|   60|
|Rohan|   40|
+-----+-----+



In [0]:
# Number of students in each subject.
subject_count_sql = """
SELECT subject, COUNT(*) AS student_count
FROM student_scores
GROUP BY subject
"""
spark.sql(subject_count_sql).show()


+-------+-------------+
|subject|student_count|
+-------+-------------+
|Science|            3|
|   Math|            4|
|English|            3|
+-------+-------------+



In [0]:
# Mean score for each subject.
subject_avg_sql = """
SELECT subject, AVG(score) AS avg_score
FROM student_scores
GROUP BY subject
"""
spark.sql(subject_avg_sql).show()


+-------+-----------------+
|subject|        avg_score|
+-------+-----------------+
|Science|             73.0|
|   Math|            70.25|
|English|75.33333333333333|
+-------+-----------------+



In [0]:
# Students whose score is strictly greater than 80.
high_scorers_sql = """
SELECT name, score
FROM student_scores
WHERE score > 80
"""
spark.sql(high_scorers_sql).show()


+-----+-----+
| name|score|
+-----+-----+
|Ankit|   85|
|Divya|   92|
| Isha|   88|
|Tanvi|   91|
+-----+-----+



In [0]:
# Top scorer(s) per subject: rank rows inside each subject by descending
# score and keep rank 1. RANK() gives tied rows the same rank, so a tie for
# first place returns every tied student.
top_per_subject_sql = """
SELECT subject, name, score
FROM (
    SELECT subject, name, score,
           RANK() OVER (PARTITION BY subject ORDER BY score DESC) AS rnk
    FROM student_scores
)
WHERE rnk = 1
"""
spark.sql(top_per_subject_sql).show()


+-------+-----+-----+
|subject| name|score|
+-------+-----+-----+
|English| Isha|   88|
|   Math|Tanvi|   91|
|Science|Divya|   92|
+-------+-----+-----+



In [0]:
# Number of students holding each letter grade.
grade_count_sql = """
SELECT grade, COUNT(*) AS student_count
FROM student_scores
GROUP BY grade
"""
spark.sql(grade_count_sql).show()


+-----+-------------+
|grade|student_count|
+-----+-------------+
|    F|            1|
|    B|            2|
|    D|            1|
|    C|            2|
|    A|            4|
+-----+-------------+



In [0]:
# Names of the students whose grade is exactly 'F'.
failed_students_sql = """
SELECT name
FROM student_scores
WHERE grade = 'F'
"""
spark.sql(failed_students_sql).show()


+-----+
| name|
+-----+
|Rohan|
+-----+



In [0]:
# Students scoring from 60 to 90; BETWEEN includes both end points.
mid_range_sql = """
SELECT name, score
FROM student_scores
WHERE score BETWEEN 60 AND 90
"""
spark.sql(mid_range_sql).show()


+-----+-----+
| name|score|
+-----+-----+
|Ankit|   85|
|Rahul|   78|
|Sneha|   65|
| Isha|   88|
|Kunal|   72|
|Megha|   60|
+-----+-----+



In [0]:
# Rank every student inside their subject, highest score first.
subject_ranking_sql = """
SELECT subject, name, score,
       RANK() OVER (PARTITION BY subject ORDER BY score DESC) AS rank
FROM student_scores
"""
spark.sql(subject_ranking_sql).show()


+-------+-----+-----+----+
|subject| name|score|rank|
+-------+-----+-----+----+
|English| Isha|   88|   1|
|English|Rahul|   78|   2|
|English|Megha|   60|   3|
|   Math|Tanvi|   91|   1|
|   Math|Ankit|   85|   2|
|   Math|Sneha|   65|   3|
|   Math|Rohan|   40|   4|
|Science|Divya|   92|   1|
|Science|Kunal|   72|   2|
|Science|Aryan|   55|   3|
+-------+-----+-----+----+



In [0]:
from delta.tables import DeltaTable

# Open the Delta table stored at the student-scores path.
delta_path = "/tmp/delta/student_scores"
delta_table = DeltaTable.forPath(spark, delta_path)

# Add 5 points to every English row, in place on the Delta table.
# NOTE(review): not idempotent -- every execution of this cell adds another 5
# to the same rows. Re-create the table first if the cell has to be re-run.
delta_table.update(
    set={"score": "score + 5"},
    condition="subject = 'English'",
)

# Reload from storage and re-register the view so SQL cells see the new scores.
df_delta = spark.read.load(delta_path, format="delta")
df_delta.createOrReplaceTempView("student_scores")

display(df_delta)


student_id,name,subject,score,grade,pass_status
3,Rahul,English,93,B,PASS
6,Isha,English,103,A,PASS
9,Megha,English,75,C,PASS
1,Ankit,Math,85,A,PASS
2,Divya,Science,92,A,PASS
4,Sneha,Math,65,C,PASS
5,Aryan,Science,55,D,PASS
7,Tanvi,Math,91,A,PASS
8,Kunal,Science,72,B,PASS


In [0]:
# Remove every row with a score below 50 from the Delta table.
# Uses the `delta_table` handle and `delta_path` created by an earlier cell.
delta_table.delete("score < 50")

# Reload from storage and refresh the SQL view so it matches the table.
df_delta = spark.read.load(delta_path, format="delta")
df_delta.createOrReplaceTempView("student_scores")

display(df_delta)

student_id,name,subject,score,grade,pass_status
3,Rahul,English,83,B,PASS
6,Isha,English,93,A,PASS
9,Megha,English,65,C,PASS
1,Ankit,Math,85,A,PASS
2,Divya,Science,92,A,PASS
4,Sneha,Math,65,C,PASS
5,Aryan,Science,55,D,PASS
7,Tanvi,Math,91,A,PASS
8,Kunal,Science,72,B,PASS


In [0]:
from pyspark.sql import functions as F

# pass_status is "PASS" when score >= 50 and "FAIL" otherwise.
pass_status_col = F.when(F.col("score") >= 50, "PASS").otherwise("FAIL")

df_updated = spark.read.load(delta_path, format="delta").withColumn(
    "pass_status", pass_status_col
)

# Overwrite the table with the extended rows; mergeSchema is set so the write
# can carry the extra pass_status column into the table schema.
df_updated.write.option("mergeSchema", "true").save(
    delta_path, format="delta", mode="overwrite"
)

# Point the SQL view at the frame that includes pass_status.
df_updated.createOrReplaceTempView("student_scores")

display(df_updated)


student_id,name,subject,score,grade,pass_status
3,Rahul,English,83,B,PASS
6,Isha,English,93,A,PASS
9,Megha,English,65,C,PASS
1,Ankit,Math,85,A,PASS
2,Divya,Science,92,A,PASS
4,Sneha,Math,65,C,PASS
5,Aryan,Science,55,D,PASS
7,Tanvi,Math,91,A,PASS
8,Kunal,Science,72,B,PASS


In [0]:
# Mean score per subject, computed again. In notebook order this cell follows
# the update, delete and pass_status cells, so it reflects the modified view.
subject_avg_after_sql = """
SELECT subject, AVG(score) AS avg_score
FROM student_scores
GROUP BY subject
"""
spark.sql(subject_avg_after_sql).show()


+-------+-----------------+
|subject|        avg_score|
+-------+-----------------+
|Science|             73.0|
|   Math|80.33333333333333|
|English|80.33333333333333|
+-------+-----------------+



In [0]:
# Save the frame with pass_status as a separate Delta table.
delta_path_v2 = "/tmp/delta/student_scores_v2"
df_updated.write.save(delta_path_v2, format="delta", mode="overwrite")

# Read the copy back and expose it to SQL under its own view name.
df_v2 = spark.read.load(delta_path_v2, format="delta")
df_v2.createOrReplaceTempView("student_scores_v2")

display(df_v2)


student_id,name,subject,score,grade,pass_status
3,Rahul,English,83,B,PASS
6,Isha,English,93,A,PASS
9,Megha,English,65,C,PASS
1,Ankit,Math,85,A,PASS
2,Divya,Science,92,A,PASS
4,Sneha,Math,65,C,PASS
5,Aryan,Science,55,D,PASS
7,Tanvi,Math,91,A,PASS
8,Kunal,Science,72,B,PASS


In [0]:
# Export the final frame as Parquet and as JSON, replacing earlier exports.
df_updated.write.parquet("/tmp/output/student_scores_parquet", mode="overwrite")
df_updated.write.json("/tmp/output/student_scores_json", mode="overwrite")
