### ![Spark Logo Tiny](https://files.training.databricks.com/images/105/logo_spark_tiny.png) Funciones When, Otherwise y Case

In [0]:
from pyspark.sql.functions import *

# Column names for the student DataFrame (only names are given here, no types).
schema = ["nombre", "ramo", "puntaje", "status", "asistencia"]

# One tuple per student, with values in the same order as `schema`.
estudiante = [
    ("Raj", "Science", 86, "A", 90),
    ("Rahul", "Math", 56, "R", 70),
    ("Raghav", "English", 77, "A", 85),
    ("Raja", "Science", 80, "A", 85),
    ("Rama", "Math", 69, "A", 70),
    ("Rasul", "Math", 45, "R", 50),
    ("Kumar", "English", 59, "R", 55),
]

df = spark.createDataFrame(estudiante, schema)
df.show()

+------+-------+-------+------+----------+
|nombre|   ramo|puntaje|status|asistencia|
+------+-------+-------+------+----------+
|   Raj|Science|     86|     A|        90|
| Rahul|   Math|     56|     R|        70|
|Raghav|English|     77|     A|        85|
|  Raja|Science|     80|     A|        85|
|  Rama|   Math|     69|     A|        70|
| Rasul|   Math|     45|     R|        50|
| Kumar|English|     59|     R|        55|
+------+-------+-------+------+----------+



#### Actualizar los valores de una columna

In [0]:
# Build the replacement value for `status` from `puntaje`.
# Branches are checked in order; a row matching neither comparison
# gets the `otherwise` value.
status_por_puntaje = (
    when(df["puntaje"] >= 60, "Aprobado")
    .when(df["puntaje"] < 60, "Reprobado")
    .otherwise("Ausente")
)

# Using an existing column name makes withColumn overwrite that column.
df_modif = df.withColumn("status", status_por_puntaje)

df_modif.show(truncate=False)

+------+-------+-------+---------+----------+
|nombre|ramo   |puntaje|status   |asistencia|
+------+-------+-------+---------+----------+
|Raj   |Science|86     |Aprobado |90        |
|Rahul |Math   |56     |Reprobado|70        |
|Raghav|English|77     |Aprobado |85        |
|Raja  |Science|80     |Aprobado |85        |
|Rama  |Math   |69     |Aprobado |70        |
|Rasul |Math   |45     |Reprobado|50        |
|Kumar |English|59     |Reprobado|55        |
+------+-------+-------+---------+----------+



#### Crear una nueva columna

In [0]:
# Same pass/fail rule as a brand-new column: because 'nuevo_status' does not
# exist in `df`, withColumn appends it and leaves `status` untouched.
df_modif = df.withColumn(
    "nuevo_status",
    when(col("puntaje") >= 60, "Aprobado")
    .when(col("puntaje") < 60, "Reprobado")
    .otherwise("Ausente"),
)

df_modif.show(truncate=False)

+------+-------+-------+------+----------+------------+
|nombre|ramo   |puntaje|status|asistencia|nuevo_status|
+------+-------+-------+------+----------+------------+
|Raj   |Science|86     |A     |90        |Aprobado    |
|Rahul |Math   |56     |R     |70        |Reprobado   |
|Raghav|English|77     |A     |85        |Aprobado    |
|Raja  |Science|80     |A     |85        |Aprobado    |
|Rama  |Math   |69     |A     |70        |Aprobado    |
|Rasul |Math   |45     |R     |50        |Reprobado   |
|Kumar |English|59     |R     |55        |Reprobado   |
+------+-------+-------+------+----------+------------+



#### Utilizando una expresión SQL

In [0]:
# The same pass/fail rule, written as a SQL CASE expression and parsed by expr().
caso_status = '''CASE WHEN puntaje >= 60 THEN "Aprobado"
                                                      WHEN puntaje < 60 THEN "Reprobado"
                                                      ELSE "Ausente"
                                                  END
                                              '''

df_modif = df.withColumn("nuevo_status", expr(caso_status))

df_modif.show(truncate=False)

+------+-------+-------+------+----------+------------+
|nombre|ramo   |puntaje|status|asistencia|nuevo_status|
+------+-------+-------+------+----------+------------+
|Raj   |Science|86     |A     |90        |Aprobado    |
|Rahul |Math   |56     |R     |70        |Reprobado   |
|Raghav|English|77     |A     |85        |Aprobado    |
|Raja  |Science|80     |A     |85        |Aprobado    |
|Rama  |Math   |69     |A     |70        |Aprobado    |
|Rasul |Math   |45     |R     |50        |Reprobado   |
|Kumar |English|59     |R     |55        |Reprobado   |
+------+-------+-------+------+----------+------------+



#### Multi-condiciones usando los operadores AND (`&`) y OR (`|`)

In [0]:
# Each grade requires BOTH its score and its attendance threshold to be met.
# Every comparison is parenthesised before being combined with `&`.
es_distincion = (df.puntaje >= 80) & (df.asistencia >= 80)
es_bueno = (df.puntaje >= 60) & (df.asistencia >= 60)

# Checked in order, so a row that qualifies for "Distinción" never reaches "Bueno".
df_modif = df.withColumn(
    "grado",
    when(es_distincion, "Distinción")
    .when(es_bueno, "Bueno")
    .otherwise("Promedio"),
)

df_modif.show(truncate=False)

+------+-------+-------+------+----------+----------+
|nombre|ramo   |puntaje|status|asistencia|grado     |
+------+-------+-------+------+----------+----------+
|Raj   |Science|86     |A     |90        |Distinción|
|Rahul |Math   |56     |R     |70        |Promedio  |
|Raghav|English|77     |A     |85        |Bueno     |
|Raja  |Science|80     |A     |85        |Distinción|
|Rama  |Math   |69     |A     |70        |Bueno     |
|Rasul |Math   |45     |R     |50        |Promedio  |
|Kumar |English|59     |R     |55        |Promedio  |
+------+-------+-------+------+----------+----------+



#### Ejemplo extra

In [0]:
from pyspark.sql.functions import *

# (id, edad) pairs for the extra example.
personas = [
    (1, 1),
    (2, 13),
    (3, 18),
    (4, 60),
    (5, 90),
    (6, 25),
    (7, 33),
    (8, 40),
]

# Schema given as a DDL string, so both columns are explicitly INT.
# Note: this rebinds `df`, replacing the student DataFrame used above.
df = spark.createDataFrame(personas, schema="id INT, edad INT")

df.show()

+---+----+
| id|edad|
+---+----+
|  1|   1|
|  2|  13|
|  3|  18|
|  4|  60|
|  5|  90|
|  6|  25|
|  7|  33|
|  8|  40|
+---+----+



In [0]:
# Bucket each person into an age category with a SQL CASE expression.
# Branches are evaluated top to bottom and the first match wins.
# The first range starts at 0 (not 1) so that a baby under one year old is
# labelled "Recién nacido" instead of falling through to ELSE ("Adulto").
# NOTE: NULL or negative ages still end up in the ELSE branch.
df_modif = df.withColumn('categoria', expr('''CASE WHEN edad BETWEEN 0 AND 2 THEN "Recién nacido"
                                                   WHEN edad > 2 AND edad <=5 THEN "Infante"
                                                   WHEN edad > 5 AND edad <=12 THEN "Niño"
                                                   WHEN edad > 12 AND edad <=18 THEN "Adolescente"
                                                   ELSE "Adulto"
                                              END
                                           '''))

df_modif.show(truncate=False)

+---+----+-------------+
|id |edad|categoria    |
+---+----+-------------+
|1  |1   |Recién nacido|
|2  |13  |Adolescente  |
|3  |18  |Adolescente  |
|4  |60  |Adulto       |
|5  |90  |Adulto       |
|6  |25  |Adulto       |
|7  |33  |Adulto       |
|8  |40  |Adulto       |
+---+----+-------------+



In [0]:
# Age categories built with the when/otherwise API.
# Branches are evaluated in order and the first match wins.
# `between` is inclusive on both ends; the range starts at 0 (not 1) so that a
# baby under one year old is labelled "Recién nacido" instead of falling
# through to otherwise("Adulto").
# NOTE: NULL or negative ages still end up in the otherwise branch.
df_modif = df.withColumn(
    'categoria',
    when(col('edad').between(0, 2), "Recién nacido")
    .when((col('edad') > 2) & (col('edad') <= 5), "Infante")
    .when((col('edad') > 5) & (col('edad') <= 12), "Niño")
    .when((col('edad') > 12) & (col('edad') <= 18), "Adolescente")
    .otherwise("Adulto"),
)

df_modif.show(truncate=False)

+---+----+-------------+
|id |edad|categoria    |
+---+----+-------------+
|1  |1   |Recién nacido|
|2  |13  |Adolescente  |
|3  |18  |Adolescente  |
|4  |60  |Adulto       |
|5  |90  |Adulto       |
|6  |25  |Adulto       |
|7  |33  |Adulto       |
|8  |40  |Adulto       |
+---+----+-------------+

