### ![Spark Logo Tiny](https://files.training.databricks.com/images/105/logo_spark_tiny.png) Eliminar columnas

In [0]:
from pyspark.sql.functions import lit, concat, col

employee_data = [(10,"Raj","Kumar","1999","100","M",2000),
                 (20,"Rahul","Rajan","2002","200","f",2000),
                 (30,"Raghav","Manish","2010","100",None,2000),
                 (40,"Raja","Singh","2004","100","F",2000),
                 (50,"Rama","Krish","2008","400","M",2000),
                 (60,"Rasul","Kutty","2014","500","M",2000),
                 (70,"Kumar","Chand","2004","600","M",2000)
                ]
employee_schema = ["employee_id","first_name","last_name","doj",
                   "employee_dept_id","gender","salary"]

df = spark.createDataFrame(data=employee_data, schema=employee_schema)

#### Eliminar una columna

##### Forma 1

In [0]:
df_modif = df.drop('salary')

display(df_modif)

employee_id,first_name,last_name,doj,employee_dept_id,gender
10,Raj,Kumar,1999,100,M
20,Rahul,Rajan,2002,200,f
30,Raghav,Manish,2010,100,
40,Raja,Singh,2004,100,F
50,Rama,Krish,2008,400,M
60,Rasul,Kutty,2014,500,M
70,Kumar,Chand,2004,600,M


##### Forma 2

In [0]:
df_modif = df.drop(col('salary'))

display(df_modif)

employee_id,first_name,last_name,doj,employee_dept_id,gender
10,Raj,Kumar,1999,100,M
20,Rahul,Rajan,2002,200,f
30,Raghav,Manish,2010,100,
40,Raja,Singh,2004,100,F
50,Rama,Krish,2008,400,M
60,Rasul,Kutty,2014,500,M
70,Kumar,Chand,2004,600,M


##### Forma 3

In [0]:
df_modif = df.drop(df['salary'])

display(df_modif)

employee_id,first_name,last_name,doj,employee_dept_id,gender
10,Raj,Kumar,1999,100,M
20,Rahul,Rajan,2002,200,f
30,Raghav,Manish,2010,100,
40,Raja,Singh,2004,100,F
50,Rama,Krish,2008,400,M
60,Rasul,Kutty,2014,500,M
70,Kumar,Chand,2004,600,M


#### Eliminar múltiples columnas

##### Forma 1

In [0]:
df_modif = df.drop('gender').drop('salary')

display(df_modif)

employee_id,first_name,last_name,doj,employee_dept_id
10,Raj,Kumar,1999,100
20,Rahul,Rajan,2002,200
30,Raghav,Manish,2010,100
40,Raja,Singh,2004,100
50,Rama,Krish,2008,400
60,Rasul,Kutty,2014,500
70,Kumar,Chand,2004,600


##### Forma 2

In [0]:
df_modif = df.drop('gender','salary')

display(df_modif)

employee_id,first_name,last_name,doj,employee_dept_id
10,Raj,Kumar,1999,100
20,Rahul,Rajan,2002,200
30,Raghav,Manish,2010,100
40,Raja,Singh,2004,100
50,Rama,Krish,2008,400
60,Rasul,Kutty,2014,500
70,Kumar,Chand,2004,600


#### Error al utilizar .col para eliminar múltiples columnas

In [0]:
df_modif = df.drop(col('gender'),col('salary'))

display(df_modif)

[0;31m---------------------------------------------------------------------------[0m
[0;31mTypeError[0m                                 Traceback (most recent call last)
[0;32m<command-804623631765890>[0m in [0;36m<cell line: 1>[0;34m()[0m
[0;32m----> 1[0;31m [0mdf_modif[0m [0;34m=[0m [0mdf[0m[0;34m.[0m[0mdrop[0m[0;34m([0m[0mcol[0m[0;34m([0m[0;34m'gender'[0m[0;34m)[0m[0;34m,[0m[0mcol[0m[0;34m([0m[0;34m'salary'[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[1;32m      2[0m [0;34m[0m[0m
[1;32m      3[0m [0mdisplay[0m[0;34m([0m[0mdf_modif[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m

[0;32m/databricks/spark/python/pyspark/instrumentation_utils.py[0m in [0;36mwrapper[0;34m(*args, **kwargs)[0m
[1;32m     46[0m             [0mstart[0m [0;34m=[0m [0mtime[0m[0;34m.[0m[0mperf_counter[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[1;32m     47[0m             [0;32mtry[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m


#### Si tenemos un nombre de columna que no existe, la columna será ignorada

In [0]:
df_modif = df.drop('gender','salary','location')

display(df_modif)

employee_id,first_name,last_name,doj,employee_dept_id
10,Raj,Kumar,1999,100
20,Rahul,Rajan,2002,200
30,Raghav,Manish,2010,100
40,Raja,Singh,2004,100
50,Rama,Krish,2008,400
60,Rasul,Kutty,2014,500
70,Kumar,Chand,2004,600


#### Eliminar una lista de columnas

In [0]:
lista = ['doj','gender','salary']

df_modif = df.drop(*lista)

df_modif.show()

+-----------+----------+---------+----------------+
|employee_id|first_name|last_name|employee_dept_id|
+-----------+----------+---------+----------------+
|         10|       Raj|    Kumar|             100|
|         20|     Rahul|    Rajan|             200|
|         30|    Raghav|   Manish|             100|
|         40|      Raja|    Singh|             100|
|         50|      Rama|    Krish|             400|
|         60|     Rasul|    Kutty|             500|
|         70|     Kumar|    Chand|             600|
+-----------+----------+---------+----------------+

