### ![Spark Logo Tiny](https://files.training.databricks.com/images/105/logo_spark_tiny.png) Creación de un Dataframe con una lista y .toDF

#### Ejemplo 1

In [0]:
empleados = [(1, 'Scott', 'Tiger', 1000.0, 'United States', '+1 123 456 7890', '123 45 6789'),
             (2, 'Henry', 'Ford', 1250.0, 'India', '+91 234 567 8901', '456 78 9123'),
             (3, 'Nick', 'Menza', 750.0, 'United Kingdom', '+44 111 111 1111', '222 33 4444'),
             (4, 'Bill', 'Gates', 1500.0, 'Asutralia', '+61 987 654 3210', '789 12 6118')
            ]

df = spark.createDataFrame(empleados).toDF('id','nombre', 'apellido', 'salario', 'nacionalidad', 'telefono', 'ssn')

In [0]:
df.printSchema()

root
 |-- id: long (nullable = true)
 |-- nombre: string (nullable = true)
 |-- apellido: string (nullable = true)
 |-- salario: double (nullable = true)
 |-- nacionalidad: string (nullable = true)
 |-- telefono: string (nullable = true)
 |-- ssn: string (nullable = true)



In [0]:
df.show()

+---+------+--------+-------+--------------+----------------+-----------+
| id|nombre|apellido|salario|  nacionalidad|        telefono|        ssn|
+---+------+--------+-------+--------------+----------------+-----------+
|  1| Scott|   Tiger| 1000.0| United States| +1 123 456 7890|123 45 6789|
|  2| Henry|    Ford| 1250.0|         India|+91 234 567 8901|456 78 9123|
|  3|  Nick|   Menza|  750.0|United Kingdom|+44 111 111 1111|222 33 4444|
|  4|  Bill|   Gates| 1500.0|     Asutralia|+61 987 654 3210|789 12 6118|
+---+------+--------+-------+--------------+----------------+-----------+



#### Ejemplo 2

In [0]:
df = spark.read.format("csv").\
     option("inferSchema",True).\
     option("header",True).\
     option("sep",",").\
     load('/FileStore/2021-03-21/races.csv')

df.show(n=3, truncate=False, vertical=False)

+------+----+-----+---------+---------------------+----------+--------+-------------------------------------------------------+
|raceId|year|round|circuitId|name                 |date      |time    |url                                                    |
+------+----+-----+---------+---------------------+----------+--------+-------------------------------------------------------+
|1     |2009|1    |1        |Australian Grand Prix|2009-03-29|06:00:00|http://en.wikipedia.org/wiki/2009_Australian_Grand_Prix|
|2     |2009|2    |2        |Malaysian Grand Prix |2009-04-05|09:00:00|http://en.wikipedia.org/wiki/2009_Malaysian_Grand_Prix |
|3     |2009|3    |17       |Chinese Grand Prix   |2009-04-19|07:00:00|http://en.wikipedia.org/wiki/2009_Chinese_Grand_Prix   |
+------+----+-----+---------+---------------------+----------+--------+-------------------------------------------------------+
only showing top 3 rows



In [0]:
df = spark.read.\
     option("inferSchema",True).\
     option("header",True).\
     option("sep",",").\
     csv('/FileStore/2021-03-21/races.csv').\
     toDF('carreraId','ano','ronda','circuitoId','nombre','fecha','hora','url')

df.show(n=3, truncate=False, vertical=False)

+---------+----+-----+----------+---------------------+----------+--------+-------------------------------------------------------+
|carreraId|ano |ronda|circuitoId|nombre               |fecha     |hora    |url                                                    |
+---------+----+-----+----------+---------------------+----------+--------+-------------------------------------------------------+
|1        |2009|1    |1         |Australian Grand Prix|2009-03-29|06:00:00|http://en.wikipedia.org/wiki/2009_Australian_Grand_Prix|
|2        |2009|2    |2         |Malaysian Grand Prix |2009-04-05|09:00:00|http://en.wikipedia.org/wiki/2009_Malaysian_Grand_Prix |
|3        |2009|3    |17        |Chinese Grand Prix   |2009-04-19|07:00:00|http://en.wikipedia.org/wiki/2009_Chinese_Grand_Prix   |
+---------+----+-----+----------+---------------------+----------+--------+-------------------------------------------------------+
only showing top 3 rows



#### Ejemplo 3 (Incorrecto)

In [0]:
columnas = ['carreraId','ano','ronda','circuitoId','nombre','fecha','hora','url']

df = spark.read.\
     option("inferSchema",True).\
     option("header",True).\
     option("sep",",").\
     csv('/FileStore/2021-03-21/races.csv').\
     toDF(columnas)

df.show(n=3, truncate=False, vertical=False)

[0;31m---------------------------------------------------------------------------[0m
[0;31mIllegalArgumentException[0m                  Traceback (most recent call last)
File [0;32m<command-619386710229849>:3[0m
[1;32m      1[0m columnas [38;5;241m=[39m [[38;5;124m'[39m[38;5;124mcarreraId[39m[38;5;124m'[39m,[38;5;124m'[39m[38;5;124mano[39m[38;5;124m'[39m,[38;5;124m'[39m[38;5;124mronda[39m[38;5;124m'[39m,[38;5;124m'[39m[38;5;124mcircuitoId[39m[38;5;124m'[39m,[38;5;124m'[39m[38;5;124mnombre[39m[38;5;124m'[39m,[38;5;124m'[39m[38;5;124mfecha[39m[38;5;124m'[39m,[38;5;124m'[39m[38;5;124mhora[39m[38;5;124m'[39m,[38;5;124m'[39m[38;5;124murl[39m[38;5;124m'[39m]
[0;32m----> 3[0m df [38;5;241m=[39m spark[38;5;241m.[39mread[38;5;241m.[39m\
[1;32m      4[0m      option([38;5;124m"[39m[38;5;124minferSchema[39m[38;5;124m"[39m,[38;5;28;01mTrue[39;00m)[38;5;241m.[39m\
[1;32m      5[0m      option([38;5;124m"[39m[38;5;124

#### Ejemplo 4

In [0]:
columnas = ['carreraId','ano','ronda','circuitoId','nombre','fecha','hora','url']
print(*columnas)

carreraId ano ronda circuitoId nombre fecha hora url


In [0]:
columnas = ['carreraId','ano','ronda','circuitoId','nombre','fecha','hora','url']

df = spark.read.\
     option("inferSchema",True).\
     option("header",True).\
     option("sep",",").\
     csv('/FileStore/2021-03-21/races.csv').\
     toDF(*columnas)

df.show(n=3, truncate=False, vertical=False)

+---------+----+-----+----------+---------------------+----------+--------+-------------------------------------------------------+
|carreraId|ano |ronda|circuitoId|nombre               |fecha     |hora    |url                                                    |
+---------+----+-----+----------+---------------------+----------+--------+-------------------------------------------------------+
|1        |2009|1    |1         |Australian Grand Prix|2009-03-29|06:00:00|http://en.wikipedia.org/wiki/2009_Australian_Grand_Prix|
|2        |2009|2    |2         |Malaysian Grand Prix |2009-04-05|09:00:00|http://en.wikipedia.org/wiki/2009_Malaysian_Grand_Prix |
|3        |2009|3    |17        |Chinese Grand Prix   |2009-04-19|07:00:00|http://en.wikipedia.org/wiki/2009_Chinese_Grand_Prix   |
+---------+----+-----+----------+---------------------+----------+--------+-------------------------------------------------------+
only showing top 3 rows

