### ![Spark Logo Tiny](https://files.training.databricks.com/images/105/logo_spark_tiny.png) Timestamp functions

In [None]:
from pyspark.sql.functions import *

# Sample rows: an id and a space-separated date/time string, both kept as text.
data = [
    ['1', '2020-02-01 11 01 19 06'],
    ['2', '2019-03-01 12 01 19 27'],
    ['3', '2021-06-15 10 02 20 44'],
]

# Only column names are supplied here; no column types are declared.
df = spark.createDataFrame(data, schema=['id', 'input'])

# Show the resulting schema, then the rows without truncating long values.
df.printSchema()
df.show(truncate=False)

root
 |-- id: string (nullable = true)
 |-- input: string (nullable = true)

+---+----------------------+
|id |input                 |
+---+----------------------+
|1  |2020-02-01 11 01 19 06|
|2  |2019-03-01 12 01 19 27|
|3  |2021-06-15 10 02 20 44|
+---+----------------------+



#### current_timestamp( )

In [None]:
# Append a column holding the current timestamp, keeping every original column.
now_expr = current_timestamp()
df_modif = df.withColumn('current_timestamp', now_expr)

# Show the schema (with the new timestamp column) and the untruncated rows.
df_modif.printSchema()
df_modif.show(truncate=False)

root
 |-- id: string (nullable = true)
 |-- input: string (nullable = true)
 |-- current_timestamp: timestamp (nullable = false)

+---+----------------------+-----------------------+
|id |input                 |current_timestamp      |
+---+----------------------+-----------------------+
|1  |2020-02-01 11 01 19 06|2023-01-12 23:11:16.282|
|2  |2019-03-01 12 01 19 27|2023-01-12 23:11:16.282|
|3  |2021-06-15 10 02 20 44|2023-01-12 23:11:16.282|
+---+----------------------+-----------------------+



In [None]:
# Project only the current timestamp; the original columns are not selected.
df_modif = df.select(
    current_timestamp().alias('current_timestamp'),
)

# Show the single-column schema and the untruncated rows.
df_modif.printSchema()
df_modif.show(truncate=False)

root
 |-- current_timestamp: timestamp (nullable = false)

+-----------------------+
|current_timestamp      |
+-----------------------+
|2023-01-12 23:11:16.754|
|2023-01-12 23:11:16.754|
|2023-01-12 23:11:16.754|
+-----------------------+



#### to_timestamp( )

Convierte una columna de tipo string que contiene una fecha y hora en una columna de tipo Timestamp, interpretando el texto según el patrón de formato indicado.

##### String a Timestamp

Ejemplo 1

In [None]:
# Parse the space-separated text in `input` into a timestamp column.
# The pattern mirrors the input layout: date, then hour, minute, second and
# fraction fields separated by single spaces.
input_pattern = 'yyyy-MM-dd HH mm ss SSS'
df_modif = df.select(
    col('input'),
    to_timestamp(col('input'), input_pattern).alias('fecha_nueva'),
)

# Show the original text next to the parsed timestamp.
df_modif.printSchema()
df_modif.show(truncate=False)

root
 |-- input: string (nullable = true)
 |-- fecha_nueva: timestamp (nullable = true)

+----------------------+----------------------+
|input                 |fecha_nueva           |
+----------------------+----------------------+
|2020-02-01 11 01 19 06|2020-02-01 11:01:19.06|
|2019-03-01 12 01 19 27|2019-03-01 12:01:19.27|
|2021-06-15 10 02 20 44|2021-06-15 10:02:20.44|
+----------------------+----------------------+



Ejemplo 2

In [None]:
from pyspark.sql.functions import *

# Sample rows: an id and a date-only string (no time part).
data = [
    ['1', '2020-02-01'],
    ['2', '2019-03-07'],
    ['3', '2021-06-10'],
]

# Only column names are supplied here; no column types are declared.
df = spark.createDataFrame(data, schema=['id', 'input'])

# Show the resulting schema, then the rows without truncating values.
df.printSchema()
df.show(truncate=False)

root
 |-- id: string (nullable = true)
 |-- input: string (nullable = true)

+---+----------+
|id |input     |
+---+----------+
|1  |2020-02-01|
|2  |2019-03-07|
|3  |2021-06-10|
+---+----------+



In [None]:
# NOTE(review): the `input` values built above are date-only strings, while this
# pattern also expects an hour:minute:second part. The recorded output of this
# cell shows null for every row, presumably because of that mismatch. If nulls
# are not the intended demonstration, a date-only pattern such as 'yyyy-MM-dd'
# should parse these values -- confirm the intent before changing it.
timestamp_pattern = 'yyyy-MM-dd HH:mm:ss'
df_timestamp = df.withColumn(
    "timestamp",
    to_timestamp(col('input'), timestamp_pattern),
)

df_timestamp.show()

+---+----------+---------+
| id|     input|timestamp|
+---+----------+---------+
|  1|2020-02-01|     null|
|  2|2019-03-07|     null|
|  3|2021-06-10|     null|
+---+----------+---------+



Ejemplo 3

In [None]:
# Read the races CSV: first line is a header row, fields are comma-separated.
# No schema or schema-inference option is set on this reader.
df_races = (
    spark.read
    .format("csv")
    .option("header", True)
    .option("sep", ",")
    .load('/FileStore/tables/raw/races.csv')
)

# Preview the first three rows in tabular layout without truncating values.
df_races.show(n=3, truncate=False, vertical=False)

+------+----+-----+---------+---------------------+----------+--------+-------------------------------------------------------+
|raceId|year|round|circuitId|name                 |date      |time    |url                                                    |
+------+----+-----+---------+---------------------+----------+--------+-------------------------------------------------------+
|1     |2009|1    |1        |Australian Grand Prix|2009-03-29|06:00:00|http://en.wikipedia.org/wiki/2009_Australian_Grand_Prix|
|2     |2009|2    |2        |Malaysian Grand Prix |2009-04-05|09:00:00|http://en.wikipedia.org/wiki/2009_Malaysian_Grand_Prix |
|3     |2009|3    |17       |Chinese Grand Prix   |2009-04-19|07:00:00|http://en.wikipedia.org/wiki/2009_Chinese_Grand_Prix   |
+------+----+-----+---------+---------------------+----------+--------+-------------------------------------------------------+
only showing top 3 rows



In [None]:
# Print the column names and types of the frame loaded from the CSV file.
# The reader set no schema or inference option, and the recorded output
# lists every column as string.
df_races.printSchema()

root
 |-- raceId: string (nullable = true)
 |-- year: string (nullable = true)
 |-- round: string (nullable = true)
 |-- circuitId: string (nullable = true)
 |-- name: string (nullable = true)
 |-- date: string (nullable = true)
 |-- time: string (nullable = true)
 |-- url: string (nullable = true)



In [None]:
# Join the separate `date` and `time` strings with a single space so the
# combined text matches the date-plus-time pattern, then parse it.
date_time_text = concat(col('date'), lit(' '), col('time'))
df_timestamp = df_races.withColumn(
    "timestamp",
    to_timestamp(date_time_text, 'yyyy-MM-dd HH:mm:ss'),
)

df_timestamp.show()

+------+----+-----+---------+--------------------+----------+--------+--------------------+-------------------+
|raceId|year|round|circuitId|                name|      date|    time|                 url|          timestamp|
+------+----+-----+---------+--------------------+----------+--------+--------------------+-------------------+
|     1|2009|    1|        1|Australian Grand ...|2009-03-29|06:00:00|http://en.wikiped...|2009-03-29 06:00:00|
|     2|2009|    2|        2|Malaysian Grand Prix|2009-04-05|09:00:00|http://en.wikiped...|2009-04-05 09:00:00|
|     3|2009|    3|       17|  Chinese Grand Prix|2009-04-19|07:00:00|http://en.wikiped...|2009-04-19 07:00:00|
|     4|2009|    4|        3|  Bahrain Grand Prix|2009-04-26|12:00:00|http://en.wikiped...|2009-04-26 12:00:00|
|     5|2009|    5|        4|  Spanish Grand Prix|2009-05-10|12:00:00|http://en.wikiped...|2009-05-10 12:00:00|
|     6|2009|    6|        6|   Monaco Grand Prix|2009-05-24|12:00:00|http://en.wikiped...|2009-05-24 12

In [None]:
# Print the schema to check the type of the added `timestamp` column.
df_timestamp.printSchema()

root
 |-- raceId: string (nullable = true)
 |-- year: string (nullable = true)
 |-- round: string (nullable = true)
 |-- circuitId: string (nullable = true)
 |-- name: string (nullable = true)
 |-- date: string (nullable = true)
 |-- time: string (nullable = true)
 |-- url: string (nullable = true)
 |-- timestamp: timestamp (nullable = true)



##### MM/dd/yyyy a Timestamp

In [None]:
from pyspark.sql.functions import *

# One-column frame of dates written as month/day/year text.
df = spark.createDataFrame(
    [('11/25/1991',), ('12/24/1992',), ('10/15/1994',)],
    schema=['date_str'],
)

# Show the schema, then the rows with default display settings.
df.printSchema()
df.show()

root
 |-- date_str: string (nullable = true)

+----------+
|  date_str|
+----------+
|11/25/1991|
|12/24/1992|
|10/15/1994|
+----------+



In [None]:
# Two-step conversion: parse the MM/dd/yyyy text into a Unix timestamp, then
# render it back as text with from_unixtime's default format. The recorded
# schema shows `fecha_nueva` as a string column, not a timestamp.
epoch_expr = unix_timestamp('date_str', 'MM/dd/yyyy')
df_modif = df.select(
    'date_str',
    from_unixtime(epoch_expr).alias('fecha_nueva'),
)

# Show the original text next to the re-rendered value.
df_modif.printSchema()
df_modif.show(truncate=False)

root
 |-- date_str: string (nullable = true)
 |-- fecha_nueva: string (nullable = true)

+----------+-------------------+
|date_str  |fecha_nueva        |
+----------+-------------------+
|11/25/1991|1991-11-25 00:00:00|
|12/24/1992|1992-12-24 00:00:00|
|10/15/1994|1994-10-15 00:00:00|
+----------+-------------------+



In [None]:
# Turn the text produced by the previous step into a timestamp-typed column.
rendered_pattern = 'yyyy-MM-dd HH:mm:ss'
df_timestamp = df_modif.select(
    col('fecha_nueva'),
    to_timestamp(col('fecha_nueva'), rendered_pattern).alias('timestamp'),
)

# Show the string column next to its parsed timestamp.
df_timestamp.printSchema()
df_timestamp.show(truncate=False)

root
 |-- fecha_nueva: string (nullable = true)
 |-- timestamp: timestamp (nullable = true)

+-------------------+-------------------+
|fecha_nueva        |timestamp          |
+-------------------+-------------------+
|1991-11-25 00:00:00|1991-11-25 00:00:00|
|1992-12-24 00:00:00|1992-12-24 00:00:00|
|1994-10-15 00:00:00|1994-10-15 00:00:00|
+-------------------+-------------------+



#### hour( )
#### minute( )
#### second( )

In [None]:
from pyspark.sql.functions import *

# Sample rows: an id and a date/time string with colon-separated time fields.
data = [
    ['1', '2020-02-01 11:01:19.06'],
    ['2', '2019-03-01 12:01:19.27'],
    ['3', '2021-06-15 10:02:20.44'],
]

# Only column names are supplied here; no column types are declared.
df = spark.createDataFrame(data, schema=['id', 'input'])

# Show the resulting schema, then the rows without truncating long values.
df.printSchema()
df.show(truncate=False)

root
 |-- id: string (nullable = true)
 |-- input: string (nullable = true)

+---+----------------------+
|id |input                 |
+---+----------------------+
|1  |2020-02-01 11:01:19.06|
|2  |2019-03-01 12:01:19.27|
|3  |2021-06-15 10:02:20.44|
+---+----------------------+



In [None]:
# Build one extraction column per time component, each applied to `input`
# and aliased with the component name.
time_parts = [
    extractor(col('input')).alias(label)
    for label, extractor in (('hour', hour), ('minute', minute), ('second', second))
]
df_modif = df.select(col('input'), *time_parts)

# Show the original text next to the extracted integer components.
df_modif.printSchema()
df_modif.show(truncate=False)

root
 |-- input: string (nullable = true)
 |-- hour: integer (nullable = true)
 |-- minute: integer (nullable = true)
 |-- second: integer (nullable = true)

+----------------------+----+------+------+
|input                 |hour|minute|second|
+----------------------+----+------+------+
|2020-02-01 11:01:19.06|11  |1     |19    |
|2019-03-01 12:01:19.27|12  |1     |19    |
|2021-06-15 10:02:20.44|10  |2     |20    |
+----------------------+----+------+------+



#### from_unixtime( )

In [None]:
from pyspark.sql.functions import *

# Sample rows: a numeric id and an opening date stored as an integer epoch value.
data = [
    (0, 1100746394),
    (1, 1474410343),
    (2, 1116610009),
    (3, 1408024997),
]

# NOTE(review): `columnas` is not referenced anywhere in this cell, and it names
# the first column 'local_id' while the schema string below uses 'local_int'.
# Confirm which name is intended.
columnas = ['local_id', 'fecha_apertura']

# The DDL-style schema string declares both columns as INT.
df = spark.createDataFrame(data, schema='local_int INT, fecha_apertura INT')

# Show the schema, then the rows with default display settings.
df.printSchema()
df.show()

root
 |-- local_int: integer (nullable = true)
 |-- fecha_apertura: integer (nullable = true)

+---------+--------------+
|local_int|fecha_apertura|
+---------+--------------+
|        0|    1100746394|
|        1|    1474410343|
|        2|    1116610009|
|        3|    1408024997|
+---------+--------------+



##### Ejemplo 1

UNIX epoch (Integer) a String (con formato Timestamp)

In [None]:
# Render the integer epoch column as text using from_unixtime's default format.
# The recorded schema shows the new column as a string.
apertura_text = from_unixtime('fecha_apertura')
df_modif = df.withColumn('fecha_apertura_string', apertura_text)

# Show the schema and the untruncated rows.
df_modif.printSchema()
df_modif.show(truncate=False)

root
 |-- local_int: integer (nullable = true)
 |-- fecha_apertura: integer (nullable = true)
 |-- fecha_apertura_string: string (nullable = true)

+---------+--------------+---------------------+
|local_int|fecha_apertura|fecha_apertura_string|
+---------+--------------+---------------------+
|0        |1100746394    |2004-11-18 02:53:14  |
|1        |1474410343    |2016-09-20 22:25:43  |
|2        |1116610009    |2005-05-20 17:26:49  |
|3        |1408024997    |2014-08-14 14:03:17  |
+---------+--------------+---------------------+



##### Ejemplo 2

UNIX epoch (Integer) a String

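Este ejemplo no se puede resolver aplicando date_format directamente sobre la columna: date_format sí admite patrones con horas, minutos y segundos, pero espera una columna de tipo date, timestamp o string, no un entero con el epoch. Por eso se usa from_unixtime, que convierte el epoch y aplica el formato en un solo paso.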

In [None]:
# Output pattern: weekday name, abbreviated month, day, year, then 12-hour
# time with an AM/PM marker.
formato = 'EEEE, MMM d, yyyy h:mm a'

# Render the integer epoch column as text using the custom pattern.
df_modif = df.withColumn(
    'fecha_apertura_string',
    from_unixtime('fecha_apertura', formato),
)

# Show the schema and the untruncated rows.
df_modif.printSchema()
df_modif.show(truncate=False)

root
 |-- local_int: integer (nullable = true)
 |-- fecha_apertura: integer (nullable = true)
 |-- fecha_apertura_string: string (nullable = true)

+---------+--------------+------------------------------+
|local_int|fecha_apertura|fecha_apertura_string         |
+---------+--------------+------------------------------+
|0        |1100746394    |Thursday, Nov 18, 2004 2:53 AM|
|1        |1474410343    |Tuesday, Sep 20, 2016 10:25 PM|
|2        |1116610009    |Friday, May 20, 2005 5:26 PM  |
|3        |1408024997    |Thursday, Aug 14, 2014 2:03 PM|
+---------+--------------+------------------------------+



#### cast()

In [None]:
from pyspark.sql.functions import *

# Sample rows: a numeric id and an opening date stored as an integer epoch value.
data = [
    (0, 1100746394),
    (1, 1474410343),
    (2, 1116610009),
    (3, 1408024997),
]

# NOTE(review): `columnas` is not used by this cell, and its first entry
# ('local_id') differs from the column name in the schema string ('local_int').
# Confirm which name is intended.
columnas = ['local_id', 'fecha_apertura']

# The DDL-style schema string declares both columns as INT.
df = spark.createDataFrame(data, schema='local_int INT, fecha_apertura INT')

# Show the schema, then the rows with default display settings.
df.printSchema()
df.show()

root
 |-- local_int: integer (nullable = true)
 |-- fecha_apertura: integer (nullable = true)

+---------+--------------+
|local_int|fecha_apertura|
+---------+--------------+
|        0|    1100746394|
|        1|    1474410343|
|        2|    1116610009|
|        3|    1408024997|
+---------+--------------+



In [None]:
# Cast the integer epoch column directly to a timestamp type. Unlike
# from_unixtime, the recorded schema shows a timestamp column, not a string.
apertura_ts = col('fecha_apertura').cast('timestamp')
df_modif = df.withColumn('timestamp', apertura_ts)

# Show the schema and the untruncated rows.
df_modif.printSchema()
df_modif.show(truncate=False)

root
 |-- local_int: integer (nullable = true)
 |-- fecha_apertura: integer (nullable = true)
 |-- timestamp: timestamp (nullable = true)

+---------+--------------+-------------------+
|local_int|fecha_apertura|timestamp          |
+---------+--------------+-------------------+
|0        |1100746394    |2004-11-18 02:53:14|
|1        |1474410343    |2016-09-20 22:25:43|
|2        |1116610009    |2005-05-20 17:26:49|
|3        |1408024997    |2014-08-14 14:03:17|
+---------+--------------+-------------------+

