### ![Spark Logo Tiny](https://files.training.databricks.com/images/105/logo_spark_tiny.png) Timestamp functions

In [0]:
from pyspark.sql.functions import *

# Sample rows: an id plus a date-time string whose fields are separated by spaces.
data = [
    ['1', '2020-02-01 11 01 19 06'],
    ['2', '2019-03-01 12 01 19 27'],
    ['3', '2021-06-15 10 02 20 44'],
]

df = spark.createDataFrame(data, schema=['id', 'input'])

# Both columns are created as strings; show the full values without truncation.
df.printSchema()
df.show(truncate=False)

root
 |-- id: string (nullable = true)
 |-- input: string (nullable = true)

+---+----------------------+
|id |input                 |
+---+----------------------+
|1  |2020-02-01 11 01 19 06|
|2  |2019-03-01 12 01 19 27|
|3  |2021-06-15 10 02 20 44|
+---+----------------------+



#### current_timestamp( )

In [0]:
# Keep every existing column and append the current timestamp as a new one.
df_modif = df.select(
    '*',
    current_timestamp().alias('current_timestamp'),
)

df_modif.printSchema()
df_modif.show(truncate=False)

root
 |-- id: string (nullable = true)
 |-- input: string (nullable = true)
 |-- current_timestamp: timestamp (nullable = false)

+---+----------------------+-----------------------+
|id |input                 |current_timestamp      |
+---+----------------------+-----------------------+
|1  |2020-02-01 11 01 19 06|2023-01-12 23:11:16.282|
|2  |2019-03-01 12 01 19 27|2023-01-12 23:11:16.282|
|3  |2021-06-15 10 02 20 44|2023-01-12 23:11:16.282|
+---+----------------------+-----------------------+



In [0]:
# Project only the current timestamp (one value per row of df).
ts_actual = current_timestamp().alias('current_timestamp')
df_modif = df.select(ts_actual)

df_modif.printSchema()
df_modif.show(truncate=False)

root
 |-- current_timestamp: timestamp (nullable = false)

+-----------------------+
|current_timestamp      |
+-----------------------+
|2023-01-12 23:11:16.754|
|2023-01-12 23:11:16.754|
|2023-01-12 23:11:16.754|
+-----------------------+



#### to_timestamp( )

Convierte un string que representa una fecha y hora en una columna de tipo Timestamp, según el patrón indicado.

##### String a Timestamp

In [0]:
# Parse the space-separated string into a timestamp column; the pattern mirrors
# the input layout (date, hour, minute, second and fraction separated by spaces).
df_modif = df.select(
    'input',
    to_timestamp('input', 'yyyy-MM-dd HH mm ss SSS').alias('fecha_nueva'),
)

df_modif.printSchema()
df_modif.show(truncate=False)

root
 |-- input: string (nullable = true)
 |-- fecha_nueva: timestamp (nullable = true)

+----------------------+----------------------+
|input                 |fecha_nueva           |
+----------------------+----------------------+
|2020-02-01 11 01 19 06|2020-02-01 11:01:19.06|
|2019-03-01 12 01 19 27|2019-03-01 12:01:19.27|
|2021-06-15 10 02 20 44|2021-06-15 10:02:20.44|
+----------------------+----------------------+



##### MM/dd/yyyy a Timestamp

In [0]:
from pyspark.sql.functions import *

# One-column DataFrame of date strings written as month/day/year.
df = spark.createDataFrame(
    [(fecha,) for fecha in ('11/25/1991', '12/24/1992', '10/15/1994')],
    schema=['date_str'],
)

df.printSchema()
df.show()

root
 |-- date_str: string (nullable = true)

+----------+
|  date_str|
+----------+
|11/25/1991|
|12/24/1992|
|10/15/1994|
+----------+



In [0]:
# Two-step conversion: parse the MM/dd/yyyy string to UNIX seconds, then render
# those seconds back as text. The resulting column is still a string.
df_modif = df.select(
    col('date_str'),
    from_unixtime(
        unix_timestamp(col('date_str'), 'MM/dd/yyyy')
    ).alias('fecha_nueva'),
)

df_modif.printSchema()
df_modif.show(truncate=False)

root
 |-- date_str: string (nullable = true)
 |-- fecha_nueva: string (nullable = true)

+----------+-------------------+
|date_str  |fecha_nueva        |
+----------+-------------------+
|11/25/1991|1991-11-25 00:00:00|
|12/24/1992|1992-12-24 00:00:00|
|10/15/1994|1994-10-15 00:00:00|
+----------+-------------------+



In [0]:
# Turn the formatted string produced above into a real timestamp column.
df_timestamp = df_modif.select(
    'fecha_nueva',
    to_timestamp('fecha_nueva', 'yyyy-MM-dd HH:mm:ss').alias('timestamp'),
)

df_timestamp.printSchema()
df_timestamp.show(truncate=False)

root
 |-- fecha_nueva: string (nullable = true)
 |-- timestamp: timestamp (nullable = true)

+-------------------+-------------------+
|fecha_nueva        |timestamp          |
+-------------------+-------------------+
|1991-11-25 00:00:00|1991-11-25 00:00:00|
|1992-12-24 00:00:00|1992-12-24 00:00:00|
|1994-10-15 00:00:00|1994-10-15 00:00:00|
+-------------------+-------------------+



#### hour( )
#### minute( )
#### second( )

In [0]:
from pyspark.sql.functions import *

# Rows of [id, timestamp string]; ids are the 1-based positions as strings.
data = [
    [str(posicion), marca]
    for posicion, marca in enumerate(
        (
            '2020-02-01 11:01:19.06',
            '2019-03-01 12:01:19.27',
            '2021-06-15 10:02:20.44',
        ),
        start=1,
    )
]

df = spark.createDataFrame(data, schema=['id', 'input'])

df.printSchema()
df.show(truncate=False)

root
 |-- id: string (nullable = true)
 |-- input: string (nullable = true)

+---+----------------------+
|id |input                 |
+---+----------------------+
|1  |2020-02-01 11:01:19.06|
|2  |2019-03-01 12:01:19.27|
|3  |2021-06-15 10:02:20.44|
+---+----------------------+



In [0]:
# Extract the time-of-day components from the string column; each extractor
# yields an integer column.
df_modif = df.select(
    'input',
    hour('input').alias('hour'),
    minute('input').alias('minute'),
    second('input').alias('second'),
)

df_modif.printSchema()
df_modif.show(truncate=False)

root
 |-- input: string (nullable = true)
 |-- hour: integer (nullable = true)
 |-- minute: integer (nullable = true)
 |-- second: integer (nullable = true)

+----------------------+----+------+------+
|input                 |hour|minute|second|
+----------------------+----+------+------+
|2020-02-01 11:01:19.06|11  |1     |19    |
|2019-03-01 12:01:19.27|12  |1     |19    |
|2021-06-15 10:02:20.44|10  |2     |20    |
+----------------------+----+------+------+



#### from_unixtime( )

In [0]:
from pyspark.sql.functions import *

# (identifier, opening date as an integer UNIX epoch value)
data = [
    (0, 1100746394),
    (1, 1474410343),
    (2, 1116610009),
    (3, 1408024997),
]

# NOTE(review): `columnas` is not used by this cell; the DDL schema string below
# names the first column `local_int`, not `local_id` -- confirm which is intended.
columnas = ['local_id', 'fecha_apertura']

# Explicit DDL schema so both columns are INT.
df = spark.createDataFrame(data, schema='local_int INT, fecha_apertura INT')

df.printSchema()
df.show()

root
 |-- local_int: integer (nullable = true)
 |-- fecha_apertura: integer (nullable = true)

+---------+--------------+
|local_int|fecha_apertura|
+---------+--------------+
|        0|    1100746394|
|        1|    1474410343|
|        2|    1116610009|
|        3|    1408024997|
+---------+--------------+



##### Ejemplo 1

UNIX epoch (Integer) a String (con formato Timestamp)

In [0]:
# Render the epoch integer as text using from_unixtime's default format,
# keeping the original columns alongside the new string column.
df_modif = df.select(
    '*',
    from_unixtime('fecha_apertura').alias('fecha_apertura_string'),
)

df_modif.printSchema()
df_modif.show(truncate=False)

root
 |-- local_int: integer (nullable = true)
 |-- fecha_apertura: integer (nullable = true)
 |-- fecha_apertura_string: string (nullable = true)

+---------+--------------+---------------------+
|local_int|fecha_apertura|fecha_apertura_string|
+---------+--------------+---------------------+
|0        |1100746394    |2004-11-18 02:53:14  |
|1        |1474410343    |2016-09-20 22:25:43  |
|2        |1116610009    |2005-05-20 17:26:49  |
|3        |1408024997    |2014-08-14 14:03:17  |
+---------+--------------+---------------------+



##### Ejemplo 2

UNIX epoch (Integer) a String

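Este ejemplo no se puede resolver aplicando date_format directamente: aunque dicha función sí admite patrones con horas, minutos y segundos, espera una columna de tipo fecha/timestamp (o un string con ese formato) y no un entero epoch, por lo que aquí se usa from_unixtime con el formato deseado.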

In [0]:
# Output pattern: full weekday name, abbreviated month, day, year and a
# 12-hour clock with AM/PM marker.
formato = 'EEEE, MMM d, yyyy h:mm a'

df_modif = df.withColumn(
    'fecha_apertura_string',
    from_unixtime(col('fecha_apertura'), format=formato),
)

df_modif.printSchema()
df_modif.show(truncate=False)

root
 |-- local_int: integer (nullable = true)
 |-- fecha_apertura: integer (nullable = true)
 |-- fecha_apertura_string: string (nullable = true)

+---------+--------------+------------------------------+
|local_int|fecha_apertura|fecha_apertura_string         |
+---------+--------------+------------------------------+
|0        |1100746394    |Thursday, Nov 18, 2004 2:53 AM|
|1        |1474410343    |Tuesday, Sep 20, 2016 10:25 PM|
|2        |1116610009    |Friday, May 20, 2005 5:26 PM  |
|3        |1408024997    |Thursday, Aug 14, 2014 2:03 PM|
+---------+--------------+------------------------------+



#### cast()

In [0]:
from pyspark.sql.functions import *

# (identifier, opening date as an integer UNIX epoch value)
data = [
    (0, 1100746394),
    (1, 1474410343),
    (2, 1116610009),
    (3, 1408024997),
]

# NOTE(review): `columnas` is not used by this cell; the DDL schema string below
# names the first column `local_int`, not `local_id` -- confirm which is intended.
columnas = ['local_id', 'fecha_apertura']

# Explicit DDL schema so both columns are INT.
df = spark.createDataFrame(data, schema='local_int INT, fecha_apertura INT')

df.printSchema()
df.show()

root
 |-- local_int: integer (nullable = true)
 |-- fecha_apertura: integer (nullable = true)

+---------+--------------+
|local_int|fecha_apertura|
+---------+--------------+
|        0|    1100746394|
|        1|    1474410343|
|        2|    1116610009|
|        3|    1408024997|
+---------+--------------+



In [0]:
# Cast the integer epoch column straight to a timestamp column (no intermediate
# string), adding it next to the original columns.
df_modif = df.withColumn(
    'timestamp',
    df['fecha_apertura'].cast('timestamp'),
)

df_modif.printSchema()
df_modif.show(truncate=False)

root
 |-- local_int: integer (nullable = true)
 |-- fecha_apertura: integer (nullable = true)
 |-- timestamp: timestamp (nullable = true)

+---------+--------------+-------------------+
|local_int|fecha_apertura|timestamp          |
+---------+--------------+-------------------+
|0        |1100746394    |2004-11-18 02:53:14|
|1        |1474410343    |2016-09-20 22:25:43|
|2        |1116610009    |2005-05-20 17:26:49|
|3        |1408024997    |2014-08-14 14:03:17|
+---------+--------------+-------------------+

