Użyj każdej z tych funkcji:
* `unix_timestamp()` 
* `date_format()`
* `to_unix_timestamp()`
* `from_unixtime()`
* `to_date()` 
* `to_timestamp()` 
* `from_utc_timestamp()` 
* `to_utc_timestamp()`

In [0]:
from pyspark.sql.functions import current_date, current_timestamp

# Sample input: an ISO-8601-style timestamp string, an integer "unix" value
# and a free-form "Mon, yyyy" date string.
kolumny = ["timestamp", "unix", "Date"]
dane = [
    ("2015-03-22T14:13:34", 1646641525847, "May, 2021"),
    ("2015-03-22T15:03:18", 1646641557555, "Mar, 2021"),
    ("2015-03-22T14:38:39", 1646641578622, "Jan, 2021"),
]

# Build the base DataFrame, then append the current date and the current
# timestamp as two extra columns.
dataFrame = spark.createDataFrame(dane, kolumny)
dataFrame = (
    dataFrame
    .withColumn("current_date", current_date())
    .withColumn("current_timestamp", current_timestamp())
)

display(dataFrame)

timestamp,unix,Date,current_date,current_timestamp
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-11,2025-03-11T17:14:19.313+0000
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-11,2025-03-11T17:14:19.313+0000
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-11,2025-03-11T17:14:19.313+0000


In [0]:

# Print the column names and types of the sample DataFrame; the two text
# columns ("timestamp", "Date") are plain strings at this point.
dataFrame.printSchema()

root
 |-- timestamp: string (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- current_date: date (nullable = false)
 |-- current_timestamp: timestamp (nullable = false)



## unix_timestamp(..) & cast(..)

Konwersja **string** na **timestamp**.

Lokalizacja funkcji 
* `pyspark.sql.functions` in the case of Python
* `org.apache.spark.sql.functions` in the case of Scala & Java

## 1. Zmiana formatu wartości timestamp yyyy-MM-dd'T'HH:mm:ss 
`unix_timestamp(..)`

Dokumentacja API `unix_timestamp(..)`:
> Convert time string with given pattern (see <a href="http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html" target="_blank">SimpleDateFormat</a>) to Unix time stamp (in seconds), return null if fail.

`SimpleDateFormat` is part of the Java API and provides support for parsing and formatting date and time values.

In [0]:
from pyspark.sql.functions import *

# Pattern matching the source strings; the literal "T" separator is quoted
# so that it is not interpreted as a pattern letter.
iso_pattern = "yyyy-MM-dd'T'HH:mm:ss"

# Parse the string column into Unix time (seconds since the epoch).
epoch_seconds = unix_timestamp(col("timestamp"), iso_pattern)

# Add the epoch value and its round-trip back to a formatted string
# (from_unixtime is called without an explicit output format).
dataFrame = (
    dataFrame
    .withColumn("unix_timestamp", epoch_seconds)
    .withColumn("from_unixtime", from_unixtime(epoch_seconds))
)

display(dataFrame)

timestamp,unix,Date,current_date,current_timestamp,unix_timestamp,from_unixtime
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-11,2025-03-11T17:14:20.088+0000,1427033614,2015-03-22 14:13:34
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-11,2025-03-11T17:14:20.088+0000,1427036598,2015-03-22 15:03:18
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-11,2025-03-11T17:14:20.088+0000,1427035119,2015-03-22 14:38:39


2. Zmień format zgodnie z klasą `SimpleDateFormat`: **yyyy-MM-dd HH:mm:ss**
  * a. Wyświetl schemat i dane, żeby sprawdzić, czy wartości się zmieniły

In [0]:
# Re-render the "timestamp" column as "yyyy-MM-dd HH:mm:ss" text.
# The input column is a string, so this presumably relies on Spark casting
# it to a timestamp before formatting -- confirm if the input format changes.
formatted = date_format(col("timestamp"), "yyyy-MM-dd HH:mm:ss")
zmianaFormatu = dataFrame.withColumn("formatted_timestamp", formatted)

# Show both the schema and the data to verify the new column.
zmianaFormatu.printSchema()
display(zmianaFormatu)

root
 |-- timestamp: string (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- current_date: date (nullable = false)
 |-- current_timestamp: timestamp (nullable = false)
 |-- unix_timestamp: long (nullable = true)
 |-- from_unixtime: string (nullable = true)
 |-- formatted_timestamp: string (nullable = true)



timestamp,unix,Date,current_date,current_timestamp,unix_timestamp,from_unixtime,formatted_timestamp
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-11,2025-03-11T17:14:20.976+0000,1427033614,2015-03-22 14:13:34,2015-03-22 14:13:34
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-11,2025-03-11T17:14:20.976+0000,1427036598,2015-03-22 15:03:18,2015-03-22 15:03:18
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-11,2025-03-11T17:14:20.976+0000,1427035119,2015-03-22 14:38:39,2015-03-22 14:38:39


In [0]:
# Side-by-side comparison of the three derived columns only.
compared_columns = ["formatted_timestamp", "unix_timestamp", "from_unixtime"]
tempE = zmianaFormatu.select(*compared_columns)
display(tempE)

formatted_timestamp,unix_timestamp,from_unixtime
2015-03-22 14:13:34,1427033614,2015-03-22 14:13:34
2015-03-22 15:03:18,1427036598,2015-03-22 15:03:18
2015-03-22 14:38:39,1427035119,2015-03-22 14:38:39


## Stwórz nowe kolumny do DataFrame z wartościami year(..), month(..), dayofyear(..)

In [0]:
# Extract calendar parts (year, month, day of year) from the "timestamp"
# column; each part is added to its own copy of the DataFrame.
source_ts = col("timestamp")

yearDate = dataFrame.withColumn("year", year(source_ts))
display(yearDate)

monthDate = dataFrame.withColumn("month", month(source_ts))
display(monthDate)

dayofyearDate = dataFrame.withColumn("day of year", dayofyear(source_ts))
display(dayofyearDate)
 

timestamp,unix,Date,current_date,current_timestamp,unix_timestamp,from_unixtime,year
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-11,2025-03-11T17:14:22.282+0000,1427033614,2015-03-22 14:13:34,2015
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-11,2025-03-11T17:14:22.282+0000,1427036598,2015-03-22 15:03:18,2015
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-11,2025-03-11T17:14:22.282+0000,1427035119,2015-03-22 14:38:39,2015


timestamp,unix,Date,current_date,current_timestamp,unix_timestamp,from_unixtime,month
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-11,2025-03-11T17:14:22.828+0000,1427033614,2015-03-22 14:13:34,3
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-11,2025-03-11T17:14:22.828+0000,1427036598,2015-03-22 15:03:18,3
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-11,2025-03-11T17:14:22.828+0000,1427035119,2015-03-22 14:38:39,3


timestamp,unix,Date,current_date,current_timestamp,unix_timestamp,from_unixtime,day of year
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-11,2025-03-11T17:14:23.344+0000,1427033614,2015-03-22 14:13:34,81
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-11,2025-03-11T17:14:23.344+0000,1427036598,2015-03-22 15:03:18,81
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-11,2025-03-11T17:14:23.344+0000,1427035119,2015-03-22 14:38:39,81


In [0]:
# to_date(): parse the string column and keep only the date part (yyyy-MM-dd).
# NOTE(review): the recorded output of this cell shows the new "date" column
# taking the place of the original "Date" column (the names differ only by
# case), so "Date" is no longer present in toDate. Pick another column name
# if the original "Date" values must be kept.
parsed_date = to_date(col("timestamp"), "yyyy-MM-dd'T'HH:mm:ss")
toDate = dataFrame.withColumn("date", parsed_date)
display(toDate)

timestamp,unix,date,current_date,current_timestamp,unix_timestamp,from_unixtime
2015-03-22T14:13:34,1646641525847,2015-03-22,2025-03-11,2025-03-11T17:14:23.955+0000,1427033614,2015-03-22 14:13:34
2015-03-22T15:03:18,1646641557555,2015-03-22,2025-03-11,2025-03-11T17:14:23.955+0000,1427036598,2015-03-22 15:03:18
2015-03-22T14:38:39,1646641578622,2015-03-22,2025-03-11,2025-03-11T17:14:23.955+0000,1427035119,2015-03-22 14:38:39


In [0]:
# from_unixtime(): turn a Unix timestamp (the "unix_timestamp" column built
# earlier) into a formatted string; the output format can be chosen directly
# through the second argument.
readable_time = from_unixtime(col("unix_timestamp"), "yyyy-MM-dd HH:mm:ss")
fromUnix = dataFrame.withColumn("unixtime", readable_time)
display(fromUnix)

timestamp,unix,Date,current_date,current_timestamp,unix_timestamp,from_unixtime,unixtime
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-11,2025-03-11T17:14:24.633+0000,1427033614,2015-03-22 14:13:34,2015-03-22 14:13:34
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-11,2025-03-11T17:14:24.633+0000,1427036598,2015-03-22 15:03:18,2015-03-22 15:03:18
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-11,2025-03-11T17:14:24.633+0000,1427035119,2015-03-22 14:38:39,2015-03-22 14:38:39


In [0]:
# to_timestamp(): convert the text column into a timestamp value using the
# pattern that matches the source strings.
parsed_ts = to_timestamp(col("timestamp"), "yyyy-MM-dd'T'HH:mm:ss")
toTimestamp = dataFrame.withColumn("new", parsed_ts)
display(toTimestamp)


timestamp,unix,Date,current_date,current_timestamp,unix_timestamp,from_unixtime,new
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-11,2025-03-11T17:14:25.129+0000,1427033614,2015-03-22 14:13:34,2015-03-22T14:13:34.000+0000
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-11,2025-03-11T17:14:25.129+0000,1427036598,2015-03-22 15:03:18,2015-03-22T15:03:18.000+0000
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-11,2025-03-11T17:14:25.129+0000,1427035119,2015-03-22 14:38:39,2015-03-22T14:38:39.000+0000


In [0]:
# to_utc_timestamp(): takes two arguments -- the timestamp and tz, the time
# zone the input is expressed in -- and converts the value to UTC.
# In the recorded output the Europe/Warsaw values are shifted back one hour.
warsaw_local = to_timestamp(col("timestamp"), "yyyy-MM-dd'T'HH:mm:ss")
toUtcTimestamp = dataFrame.withColumn(
    "UTC",
    to_utc_timestamp(warsaw_local, "Europe/Warsaw"),
)
display(toUtcTimestamp)

timestamp,unix,Date,current_date,current_timestamp,unix_timestamp,from_unixtime,UTC
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-11,2025-03-11T17:14:25.726+0000,1427033614,2015-03-22 14:13:34,2015-03-22T13:13:34.000+0000
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-11,2025-03-11T17:14:25.726+0000,1427036598,2015-03-22 15:03:18,2015-03-22T14:03:18.000+0000
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-11,2025-03-11T17:14:25.726+0000,1427035119,2015-03-22 14:38:39,2015-03-22T13:38:39.000+0000


In [0]:
# from_utc_timestamp(): convert a UTC timestamp to the given time zone.
# Starts from toUtcTimestamp (previous example) because the input must
# already be expressed in UTC.
singapore_local = from_utc_timestamp(col("UTC"), "Asia/Singapore")
fromUtcTimestamp = toUtcTimestamp.withColumn("local", singapore_local)
display(fromUtcTimestamp)

timestamp,unix,Date,current_date,current_timestamp,unix_timestamp,from_unixtime,UTC,local
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-11,2025-03-11T17:14:26.238+0000,1427033614,2015-03-22 14:13:34,2015-03-22T13:13:34.000+0000,2015-03-22T21:13:34.000+0000
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-11,2025-03-11T17:14:26.238+0000,1427036598,2015-03-22 15:03:18,2015-03-22T14:03:18.000+0000,2015-03-22T22:03:18.000+0000
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-11,2025-03-11T17:14:26.238+0000,1427035119,2015-03-22 14:38:39,2015-03-22T13:38:39.000+0000,2015-03-22T21:38:39.000+0000
