Użyj każdej z tych funkcji:
* `unix_timestamp()` 
* `date_format()`
* `to_unix_timestamp()`
* `from_unixtime()`
* `to_date()` 
* `to_timestamp()` 
* `from_utc_timestamp()` 
* `to_utc_timestamp()`

In [0]:
from pyspark.sql.functions import current_date, current_timestamp

# Sample rows: an ISO-like timestamp string, a large integer in the "unix"
# column (looks like epoch milliseconds — TODO confirm) and a "Mon, yyyy" string.
dane = [
    ("2015-03-22T14:13:34", 1646641525847, "May, 2021"),
    ("2015-03-22T15:03:18", 1646641557555, "Mar, 2021"),
    ("2015-03-22T14:38:39", 1646641578622, "Jan, 2021"),
]
kolumny = ["timestamp", "unix", "Date"]

# Build the base DataFrame and append the current date and current timestamp
# as two extra columns.
dataFrame = (
    spark.createDataFrame(dane, kolumny)
    .withColumn("current_date", current_date())
    .withColumn("current_timestamp", current_timestamp())
)

display(dataFrame)

timestamp,unix,Date,current_date,current_timestamp
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-18,2025-03-18T17:30:39.926+0000
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T17:30:39.926+0000
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T17:30:39.926+0000


In [0]:

# Print the column names, types and nullability of the base DataFrame.
dataFrame.printSchema()

root
 |-- timestamp: string (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- current_date: date (nullable = false)
 |-- current_timestamp: timestamp (nullable = false)



## unix_timestamp(..) & cast(..)

Konwersja wartości typu **string** na **timestamp**.

Lokalizacja funkcji 
* `pyspark.sql.functions` in the case of Python
* `org.apache.spark.sql.functions` in the case of Scala & Java

## 1. Zmiana formatu wartości timestamp yyyy-MM-dd'T'HH:mm:ss 
`unix_timestamp(..)`

Dokumentacja API `unix_timestamp(..)`:
> Convert time string with given pattern (see <a href="http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html" target="_blank">SimpleDateFormat</a>) to Unix time stamp (in seconds), return null if fail.

`SimpleDateFormat` is part of the Java API and provides support for parsing and formatting date and time values.

In [0]:
from pyspark.sql.functions import unix_timestamp

# Parse the "timestamp" string with the pattern yyyy-MM-dd'T'HH:mm:ss and store
# the resulting Unix time (in seconds) in a new "timestamp_unix" column.
dataFrame = dataFrame.withColumn(
    "timestamp_unix",
    unix_timestamp("timestamp", "yyyy-MM-dd'T'HH:mm:ss"),
)

display(dataFrame)


timestamp,unix,Date,current_date,current_timestamp,timestamp_unix
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-18,2025-03-18T17:31:51.622+0000,1427033614
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T17:31:51.622+0000,1427036598
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T17:31:51.622+0000,1427035119


2. Zmień format zgodnie z klasą `SimpleDateFormat`: **yyyy-MM-dd HH:mm:ss**
  * a. Wyświetl schemat i dane, żeby sprawdzić, czy wartości się zmieniły

In [0]:

# Recompute "timestamp_unix" from the "timestamp" string with the same
# yyyy-MM-dd'T'HH:mm:ss pattern; withColumn overwrites the existing column of
# that name, so the schema keeps a single "timestamp_unix" column.
# NOTE(review): this does not yet apply the yyyy-MM-dd HH:mm:ss format — that
# happens in the from_unixtime() cell that follows.
zmianaFormatu = dataFrame.withColumn(
    "timestamp_unix",
    unix_timestamp("timestamp", "yyyy-MM-dd'T'HH:mm:ss"),
)

zmianaFormatu.printSchema()

root
 |-- timestamp: string (nullable = true)
 |-- unix: long (nullable = true)
 |-- Date: string (nullable = true)
 |-- current_date: date (nullable = false)
 |-- current_timestamp: timestamp (nullable = false)
 |-- timestamp_unix: long (nullable = true)



In [0]:
# from_unixtime(): format the "timestamp_unix" seconds using the
# yyyy-MM-dd HH:mm:ss pattern into a new "timestamp_formatted" column.
from pyspark.sql.functions import from_unixtime

tempE = zmianaFormatu.withColumn(
    "timestamp_formatted",
    from_unixtime("timestamp_unix", "yyyy-MM-dd HH:mm:ss"),
)

display(tempE)

timestamp,unix,Date,current_date,current_timestamp,timestamp_unix,timestamp_formatted
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-18,2025-03-18T17:34:14.555+0000,1427033614,2015-03-22 14:13:34
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T17:34:14.555+0000,1427036598,2015-03-22 15:03:18
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T17:34:14.555+0000,1427035119,2015-03-22 14:38:39


## Dodaj do DataFrame nowe kolumny z wartościami year(..), month(..), dayofyear(..)

In [0]:
# year(), month(), dayofyear(): derive the date parts from "timestamp_formatted".
# NOTE(review): date_format() is listed in the exercise but is not used here.
from pyspark.sql.functions import year, month, dayofyear

yearDate = (
    tempE
    .withColumn("year", year("timestamp_formatted"))
    .withColumn("month", month("timestamp_formatted"))
    .withColumn("dayofyear", dayofyear("timestamp_formatted"))
)

display(yearDate)

timestamp,unix,Date,current_date,current_timestamp,timestamp_unix,timestamp_formatted,year,month,dayofyear
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-18,2025-03-18T17:34:18.657+0000,1427033614,2015-03-22 14:13:34,2015,3,81
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T17:34:18.657+0000,1427036598,2015-03-22 15:03:18,2015,3,81
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T17:34:18.657+0000,1427035119,2015-03-22 14:38:39,2015,3,81


In [0]:
# to_date(): format "timestamp_unix" as yyyy-MM-dd, convert that to a date in
# the "to_date" column, then derive year / month / day-of-year from it.
from pyspark.sql.functions import to_date

toDate = (
    tempE
    .withColumn("to_date", to_date(from_unixtime("timestamp_unix", "yyyy-MM-dd")))
    .withColumn("year", year("to_date"))
    .withColumn("month", month("to_date"))
    .withColumn("dayofyear", dayofyear("to_date"))
)

display(toDate)

timestamp,unix,Date,current_date,current_timestamp,timestamp_unix,timestamp_formatted,to_date,year,month,dayofyear
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-18,2025-03-18T17:35:27.203+0000,1427033614,2015-03-22 14:13:34,2015-03-22,2015,3,81
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T17:35:27.203+0000,1427036598,2015-03-22 15:03:18,2015-03-22,2015,3,81
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T17:35:27.203+0000,1427035119,2015-03-22 14:38:39,2015-03-22,2015,3,81


In [0]:
# from_unixtime(): format "timestamp_unix" as yyyy-MM-dd HH:mm:ss in "from_unix",
# convert that to a date in "to_date", then derive the date parts from the date.
from pyspark.sql.functions import from_unixtime

fromUnix = (
    tempE
    .withColumn("from_unix", from_unixtime("timestamp_unix", "yyyy-MM-dd HH:mm:ss"))
    .withColumn("to_date", to_date("from_unix"))
    .withColumn("year", year("to_date"))
    .withColumn("month", month("to_date"))
    .withColumn("dayofyear", dayofyear("to_date"))
)

display(fromUnix)

timestamp,unix,Date,current_date,current_timestamp,timestamp_unix,timestamp_formatted,from_unix,to_date,year,month,dayofyear
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-18,2025-03-18T17:36:04.088+0000,1427033614,2015-03-22 14:13:34,2015-03-22 14:13:34,2015-03-22,2015,3,81
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T17:36:04.088+0000,1427036598,2015-03-22 15:03:18,2015-03-22 15:03:18,2015-03-22,2015,3,81
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T17:36:04.088+0000,1427035119,2015-03-22 14:38:39,2015-03-22 14:38:39,2015-03-22,2015,3,81


In [0]:
# to_timestamp(): parse "timestamp_formatted" with the yyyy-MM-dd HH:mm:ss
# pattern into the "to_timestamp" column, then derive the date parts from it.
from pyspark.sql.functions import to_timestamp

toTimestamp = (
    tempE
    .withColumn("to_timestamp", to_timestamp("timestamp_formatted", "yyyy-MM-dd HH:mm:ss"))
    .withColumn("year", year("to_timestamp"))
    .withColumn("month", month("to_timestamp"))
    .withColumn("dayofyear", dayofyear("to_timestamp"))
)

display(toTimestamp)


timestamp,unix,Date,current_date,current_timestamp,timestamp_unix,timestamp_formatted,to_timestamp,year,month,dayofyear
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-18,2025-03-18T17:36:31.388+0000,1427033614,2015-03-22 14:13:34,2015-03-22T14:13:34.000+0000,2015,3,81
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T17:36:31.388+0000,1427036598,2015-03-22 15:03:18,2015-03-22T15:03:18.000+0000,2015,3,81
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T17:36:31.388+0000,1427035119,2015-03-22 14:38:39,2015-03-22T14:38:39.000+0000,2015,3,81


In [0]:
# to_utc_timestamp(): convert "timestamp_formatted" to UTC, treating it as a
# time in the zone passed as the second argument ("UTC" here), then derive the
# date parts from the new "to_utc_timestamp" column.
from pyspark.sql.functions import to_utc_timestamp

toUtcTimestamp = (
    tempE
    .withColumn("to_utc_timestamp", to_utc_timestamp("timestamp_formatted", "UTC"))
    .withColumn("year", year("to_utc_timestamp"))
    .withColumn("month", month("to_utc_timestamp"))
    .withColumn("dayofyear", dayofyear("to_utc_timestamp"))
)

display(toUtcTimestamp)



timestamp,unix,Date,current_date,current_timestamp,timestamp_unix,timestamp_formatted,to_utc_timestamp,year,month,dayofyear
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-18,2025-03-18T17:36:56.887+0000,1427033614,2015-03-22 14:13:34,2015-03-22T14:13:34.000+0000,2015,3,81
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T17:36:56.887+0000,1427036598,2015-03-22 15:03:18,2015-03-22T15:03:18.000+0000,2015,3,81
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T17:36:56.887+0000,1427035119,2015-03-22 14:38:39,2015-03-22T14:38:39.000+0000,2015,3,81


In [0]:
# from_utc_timestamp(): convert the UTC "to_utc_timestamp" values to the zone
# passed as the second argument ("UTC" here) in a new "from_utc_timestamp" column.
# year / month / dayofyear already exist on toUtcTimestamp, so withColumn
# overwrites them in place instead of appending new columns.
from pyspark.sql.functions import from_utc_timestamp

fromUtcTimestamp = (
    toUtcTimestamp
    .withColumn("from_utc_timestamp", from_utc_timestamp("to_utc_timestamp", "UTC"))
    .withColumn("year", year("from_utc_timestamp"))
    .withColumn("month", month("from_utc_timestamp"))
    .withColumn("dayofyear", dayofyear("from_utc_timestamp"))
)

display(fromUtcTimestamp)

timestamp,unix,Date,current_date,current_timestamp,timestamp_unix,timestamp_formatted,to_utc_timestamp,year,month,dayofyear,from_utc_timestamp
2015-03-22T14:13:34,1646641525847,"May, 2021",2025-03-18,2025-03-18T17:37:37.142+0000,1427033614,2015-03-22 14:13:34,2015-03-22T14:13:34.000+0000,2015,3,81,2015-03-22T14:13:34.000+0000
2015-03-22T15:03:18,1646641557555,"Mar, 2021",2025-03-18,2025-03-18T17:37:37.142+0000,1427036598,2015-03-22 15:03:18,2015-03-22T15:03:18.000+0000,2015,3,81,2015-03-22T15:03:18.000+0000
2015-03-22T14:38:39,1646641578622,"Jan, 2021",2025-03-18,2025-03-18T17:37:37.142+0000,1427035119,2015-03-22 14:38:39,2015-03-22T14:38:39.000+0000,2015,3,81,2015-03-22T14:38:39.000+0000


In [0]:
#%fs ls dbfs:/databricks-datasets/
