In [1]:
from datetime import datetime
from pyspark.sql import Row
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import to_date, col

# getOrCreate() hands back the already-running session when there is one, so
# re-running this cell does not spin up a second session.
spark = SparkSession.builder.getOrCreate()

# Sample GPS pings: a single driver, two deliveries, spread over two days.
# Timestamps are ISO-8601 strings with microseconds and no timezone suffix.
DRIVER_ID = "012F0A15AFA44CFB8D95FD7E1A8C4DE6"

data = [
    # 2021-10-25
    {"delivery_id": 38, "driver_id": DRIVER_ID,
     "lat": 38.92751813710914, "lng": -77.04377994977315,
     "timestamp": "2021-10-25T13:51:00.569154"},
    {"delivery_id": 37, "driver_id": DRIVER_ID,
     "lat": 38.927670296049904, "lng": -77.04365646089744,
     "timestamp": "2021-10-25T13:51:03.569161"},
    # 2021-10-26
    {"delivery_id": 38, "driver_id": DRIVER_ID,
     "lat": 38.92782245485607, "lng": -77.04353297149412,
     "timestamp": "2021-10-26T13:51:06.569168"},
    {"delivery_id": 37, "driver_id": DRIVER_ID,
     "lat": 38.927974613527624, "lng": -77.04340948156317,
     "timestamp": "2021-10-26T13:51:09.569175"},
]

In [2]:
# Explicit schema for the ping DataFrame built from the sample records.
# lat/lng are declared as doubles because the source records carry them as
# Python floats; declaring them as strings would store the coordinates as
# text and force a cast before any numeric use (distance, bounding box, ...).
schema = StructType([
    StructField("delivery_id", IntegerType()),
    StructField("driver_id", StringType()),
    StructField("lat", DoubleType()),
    StructField("lng", DoubleType()),
    StructField("timestamp", TimestampType())])


def get_datetime(timestamp_str):
    """Parse an ISO-8601 style timestamp string into a naive ``datetime``.

    Accepts both ``YYYY-MM-DDTHH:MM:SS.ffffff`` and ``YYYY-MM-DDTHH:MM:SS``.
    The second form matters because ``datetime.isoformat()`` drops the
    fractional part when the microsecond component is zero, so a ping logged
    on an exact second would otherwise fail to parse.

    Args:
        timestamp_str: Timestamp text without a timezone suffix.

    Returns:
        The corresponding ``datetime`` (no tzinfo attached).

    Raises:
        ValueError: If the text matches neither accepted layout.
    """
    # Pick the layout from the input itself rather than try/except, so a
    # malformed value still surfaces strptime's own error message.
    if "." in timestamp_str:
        layout = "%Y-%m-%dT%H:%M:%S.%f"
    else:
        layout = "%Y-%m-%dT%H:%M:%S"
    return datetime.strptime(timestamp_str, layout)


def dict_to_row(dct):
    """Build a Spark ``Row`` from one raw ping record.

    The ``delivery_id``, ``driver_id``, ``lat`` and ``lng`` values are copied
    through unchanged; the ``timestamp`` string is converted with
    ``get_datetime``.

    Args:
        dct: Mapping with the keys ``delivery_id``, ``driver_id``, ``lat``,
            ``lng`` and ``timestamp``.

    Returns:
        A ``Row`` with those five fields.
    """
    passthrough = ('delivery_id', 'driver_id', 'lat', 'lng')
    fields = {key: dct[key] for key in passthrough}
    fields['timestamp'] = get_datetime(dct['timestamp'])
    return Row(**fields)


# Turn the raw ping dicts into Rows and load them under the explicit schema.
rows = [dict_to_row(record) for record in data]
df = spark.createDataFrame(rows, schema)
df.show()

# Number of pings per driver per calendar day.
(
    df
    .withColumn('date', to_date(col('timestamp'), 'yyyy-MM-dd'))
    .groupBy('driver_id', 'date')
    .count()
    .withColumnRenamed('count', 'total_pings')
    .show()
)

+-----------+--------------------+------------------+------------------+--------------------+
|delivery_id|           driver_id|               lat|               lng|           timestamp|
+-----------+--------------------+------------------+------------------+--------------------+
|         38|012F0A15AFA44CFB8...| 38.92751813710914|-77.04377994977315|2021-10-25 13:51:...|
|         37|012F0A15AFA44CFB8...|38.927670296049904|-77.04365646089744|2021-10-25 13:51:...|
|         38|012F0A15AFA44CFB8...| 38.92782245485607|-77.04353297149412|2021-10-26 13:51:...|
|         37|012F0A15AFA44CFB8...|38.927974613527624|-77.04340948156317|2021-10-26 13:51:...|
+-----------+--------------------+------------------+------------------+--------------------+

+--------------------+----------+-----------+
|           driver_id|      date|total_pings|
+--------------------+----------+-----------+
|012F0A15AFA44CFB8...|2021-10-25|          2|
|012F0A15AFA44CFB8...|2021-10-26|          2|
+----------------