### Loaded: Utility Functions

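Helpers for:
- building pairs of dates one day apart from a start date and an end date
- 'melting' a Spark DataFrame, i.e. transforming wide-format data to long format
- summing the columns of a Spark DataFrame after excluding selected columns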

In [0]:
import pandas as pd
from pyspark.sql.functions import array, col, explode, lit, struct
from pyspark.sql import DataFrame
from typing import Iterable 

In [0]:
def get_time_pairs(first_date,last_date):
    """Build (day, day + 1 day) tuples for every day in a date range.

    One tuple is produced per daily step from ``first_date`` to ``last_date``
    (both inclusive), e.g. "[(2022-10-10, 2022-10-11)]". The first element of
    each tuple is the day itself and the second is the same instant shifted
    forward by one day.

    Args:
        first_date: Start of the range, passed to ``pd.date_range`` as ``start``.
        last_date: End of the range, passed to ``pd.date_range`` as ``end``.

    Returns:
        A list of 2-tuples, one per day in the range.
    """
    one_day = pd.Timedelta(days=1)
    daily_index = pd.date_range(start=first_date, end=last_date, freq="1D")

    pairs = []
    for day_start in daily_index.to_pydatetime().tolist():
        pairs.append((day_start, day_start + one_day))
    return pairs

In [0]:
#Taken from StackOverflow: https://stackoverflow.com/questions/41670103/how-to-melt-spark-dataframe
def melt(
        df: DataFrame, 
        id_vars: Iterable[str], value_vars: Iterable[str], 
        var_name: str="variable", value_name: str="value") -> DataFrame:
    """Convert a wide-format Spark DataFrame into long format.

    Every column named in ``value_vars`` is turned into rows: the result has
    the ``id_vars`` columns plus two extra columns, one holding the original
    column name (``var_name``) and one holding its value (``value_name``).

    Notes: Taken from StackOverflow:
    https://stackoverflow.com/questions/41670103/how-to-melt-spark-dataframe

    Args:
        df: The wide-format Spark DataFrame.
        id_vars: Names of the identifier columns to keep unchanged. Any
            iterable of strings is accepted (list, tuple, generator, ...).
        value_vars: Names of the columns to unpivot into rows.
        var_name: Name of the output column holding the source column names.
        value_name: Name of the output column holding the values.

    Returns:
        The long-format DataFrame with columns ``id_vars`` + ``var_name`` +
        ``value_name``.
    """
    # Materialise once so any iterable (not only a list) can be combined with
    # the generated columns below; ``id_vars + [...]`` fails for tuples etc.
    id_columns = list(id_vars)

    # Create array<struct<variable: str, value: ...>>
    vars_and_vals = array(*(
        struct(lit(c).alias(var_name), col(c).alias(value_name))
        for c in value_vars))

    # Add to the DataFrame and explode: one output row per array element,
    # i.e. one row per (input row, value column) combination.
    exploded = df.withColumn("_vars_and_vals", explode(vars_and_vals))

    # Pull the two struct fields back out as top-level columns.
    long_columns = [
        col("_vars_and_vals")[field].alias(field)
        for field in (var_name, value_name)]
    return exploded.select(*id_columns, *long_columns)

In [0]:
def sum_all_sparkdf_cols(df,cols_to_remove):
    """Sum every column of a Spark DataFrame except those in ``cols_to_remove``.

    Used to aggregate values over all days while leaving out fields such as
    'date' (to match other week-level KPIs). The aggregated columns are
    renamed back to their original names.

    Args:
        df: Spark DataFrame whose columns should be summed.
        cols_to_remove: Column names to leave out of the aggregation.

    Returns:
        A DataFrame produced by ``df.agg`` containing the sum of each
        remaining column, with the original column names restored.

    Raises:
        ValueError: If a name in ``cols_to_remove`` is not a column of ``df``.
    """
    # Local import: ``re`` is needed for the rename step below.
    import re

    # Work on a private copy of the column names.
    remaining = list(df.columns)
    for name in cols_to_remove:
        # list.remove raises ValueError when the name is not present.
        remaining.remove(name)

    summed = df.agg({name: "sum" for name in remaining})

    # Resetting names of columns: the aggregated columns are expected to be
    # named "sum(<column>)", so strip the wrapper to recover "<column>".
    sum_pattern = re.compile(r'sum\((.*)\)')
    restored_names = [
        sum_pattern.search(agg_name).group(1) for agg_name in summed.columns]

    return summed.toDF(*restored_names)