## import packages

In [1]:
import os
from datetime import datetime, timedelta
import pandas as pd

from pandas import DataFrame, Series
from typing import List

from dotenv import load_dotenv

load_dotenv()

True

## Variables

### Env

In [2]:
INPUT_FOLDER: str = os.getenv('INPUT_FOLDER_PATH')

### other

In [3]:
time_columns: list = ['System', 'Queue', 'Ring', 'Talk', 'Hold', 'ACW', 'Consult', 'Disposition', 'Duration']
vdn_flow_columns: list = ['1', '2', '3', '4', '5', '6', '7', '8', '9']
call_work_codes_columns: list = ['1.1', '2.1', '3.1', '4.1', '5.1']

## Functions

### summarize_unused_columns

In [4]:
def summarize_unused_columns(dataset_column_index: pd.core.indexes.base.Index) -> List[str]:
   """
   
   """
   unused_column_names: List[str] = [col_name for col_name in dataset_column_index if 'unnamed' in str(col_name).lower()]
   return unused_column_names

### filter_unused_columns

In [5]:
def filter_unused_columns(dataset: DataFrame) -> DataFrame:
    """
    
    """
    input_dataset: DataFrame = dataset.copy()
    unused_column_names: List[str] = summarize_unused_columns(dataset.columns)
    clean_df: DataFrame = input_dataset.drop(labels=unused_column_names, axis='columns')
    return clean_df

### set_datetime_dtype_values

In [6]:
def set_datetime_dtype_values(dataset: DataFrame, columns: List[str]) -> DataFrame:
    """
    
    """
    for col_name in columns:
        dataset[col_name] = dataset[col_name].map(lambda str_value: datetime.strptime(str_value, "%Y-%m-%d %H:%M:%S"))

    return dataset

### timedelta_from_string

In [7]:
def timedelta_from_string(timestring: str) -> timedelta:
    """
    
    """
    time_obj = datetime.strptime(timestring,"%H:%M:%S")
    timedelta_obj: timedelta = timedelta(hours=time_obj.hour, minutes=time_obj.minute, seconds=time_obj.second)
    return timedelta_obj

### timestring_to_seconds

In [8]:
def timestring_to_seconds(dataset: DataFrame, columns: List[str]) -> DataFrame:
    """
    
    """
    for col_name in columns:
        dataset[col_name] = dataset[col_name].map(lambda timestring: timedelta_from_string(timestring))

    return dataset

### get_avg_time

In [9]:
def get_avg_time(dataset: DataFrame, columns: List[str]) -> timedelta:
    """
    
    """
    subset: DataFrame = dataset[columns].copy()
    subset['row_sum'] = subset.sum(axis='columns')
    average: timedelta = subset['row_sum'].mean()
    return average

## core logic

### test read

In [10]:
test_df: DataFrame = pd.read_excel(os.path.join(INPUT_FOLDER, 'Week 2 -Mei.xlsx'), header=1)
test_df.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1
0,Rijlabels,
1,95020 (Patricia Ringers),
2,95021 (Rosie Bhagwanie),
3,95022 (Yamani Mondt),
4,95112 (Gilles van Loon),


### test filter unused columns

In [11]:
test_df = filter_unused_columns(dataset=test_df)
test_df.columns

Index([], dtype='object')

### test set datetime dtype

In [12]:
test_value = test_df["Start"][0]
print(test_value, type(test_value))

KeyError: 'Start'

In [None]:
test_df = set_datetime_dtype_values(test_df, columns=['Start'])

In [None]:
test_value = test_df["Start"][0]
print(test_value, type(test_value))

2025-05-06 08:00:33 <class 'pandas._libs.tslibs.timestamps.Timestamp'>


### test timestring to seconds

In [None]:
test_value = test_df['System'][0]
print(test_value, type(test_value))

00:00:59 <class 'str'>


In [None]:
result = timedelta_from_string(test_value)
result

datetime.timedelta(seconds=59)

In [None]:
test_df = timestring_to_seconds(test_df, ['System'])

In [None]:
test_value = test_df['System'][0]
print(test_value, type(test_value))

0 days 00:00:59 <class 'pandas._libs.tslibs.timedeltas.Timedelta'>


### test avg calculations

In [None]:
test_df = timestring_to_seconds(test_df, time_columns)
avg_reaction_time: timedelta = get_avg_time(test_df, columns=['System', 'Queue', 'Ring'])
avg_reaction_time

TypeError: strptime() argument 1 must be str, not Timedelta