### Load modules and specify file path

In [None]:
# Import required modules
import pandas as pd
import glob2
from openpyxl import load_workbook
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Specify paths to files
raw_data_path = r'C:\Users\MT1070\Desktop\Master Call Volume\raw-report-extracts' 

# Display names of files in master folder
raw_files = glob2.glob(raw_data_path + "/*.xlsx")

# Initialize an empty data frame to store data from all files
final_sheet = pd.DataFrame()

### Read Data Set and Configure Entry Date 

In [None]:
# Iteratively read data extracts from master files and append them to the final DataFrame
data_frames = []  # Store DataFrames from master files
for file in raw_files:
    try:
        df = pd.read_excel(file, sheet_name=None, skipfooter=2, engine='openpyxl')
        df_concat = pd.concat(df.values(), ignore_index=True, sort=False)
        data_frames.append(df_concat)
    except Exception as e:
        print(f"Error occurred while processing file: {file}")
        print(str(e))

## Reset Index and Set Column Headers

In [54]:
if len(data_frames) > 0:
    final_sheet = pd.concat(data_frames, ignore_index=True)

# Set desired column as 

final_sheet.reset_index(drop=True, inplace=True)

# Define headers
final_sheet.columns = final_sheet.iloc[0]
final_sheet = final_sheet[1:]


### Extract Date from Datetime column

In [56]:
# Print the values in the "datetime" column
column_name = 'datetime'
if column_name.strip() in final_sheet.columns:
    final_sheet[column_name.strip()] = pd.to_datetime(final_sheet[column_name.strip()], format='%m/%d/%y %I:%M:%S %p', errors='coerce')

    # Check the data type of the "datetime" column after conversion
    datetime_column = final_sheet[column_name.strip()]
    # data_type = datetime_column.dtype
    # print(f"The data type of elements in the '{column_name}' column after conversion is: {data_type}")

    # Create the 'date' column by extracting month/date/year values
    final_sheet['date'] = final_sheet[column_name.strip()].dt.strftime('%m/%d/%Y')

    # Print the datetime entries that cannot be parsed
    invalid_entries = final_sheet[datetime_column.isna()][column_name.strip()]
    print(f"Invalid datetime entries:")
    print(invalid_entries)


else:
    print(f"Column '{column_name.strip()}' not found in the DataFrame.")


Invalid datetime entries:
2978    NaT
5879    NaT
8701    NaT
9632    NaT
10381   NaT
13550   NaT
Name: datetime, dtype: datetime64[ns]


### Count Entries for Successful & Unsuccessful Calls Transferred

In [66]:
# Count number of successful workflows
first_try_success = final_sheet['department_utternace_first_try_success'].count()
second_try_success = final_sheet['department_utternace_second_try_success'].count()
third_try_success = final_sheet['department_utternace_second_try_success'].count()
total_success = first_try_success + second_try_success + third_try_success
spinsci_success_count = final_sheet['call_transfer_success'].count()

# Output Successful Counts
print(f"Number of entries in First Try Success: {first_try_success}")
print(f"Number of entries in Second Try Success: {second_try_success}")
print(f"Number of entries in Third Try Success: {third_try_success}")
print(f"Total number of successful call transfers: {total_success}\n")
print(f"Total number of SpinSci ID'd successful call transfers: {spinsci_success_count}\n")


# Count number of unsuccessful workflows
first_nomatch = final_sheet['department_utternace_first_nomatch'].count()
second_nomatch = final_sheet['department_utternace_second_nomatch'].count()
third_nomatch = final_sheet['department_utternace_third_nomatch'].count()
final_nomatch = final_sheet['department_utternace_max_nomatch'].count()
total_nomatch = first_nomatch + second_nomatch + third_nomatch + final_nomatch

# Output Usuccessful Counts
print(f"Number of unsuccessful first try entries: {first_nomatch}")
print(f"Number of unsuccessful second try entries: {second_nomatch}")
print(f"Number of unsuccessful third try entries: {third_nomatch}")
print(f"Total number of unsuccessful calls: {total_nomatch}")



Number of entries in First Try Success: 2972
Number of entries in Second Try Success: 689
Number of entries in Third Try Success: 689
Total number of successful call transfers: 4350

Total number of SpinSci ID'd successful call transfers: 11417

Number of unsuccessful first try entries: 1123
Number of unsuccessful second try entries: 458
Number of unsuccessful third try entries: 6
Total number of unsuccessful calls: 1587


### Graveyard

### Locate Invalid Entries 

In [None]:
column_name = 'datetime'
if column_name.strip() in final_sheet.columns:
    final_sheet[column_name.strip()] = pd.to_datetime(final_sheet[column_name.strip()], format='%m/%d/%y %I:%M:%S %p', errors='coerce')

    # Check the data type of the "datetime" column after conversion
    datetime_column = final_sheet[column_name.strip()]
    data_type = datetime_column.dtype
    print(f"The data type of elements in the '{column_name}' column after conversion is: {data_type}")

    # Print the datetime entries that cannot be parsed
    invalid_entries = final_sheet[datetime_column.isna()][column_name.strip()]
    print(f"Invalid datetime entries:")
    print(invalid_entries)

# Utilize the output of the script above to find the NaN values.
index_value = 2978
print(final_sheet.iloc[index_value])

In [None]:
def convert_to_datetime(df, column_name):
    if column_name in df.columns:
        if column_name == 'datetime':
            try:
                df['Date'] = df[column_name].dt.date
                # df[column_name] = pd.to_datetime(df[column_name], format='%m/%d/%y %I:%M:%S %p')
                return df
            except ValueError:
                print(f"Invalid datetime format in column {column_name}:")
                print(df[column_name])
        else:
            df[column_name] = pd.to_datetime(df[column_name], format='%m/%d/%y %I:%M:%S %p')
            return df
    else:
        print(f"Column '{column_name} not found in Dataframe.")
        return None
    
final_sheet = convert_to_datetime(final_sheet, 'datetime')

# print(final_sheet)