In [None]:
# Import required modules
import pandas as pd
import glob2 # Finds all path names that match a specific pattern
# from openpyxl import load_workbook
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [None]:
# Folder that holds the pilot call-volume extracts
master_path = r'C:\Users\MT1070\Desktop\Master Call Volume' #\pilot-extracts'

# Collect the path of every .xlsx workbook directly inside that folder
xlsx_pattern = master_path + "/*.xlsx"
file_names = glob2.glob(xlsx_pattern)

# Start from an empty frame; the loading cells below add rows to it
final_sheet = pd.DataFrame()

In [None]:
# Read every pilot extract and stack all of its sheets onto final_sheet.
#
# Frames are collected in a list and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0 (its AttributeError would be
# swallowed by the handler below, leaving final_sheet empty), and growing a
# frame inside a loop copies all accumulated rows on every iteration.
pilot_frames = []
for file in file_names:
    try:
        # sheet_name=None returns a dict of {sheet name: DataFrame};
        # skipfooter=2 drops the last two rows of each sheet.
        sheets = pd.read_excel(file, sheet_name=None, skipfooter=2, engine='openpyxl')
        pilot_frames.append(pd.concat(sheets.values(), ignore_index=True, sort=False))
    except Exception as e:
        # Best effort: report the unreadable workbook and continue with the rest
        print(f"Error occurred while processing file: {file}")
        print(str(e))

# Only concatenate when something was actually loaded
if pilot_frames:
    final_sheet = pd.concat([final_sheet, *pilot_frames], ignore_index=True, sort=False)

In [None]:
# Locate the speech-report extracts
speech_report_path = r'C:\Users\MT1070\Desktop\Master Call Volume\speech-report-extracts'
speech_report_extracts = glob2.glob(speech_report_path + "/*.xlsx")

# Read every speech-report workbook and append its rows to final_sheet.
# Columns are unioned by pd.concat; no key-based merge takes place.
speech_frames = []
for additional_file in speech_report_extracts:
    # Reset per file so the error handler never prints an undefined name
    # (first file fails) or the previous file's workbook (later file fails).
    df_additional = None
    try:
        # sheet_name=None returns a dict of {sheet name: DataFrame};
        # skipfooter=2 drops the last two rows of each sheet.
        df_additional = pd.read_excel(additional_file, sheet_name=None, skipfooter=2, engine='openpyxl')
        speech_frames.append(pd.concat(df_additional.values(), ignore_index=True, sort=False))
    except Exception as e:
        # Best effort: report the failing workbook and continue with the rest
        print(f"Error occurred while processing additional file: {additional_file}")
        print(str(e))
        if df_additional is not None:
            # The workbook was read but its sheets could not be combined
            print("Error row:")
            print(df_additional)

# Concatenate once instead of re-copying final_sheet on every iteration
if speech_frames:
    final_sheet = pd.concat([final_sheet, *speech_frames], axis=0)

# Renumber rows 0..n-1 after stacking (the old index is discarded)
final_sheet = final_sheet.reset_index(drop=True)
# final_sheet.head()

In [None]:
# Write the combined rows to a single workbook on a sheet named 'Raw Data',
# without the DataFrame index column
master_file_path = r"C:\Users\MT1070\Desktop\Master Call Volume\SpinSci Call Volume Evaluation.xlsx"
final_sheet.to_excel(
    excel_writer=master_file_path,
    index=False,
    sheet_name='Raw Data',
)

In [None]:
# final_sheet.head()

In [None]:
# Location of the combined workbook
file_path = r'C:\Users\MT1070\Desktop\Master Call Volume\SpinSci Call Volume Evaluation.xlsx'

# Load it back, dropping the first spreadsheet row so that the second row
# supplies the column names
df = pd.read_excel(io=file_path, skiprows=[0])

In [None]:
# Clean the date column and aggregate all numeric columns per calendar day.

# Drop rows whose "date" cell contains the text "date" (case-insensitive).
# astype(str) keeps the check working when the column is not string-typed,
# where the .str accessor alone would raise.
is_header_row = df['date'].astype(str).str.contains('date', case=False, na=False)

# .copy() makes `data` independent of `df`, so the column assignment below
# does not write into a slice (avoids SettingWithCopyWarning).
data = df[~is_header_row].copy()

# Parse dates; values that cannot be parsed become NaT (errors='coerce').
# infer_datetime_format is omitted: it has no effect when an explicit format
# is supplied and is deprecated in pandas 2.x.
data['date'] = pd.to_datetime(data['date'], format='%m/%d/%y', errors='coerce', exact=False)

# Keep only rows dated in 2023 (NaT rows fail the comparison and are dropped),
# then strip the time component so each row carries a plain calendar date.
data = (
    data[data['date'].dt.year == 2023]
    .assign(date=lambda frame: frame['date'].dt.date)
)

# Sum every remaining column per calendar date and restore "date" as a column
filtered_data = data.groupby("date").sum().reset_index()
filtered_data.head()

In [None]:
# Create column by weekday (Monday=0 ... Sunday=6).
# filtered_data['date'] holds plain datetime.date objects (object dtype) after
# the earlier `.dt.date` conversion, so the .dt accessor is not available on
# it directly; convert back to datetime64 before extracting the weekday.
filtered_data['weekday'] = pd.to_datetime(filtered_data['date']).dt.dayofweek

In [None]:
# Overlay daily internal, external, and transferred call volumes on one chart
fig, axes = plt.subplots(figsize=(8, 6))

# One line per call category, all drawn against the same date axis
call_dates = filtered_data['date']
line1, = axes.plot(call_dates, filtered_data['total_internal_calls'], label='Total Internal Calls')
line2, = axes.plot(call_dates, filtered_data['total_external_calls'], label='Total External Calls')
line3, = axes.plot(call_dates, filtered_data['total_calls_transfered'], label='Total Calls Transferred')

# Title and axis labels
axes.set(
    xlabel='Date',
    ylabel='Number of Calls Handled',
    title='Total Daily Calls Handled from 3/7 - 5/31',
)

# Tilt the date tick labels so they do not overlap
plt.setp(axes.get_xticklabels(), rotation=45)

# Legend listing the three series
axes.legend(handles=[line1, line2, line3], loc='upper right')

# Pad the layout so labels are not clipped, then render
fig.tight_layout(pad=1.5)
plt.show()

In [None]:
## Plot English and Spanish Language usage overlayed
fig, axes = plt.subplots(figsize = (8, 6))

# One line per language, drawn against the same date axis
line1, = axes.plot(filtered_data['date'], filtered_data['total_english_calls'], label='Total English Calls')
line2, = axes.plot(filtered_data['date'], filtered_data['total_spanish_calls'], label='Total Spanish Calls') 

axes.set_xlabel('Date')
axes.set_ylabel('Call Volume by Language Type')
axes.set_title('English and Spanish Call Volume from 3/7 - 5/31')

# Adjusting the x-axis tick labels
plt.setp(axes.xaxis.get_majorticklabels(), rotation=45)

# Add legend. Only the two lines drawn in this cell are listed: the previous
# version also passed `line3`, a handle left over from the call-volume chart,
# which added an unrelated "Total Calls Transferred" entry here.
axes.legend(handles=[line1, line2], loc='upper right')

# Pad the layout so labels are not clipped
plt.tight_layout(pad=1.5)

# Displaying the plot
plt.show()

In [None]:
# Create table with the largest call handling since start of pilot.

# Top 5 rows of filtered_data for each call category, largest first
top_col1_values = filtered_data.nlargest(5, 'total_internal_calls')
top_col2_values = filtered_data.nlargest(5, 'total_external_calls')
top_col3_values = filtered_data.nlargest(5, 'total_calls_transfered')

# Create the output table.
# Each category peaks on its own set of days, so every value column is paired
# with the dates it actually occurred on. The previous version indexed all
# three columns by the internal-call peak dates, which showed external and
# transferred peaks against the wrong dates.
output_table = pd.DataFrame({
    'Date (total_internal_calls)': top_col1_values['date'].values,
    'Top 5 Peak Values (total_internal_calls)': top_col1_values['total_internal_calls'].values,
    'Date (total_external_calls)': top_col2_values['date'].values,
    'Top 5 Peak Values (total_external_calls)': top_col2_values['total_external_calls'].values,
    'Date (total_calls_transferred)': top_col3_values['date'].values,
    'Top 5 Peak Values (total_calls_transferred)': top_col3_values['total_calls_transfered'].values,
})

# Rows are ranked 1 (highest) downwards within each category
output_table.index = pd.RangeIndex(start=1, stop=len(output_table) + 1, name='rank')

# Display the output table
print(output_table)