In [251]:
import os
import json
import openai
import pandas as pd
import datetime as dt
import streamlit as st
import unidecode as ud
from unidecode import unidecode
from pptx import Presentation

In [252]:
##############
# Input file locations and names
##############

acn_login = os.getlogin()
acn_path = "C:/Users/" + acn_login

CV_path = acn_path + "/Accenture/Warsaw Analytics - Documents/01_CVs/"
CV_flnm = "Warsaw_Analytics_FY23_template.pptx"
CV_file = CV_path + CV_flnm

AV_path = acn_path + "/Accenture/Staffing & productivity AI group - General/Dashboard/"
AV_flnm = "myScheduling_People_Extract.xlsx"
AV_file = AV_path + AV_flnm

LCR_flnm = "Staffing_data - LCR calc.xlsx"
LCR_file = AV_path + LCR_flnm

Promo_path = acn_path + "/Accenture/AI Executives Warsaw - General/"
Promo_flnm = "AI Ind Hub - promo slides.pptx"
Promo_file = Promo_path + Promo_flnm

App_path = acn_path + "/Desktop/genAI/CVapp"
Sel_txt = CV_path + '/CVapp/sel_list.txt'

In [253]:
# Load configuration from JSON file
with open(App_path + "/config.json", mode="r") as f:
    config = json.load(f)

client = openai.AzureOpenAI(
        azure_endpoint=config["AZURE_ENDPOINT"],
        api_key= config["AZURE_API_KEY"],
        api_version="2023-12-01-preview")

In [254]:
##############
# Scraping the pptx to produce a table with slidenums, names and positions
##############

CVlist = []
shape_list = []

for slide in CVprs.slides:

    sldnm = CVprs.slides.index(slide)+1
    sldnt = slide.notes_slide
    if sldnt.notes_text_frame:
        sldnt_text = sldnt.notes_text_frame.text

    for shape in slide.shapes:
        shape_properties = {
        'sld_nm': sldnm,
        'sld_nt': sldnt_text,
        'name': shape.name
        }

        # Check if the shape is a text shape and if it has text
        if shape.has_text_frame:
            text = ""
            for paragraph in shape.text_frame.paragraphs:
                text += paragraph.text + "\n"
            shape_properties['text'] = text.strip()  # Strip to remove trailing newline

        shape_list.append(shape_properties)
    shapes_df = pd.DataFrame(shape_list)
print(shapes_df)
names_df = shapes_df[['sld_nm', 'sld_nt']].drop_duplicates()
names_df = names_df.applymap(lambda x: x.lower() if isinstance(x, str) else x)
names_df

      sld_nm              sld_nt                    name   
0          1       anna.adamczyk              Picture 11  \
1          1       anna.adamczyk      Text Placeholder 1   
2          1       anna.adamczyk      Text Placeholder 2   
3          1       anna.adamczyk      Text Placeholder 3   
4          1       anna.adamczyk      Text Placeholder 4   
...      ...                 ...                     ...   
1064     117  krzysztof.babinski      Text Placeholder 4   
1065     117  krzysztof.babinski  Picture Placeholder 11   
1066     117  krzysztof.babinski      Text Placeholder 6   
1067     117  krzysztof.babinski      Text Placeholder 7   
1068     117  krzysztof.babinski      Text Placeholder 8   

                                                   text  
0                                                   NaN  
1     Anna is a  Senior Data Scientist with an Earth...  
2            Media\nMarket Research\nRetail\nGeophysics  
3                                  Anna Adamczy

Unnamed: 0,sld_nm,sld_nt
0,1,anna.adamczyk
9,2,anna.adamczyk
18,3,michal.antczak
27,4,peter.benak
36,5,jerzy.blaszczynski
...,...,...
1017,113,kamil.ziajko
1026,114,barbara.zuk
1035,115,
1051,116,


In [256]:
# Loading input files

AV_df = pd.read_excel(AV_file, 'People_Extract', header=0)
AV_df = AV_df[['Resource Name', 'Resource Email', 'First Availability Date']].dropna()
AV_df['EID'] = AV_df['Resource Email'].str.split('@').str[0]
AV_df['Resource Name'] = AV_df['Resource Name'].str.title()

All_df = pd.read_excel(LCR_file, 'LCR', header=0)
All_df = All_df[['Worker', 'Enterprise ID', 'Management Level', 'People Lead', 'LCR in $']]
All_df = pd.merge(All_df, AV_df, left_on='Enterprise ID', right_on='EID', how='outer')
All_df['EID'] = All_df['EID'].fillna(All_df['Enterprise ID'])
All_df = pd.merge(All_df, names_df, left_on='EID', right_on='sld_nt', how='outer')
All_df

Unnamed: 0,Worker,Enterprise ID,Management Level,People Lead,LCR in $,Resource Name,Resource Email,First Availability Date,EID,sld_nm,sld_nt
0,Oleksandr Konopka,oleksandr.konopka,11-Analyst,Aleksandra Bilas,26,,,NaT,oleksandr.konopka,,
1,Natalia Słomka,natalia.slomka,11-Analyst,Michal Nowocien,26,"Slomka,Natalia",natalia.slomka@accenture.com,2024-04-01,natalia.slomka,90.0,natalia.slomka
2,Piotr Trzcionkowski,piotr.trzcionkowski,11-Analyst,Kaja Cieslukowska,26,"Trzcionkowski,Piotr",piotr.trzcionkowski@accenture.com,2024-07-01,piotr.trzcionkowski,102.0,piotr.trzcionkowski
3,Wiktor Florek,wiktor.florek,11-Analyst,Kamila Jakubowska,28,"Florek,Wiktor",wiktor.florek@accenture.com,2025-01-01,wiktor.florek,17.0,wiktor.florek
4,Daniel Paprocki,daniel.paprocki,11-Analyst,Beata Sliwinska,28,"Paprocki,Daniel",daniel.paprocki@accenture.com,2025-01-01,daniel.paprocki,73.0,daniel.paprocki
...,...,...,...,...,...,...,...,...,...,...,...
123,,,,,,,,NaT,,115.0,
124,,,,,,,,NaT,,116.0,
125,,,,,,,,NaT,,55.0,akshay.manchakadavath
126,,,,,,,,NaT,,56.0,bartosz.mazur


In [243]:
# Function to fill missing Worker and Resource Name
def fill_missing_values(row):
    if pd.isna(row['EID']) and pd.notna(row['Enterprise ID']):
        row['EID'] = row['Enterprise ID']
    if pd.isna(row['Worker']) and pd.notna(row['sld_nt']):
        row['Worker'] = row['sld_nt'].title().replace('.', ' ')
    if pd.isna(row['Resource Name']):
        if pd.notna(row['Worker']):
            row['Resource Name'] = ', '.join(row['Worker'].split()[::-1])
    return row

# Apply the function to fill missing values
All_df = All_df.apply(fill_missing_values, axis=1)
All_df = All_df[['Worker', 'Resource Name', 'EID', 'Management Level', 'People Lead', 'LCR in $', 'First Availability Date']].sort_values('Resource Name')
All_df

Unnamed: 0,Worker,Resource Name,EID,Management Level,People Lead,LCR in $,First Availability Date
123,,,,,,,NaT
127,,,,,,,NaT
126,,,,,,,NaT
125,,,,,,,NaT
124,,,,,,,NaT
...,...,...,...,...,...,...,...
113,,"Zbiegien,Justyna",justyna.zbiegien,,,,2024-04-01
141,Kamil Ziajko,"Ziajko, Kamil",,,,,NaT
114,,"Ziajko,Kamil",kamil.ziajko,,,,2024-08-01
142,Barbara Zuk,"Zuk, Barbara",,,,,NaT
