In [2]:
import os
import pandas as pd
import re
import numpy as np
from datetime import datetime
from openpyxl import load_workbook
import warnings


# Suppress UserWarning messages that originate from openpyxl modules.
warnings.filterwarnings("ignore", category=UserWarning, module='openpyxl')


# --- input ---

# User id that is interpolated into the local OneDrive paths below.
your_ing_id = 'XO21BM'
# Cut-off timestamp: strptime('2024', "%Y") yields 1 January 2024, 00:00.
date = datetime.strptime('2024', "%Y")

# Directory the notebook was started from.
directory_path0 = os.getcwd()

# Locally synced "PEARL_Repository" library.
directory_path = f"/Users/{your_ing_id}/Library/CloudStorage/OneDrive-SharedLibraries-ING/Product Evaluation and Risk Assessment Library (PEARL) - PEARL_Repository"
path = directory_path

# Locally synced "MI Dashboard" library; `path` ends up pointing here as well.
directory_path2 = f"/Users/{your_ing_id}/Library/CloudStorage/OneDrive-SharedLibraries-ING/Product Evaluation and Risk Assessment Library (PEARL) - MI Dashboard"
path = directory_path2
            

In [3]:
# Folder and ID mapping: one row per PEARL entry with its repository-relative folder.
overview = pd.read_csv('PEARL List (1).csv', sep=',', low_memory=False)

# The '#$@' marker is replaced by a space.  regex=False is passed explicitly:
# read as a regular expression the marker contains the end-of-string anchor '$'
# and can never match, and the default value of `regex` differs between pandas
# versions.
overview['Folder_URL_txt'] = overview['Folder_URL_txt'].str.replace('#$@', ' ', regex=False)

# Strip the SharePoint prefix; both spellings of the repository folder are removed.
# Literal matching keeps the '.' characters of the URL from acting as wildcards.
overview['Folder'] = overview['Folder_URL_txt'].str.replace(
    r'https://ing.sharepoint.com/sites/PEARL_cs/Shared Documents/Pearl_Repository/', '', regex=False)
overview['Folder'] = overview['Folder'].str.replace(
    r'https://ing.sharepoint.com/sites/PEARL_cs/Shared Documents/PEARL_Repository/', '', regex=False)

cols_overview = ['Folder', 'ID']
# .copy() makes the two-column frame independent of the full frame, so the
# assignment below does not trigger SettingWithCopyWarning.
overview = overview[cols_overview].copy()
# 'ID' is stored as text.
# NOTE(review): the later merge on 'ID' needs the same dtype on the dashboard side - confirm.
overview['ID'] = overview['ID'].astype(str)

In [4]:
# Location of the MI dashboard workbook: the file name joined onto directory_path2.
# Nothing is read from disk in this cell.
file_name = 'MI Dashboard.xlsm'
file_path = os.path.join(directory_path2, file_name)

In [9]:
# Changes
# Path of the MI dashboard workbook.
file_name = 'MI Dashboard.xlsm'
file_path = os.path.join(directory_path2, file_name)

# Load the "Management Info Changes" sheet (first two rows skipped) and expose
# the 'PEARL ID' column under the shared name 'ID'.
mi_dash_changes = pd.read_excel(
    file_path,
    sheet_name="Management Info Changes",
    engine="openpyxl",
    skiprows=2,
).rename(columns={'PEARL ID': 'ID'})

# Cells holding the text '(blank)' become missing values, after which the
# column is converted to datetimes.
for date_column in ('End Date', 'Start Date'):
    is_blank = mi_dash_changes[date_column] == '(blank)'
    mi_dash_changes.loc[is_blank, date_column] = np.nan
    mi_dash_changes[date_column] = pd.to_datetime(mi_dash_changes[date_column])

# Tag the origin of these rows.
mi_dash_changes['Type'] = 'Change'

In [10]:

# Reviews
# Load the "Management Info Reviews" sheet (first two rows skipped) and map the
# review-specific headers onto the column names used for the Changes data.
review_column_names = {
    'RJT Review PEARL ID': 'ID',
    'RJT Review Start Date': 'Start Date',
    'RJT Review Approval Date': 'End Date',
    'RJT Review Status': 'Status',
}
mi_dash_review = pd.read_excel(
    file_path,
    sheet_name="Management Info Reviews",
    engine="openpyxl",
    skiprows=2,
).rename(columns=review_column_names)

# Cells holding the text '(blank)' become missing values, after which the
# column is converted to datetimes.
for date_column in ('End Date', 'Start Date'):
    is_blank = mi_dash_review[date_column] == '(blank)'
    mi_dash_review.loc[is_blank, date_column] = np.nan
    mi_dash_review[date_column] = pd.to_datetime(mi_dash_review[date_column])

# Tag the origin of these rows.
mi_dash_review['Type'] = 'Review'

In [12]:
#mi_risk_module = pd.read_excel(file_path, sheet_name="Risk Modules", engine="openpyxl", skiprows=2)

# Build the overview of risk assessments of interest: changes and reviews stacked.
final_overview = pd.concat([mi_dash_changes, mi_dash_review])

# Keep rows that have an ID and whose End Date is not before the cut-off
# (rows without an End Date compare False and are therefore kept).
cond = ~((final_overview['End Date'] < date) | final_overview['ID'].isna())
final_overview = final_overview[cond]

# Reduce to the reporting columns and attach the repository folder by ID.
cols = ['ID', 'Start Date', 'End Date', 'Process Category', 'Tribe', 'Status', 'Type', 'Duration']
final_overview = final_overview[cols].merge(overview, how='left', on='ID')

final_overview['End Year'] = final_overview['End Date'].dt.year

# Rows without a process category are labelled 'empty'.
cond = final_overview['Process Category'].isna()
final_overview.loc[cond, 'Process Category'] = 'empty'

# Number of IDs per process category, end year and type.
final_overview_grouped = (
    final_overview
    .groupby(['Process Category', 'End Year', 'Type'])['ID']
    .count()
    .reset_index()
)

# Persist the overview and load it back from disk; after the reload the columns
# carry whatever dtypes read_csv infers (no parse_dates is passed).
final_overview.to_csv('final_overview.csv', sep=';', index=False)
final_overview = pd.read_csv('final_overview.csv', sep=';')

# prep for the loop
# result1 = pd.DataFrame()
# result2 = pd.DataFrame()
# result3 = pd.DataFrame()
# result4 = pd.DataFrame()

In [13]:
def find_matching_strings(strings):
    """Select the names that match the risk-journey workbook naming pattern.

    A name is kept when, ignoring case, it starts with 'risk', contains
    'journey' and ends with 'xlsm'.

    Parameters
    ----------
    strings : iterable of str
        Candidate names, e.g. the entries of a directory listing.

    Returns
    -------
    list of str
        The matching names, unchanged and in their original order.
    """
    def _is_match(name):
        lowered = name.lower()
        return (
            lowered.startswith('risk')
            and 'journey' in lowered
            and lowered.endswith('xlsm')
        )

    return [name for name in strings if _is_match(name)]

# Folder column of the overview: the folders the extraction will visit.
folders_to_check = final_overview.loc[:, 'Folder']

# For testing and checking one specific folder: locate the position of its
# first occurrence.
item_to_find = r'Hypotheken - WUB/WUB Hypotheek met Beleggingsrekening/2024-04-15 Intermediate Review product'
temp_index = len(folders_to_check)  # position reported when the folder is not present
for position, f in enumerate(folders_to_check):
    if f == item_to_find:
        temp_index = position
        break

folder = folders_to_check.iloc[temp_index] # for testing
folder = folders_to_check.iloc[859] # for testing

In [14]:
# Walk over the remaining folders, open every risk-journey workbook found in
# them and write the ORM/IRM information of up to four sheets to per-folder CSVs.
#
# NOTE(review): `directory_path3` (prefix of the result CSVs) is not defined in
# any cell visible here; it has to exist before this cell runs and, because it
# is concatenated directly with the file name, it has to end with a path
# separator.


def _first_row_with(df, column, label):
    """Return the index of the first row whose cell in `column` equals `label`, or None."""
    matches = df.index[df.iloc[:, column] == label]
    return None if matches.empty else matches.tolist()[0]


def _result_path(index, file_no, suffix):
    """Build the output CSV path for one workbook.

    The first workbook of a folder keeps the plain `<index><suffix>` name;
    further workbooks in the same folder get a `_<n>` infix so they do not
    overwrite the first result.
    """
    infix = '' if file_no == 0 else f'_{file_no}'
    return directory_path3 + str(index) + infix + suffix


index = 1199 # counter to see progression of the loop; also the position to resume from
folders_to_check = folders_to_check[~folders_to_check.isna()]
n = len(folders_to_check)
# Positional slice: skip the folders in front of the resume position.
# Re-running this cell slices the already shortened Series again.
folders_to_check = folders_to_check.iloc[index:n]


for folder in folders_to_check:
    # Join the individual path components so the separator of the running OS
    # is used; gluing '\\' onto the POSIX-style directory_path makes isdir()
    # fail for every folder on macOS/Linux.
    folder_parts = [part for part in folder.replace("\\", "/").split("/") if part]
    complete_folder_path = os.path.join(directory_path, *folder_parts)
    # Backslash form is kept for the progress line and the 'Folder' column.
    folder = folder.replace("/", "\\")
    index = index + 1

    print(f'Folder {index} {folder}')

    if not os.path.isdir(complete_folder_path):
        continue
    files = find_matching_strings(os.listdir(complete_folder_path))

    # some folders contain two or more risk journey tool excels
    for file_no, f in enumerate(files):

        file_path = os.path.join(complete_folder_path, f)
        try:
            modification_time = os.path.getmtime(file_path)
            modification_time = datetime.fromtimestamp(modification_time).strftime('%Y-%m-%d %H:%M:%S')
            xls = pd.ExcelFile(file_path, engine="openpyxl")
        except Exception as e:
            print(f'File not opened {file_path}.')
            print(f'    reason: {e!r}')
            continue

        # The workbook is opened once, every sheet is parsed from the same
        # handle, and the handle is closed when the block is left.
        with xls:
            sheet_names = xls.sheet_names

            if "Process & Module Selection" in sheet_names:
                try:
                    df = xls.parse(sheet_name="Process & Module Selection")
                    row_index = _first_row_with(df, 1, 'Risk entity')

                    if row_index is not None:
                        # ORM / IRM rows sit 1 and 4 rows below the anchor row.
                        orm_row_index = row_index + 1
                        irm_row_index = row_index + 4
                        orm = str(df.iloc[orm_row_index, 1])
                        orm_value = str(df.iloc[orm_row_index, 5])

                        irm = str(df.iloc[irm_row_index, 1])
                        irm_value = str(df.iloc[irm_row_index, 5])

                        data = {'File': [f.lower()], 'ORM_check': [orm], 'ORM': [orm_value],
                                'IRM_check': [irm], 'IRM': [irm_value], 'Folder': [folder]}
                        data = pd.DataFrame(data)
                        data['File date'] = modification_time

                        data.to_csv(_result_path(index, file_no, '_result1.csv'), sep=';', index=False)

                except Exception as e:
                    print("Process & Module Selection NOT opened.")
                    print(f'    reason: {e!r}')

            if "General Risk Identification" in sheet_names:
                try:
                    df = xls.parse(sheet_name="General Risk Identification")
                    row_index = _first_row_with(df, 6, 'Must be invited?')

                    if row_index is not None:
                        orm_row_index = row_index + 1
                        irm_row_index = row_index + 4
                        orm = str(df.iloc[orm_row_index, 1])
                        orm_value = str(df.iloc[orm_row_index, 6])
                        orm_value2 = str(df.iloc[orm_row_index, 9])

                        irm = str(df.iloc[irm_row_index, 1])
                        irm_value = str(df.iloc[irm_row_index, 6])
                        irm_value2 = str(df.iloc[irm_row_index, 9])

                        data = {'File': [f.lower()], 'ORM_check': [orm], 'ORM_invited': [orm_value],
                                'ORM_part_of_risk_asses': [orm_value2],
                                'IRM_check': [irm], 'IRM_invited': [irm_value],
                                'IRM_part_of_risk_asses': [irm_value2], 'Folder': [folder]}
                        data = pd.DataFrame(data)
                        data['File date'] = modification_time

                        data.to_csv(_result_path(index, file_no, '_result2.csv'), sep=';', index=False)

                except Exception as e:
                    print("General Risk Identification NOT opened.")
                    print(f'    reason: {e!r}')

            if "Risk Summary & Approval" in sheet_names:
                try:
                    df = xls.parse(sheet_name="Risk Summary & Approval")
                    row_index = _first_row_with(
                        df, 1,
                        'Step 4 -  Select Risk Entities to inform or to invite for challenge and agreement')

                    if row_index is not None:
                        # ORM / IRM rows sit 3 and 6 rows below the anchor row.
                        orm_row_index = row_index + 3
                        irm_row_index = row_index + 6
                        orm = str(df.iloc[orm_row_index, 1])
                        orm_value = str(df.iloc[orm_row_index, 4])

                        irm = str(df.iloc[irm_row_index, 1])
                        irm_value = str(df.iloc[irm_row_index, 4])

                        data = {'File': [f.lower()], 'ORM_check': [orm], 'ORM_invited_or_challange': [orm_value],
                                'IRM_check': [irm], 'IRM_invited_or_challange': [irm_value], 'Folder': [folder]}
                        data = pd.DataFrame(data)
                        data['File date'] = modification_time

                        data.to_csv(_result_path(index, file_no, '_result3.csv'), sep=';', index=False)

                except Exception as e:
                    print("Risk Summary & Approval NOT opened.")
                    print(f'    reason: {e!r}')

            if "Risk Journey Log" in sheet_names:
                try:
                    df = xls.parse(sheet_name="Risk Journey Log")
                    row_index = _first_row_with(df, 1, 'Customer Suitability')

                    # TODO: workbooks without the anchor row are skipped silently for now.
                    if row_index is not None:
                        # 14 rows starting at the anchor row: title and applicability.
                        data = {'Module Title': df.iloc[row_index:(row_index+14), 1].tolist(),
                                'Applicable': df.iloc[row_index:(row_index+14), 3].tolist()}

                        data = pd.DataFrame(data)
                        data['File'] = f.lower()
                        data['Folder'] = folder
                        data['File date'] = modification_time

                        data.to_csv(_result_path(index, file_no, '_result4.csv'), sep=';', index=False)

                except Exception as e:
                    print("Risk Journey Log NOT opened.")
                    print(f'    reason: {e!r}')

Folder 1200 Cash\Verpakt Storten\2024-11-14 update terms and conditions sealbags
Folder 1201 Credit Cards\Creditcard\2024-11-14 Change authentication level cards IRIS  Risk assessment MIA ISSUE10060964
Folder 1202 Beyond Banking\ING Punten\2024-11-14 ING Points for Charity
Folder 1203 \Communication\2024-11-14 Financial Health Content ing.nl
Folder 1204 Customer Data\Process change\2024-11-14 Epic 5137634 Develop automated solution for charging KYC fee (KKO)
Folder 1205 Z_Other\Tribe Business Lending\2024-11-15 Sox Annual Figures via SBR  SSC  FINAN
Folder 1206 Z_Other\Tribe Digital & Customer Interactions\2024-11-15 Throwback Feature
Folder 1207 Current accounts\G-rekening\2024-11-15 Modification letter Grekening
Folder 1208 Credit Cards\Creditcard\2024-11-15 Execution Winddown Revolving Credit cards
Folder 1209 Beleggingsrekeningen\Vermogensbeheer\2024-11-15 Client feedback tool in the DPM order flow  Phase 2
Folder 1210 Beleggingsrekeningen\Vermogensbeheer\2024-11-15 DPMEB Historica