In [1]:
import pandas as pd
import numpy as np
import re

from tabula import read_pdf
from tabulate import tabulate
import pdfplumber
import openpyxl

from Script import ExtractPDFTables, MapLinks2Excel

# Table Extraction and Mapping (SDG-GRI Link)

In [None]:
# Extract the SDG-GRI linkage tables from the mapping PDF.
pdf_path = 'ESG-Frameworks/Mapping-Standards/SDG-GRI/sdg-gri.pdf'

# Two page groups are passed to the extractor: 3-72 and 74-98. `range`
# excludes its end value, so page 73 is not part of either group.
page_range = [list(range(3, 73)), list(range(74, 99))]

# Region of each page handed to the extractor.
# NOTE(review): presumably [top, left, bottom, right] in PDF points, the
# convention tabula uses for `area` - confirm against ExtractPDFTables.
area = [80.51, 90.42, 561.96, 814.18]

# The extractor is named after the SDG-GRI document it parses, so it cannot be
# confused with the TCFD-GRI extractor created further down the notebook.
tableSDG_GRI = ExtractPDFTables(pdf_path, page_range, area)
df = tableSDG_GRI.getTablesSDG_GRI()

## Mapping Collected Dataframe (SDG-GRI)

In [None]:
# Target worksheet and workbook for the SDG -> GRI direction of the mapping.
sheet = "SDG's"
excelFilePath = 'ESG-Frameworks/Outputs/testing.xlsx'

# MapLinks2Excel comes from the project's Script module; it receives the
# extracted frame plus the sheet/workbook to work on.
mapp = MapLinks2Excel(df, sheet, excelFilePath)
mapp.MapSDG_GRI()

## Mapping Collected Dataframe (GRI-SDG)

In [None]:
# Reverse direction (GRI -> SDG): same workbook, but the "GRI 2016" worksheet.
sheet = "GRI 2016"
excelFilePath = 'ESG-Frameworks/Outputs/testing.xlsx'

# Uses the same `df` as the SDG -> GRI cell; only the sheet and the mapping
# method differ.
mapp = MapLinks2Excel(df, sheet, excelFilePath)
mapp.MapGRI_SDG()

# Table Extraction and Mapping (COH4B-GRI Link)

In [9]:
# Source PDF for the GRI <-> COH4B linkage; pages 13-21 are requested
# (`range` excludes its end value).
pdf_path = 'ESG-Frameworks/Mapping-Standards/GRI-COH4B/gri-coh4b.pdf'
page_range = [*range(13, 22)]

tableGRI_COH4B = ExtractPDFTables(pdf_path, page_range)

# Build the frame step by step. Every helper takes the current frame and
# returns the next one, so the order of these calls matters.
df = tableGRI_COH4B.getTablesGRI_COH4B()
df = tableGRI_COH4B.setHeaders(df, 0)
df = tableGRI_COH4B.headerSwap(df, 'A. COHBP & \ndefinition', None, 'id')
df = tableGRI_COH4B.addDot(df, 'id')
# Produces the 'GRI Standards' column from the 'D. GRI disclosures' text.
df = tableGRI_COH4B.extractDisclosures(df, 'D. GRI disclosures', 'GRI Standards')



In [10]:
# Inspect the frame currently bound to `df` (the COH4B frame when the cells
# are run in order).
df

Unnamed: 0,id,A. COHBP & \ndefinition,B. Example metrics \nper COH4B,C. GRI \nStandards,D. GRI disclosures,E. Comments on GRI \ndisclosures,GRI Standards
0,1.0,Health culture: \nPromoting an \norganizationa...,Inclusion of health \nin company’s value \nsta...,GRI 102: General \nDisclosures (2016)\nGRI 103...,"Disclosure 102-16 Values, \nPrinciples, Standa...",102 and 103 can be used to \nreport on a broad...,102-16
1,2.0,Responsible \ncorporate political \nactivity: ...,Disclosure of \ncontributions to political \nc...,GRI 415: Public Policy \n(2016),Disclosure 415-1 Political \nContributions: 41...,Although the example metrics \nfor this COHBP ...,415-1
2,3.0,Responsible \nmarketing practices: \nCommitmen...,% promoting products \nmeeting the Children’s ...,GRI 417: Marketing \nand Labeling (2016),Disclosure 417-1 \nRequirements for product \n...,Although the example metrics \nfor this COHBP ...,417-1
3,4.0,Health promotion \n& wellness: Health \ninitia...,Employees entitled to \nhealth and well-being ...,GRI 403: \nOccupational Health \nand Safety (2...,Disclosure 403-6 \nPromotion of worker \nheal...,"Although not included in \nCOH4B, protecting t...",403-6
4,5.0,Paid family and \nmedical leave: \nAllowing em...,Total # of employees that \nwere entitled by p...,GRI 401: \nEmployment (2016),Disclosure 401-2 Benefits \nprovided to full-t...,Paid sick leave can be reported \nunder 401-2-...,401-2
5,6.0,Health insurance: \nProviding employer-\nbased...,% of employees entitled to \nhealth insurance ...,GRI 401: \nEmployment (2016)\nGRI 403: \nOccup...,Disclosure 401-2 Benefits \nprovided to full-t...,Responsibility and expectations \nfor companie...,401-2
6,7.0,"Equality, diversity \n& impartiality: \nManagi...",% of employees per \nemployee category \nby ge...,GRI 405: Diversity \nand Equal \nOpportunity (...,Disclosure 405-1 Diversity \nof governance bod...,Interpretation of the term \ndiversity and ind...,405-1
7,8.0,Financial literacy: \nProviding financial \nl...,Employees entitled to \nfinancial literacy tra...,G4 Sector \nDisclosures: Financial \nServices ...,Initiatives to enhance \nfinancial literacy by...,G4 Sector Disclosures precede \nthe GRI Standa...,No value :(
8,9.0,"Work time: \nManaging working \nhours, schedu...",Notice given to employees \nfor schedule chang...,GRI 402: Labor/\nManagement \nRelations (2016)...,Disclosure 402-1 \nMinimum notice period \nreg...,Disclosure 102-8 can be \nused to share data o...,402-1
9,10.0,Job security: \nManaging job \ninsecurity as \...,# of employees laid off \nin the past year; No...,GRI 401: \nEmployment (2016)\nGRI 402: Labor/\...,Disclosure 401-1 New \nemployee hires and \nem...,Disclosure 402-1 can be used \nto share inform...,401-1


In [None]:
# Save the current frame as CSV in the GRI-COH4B folder. No options are
# passed, so pandas' defaults apply and the index is written as the first
# column.
df.to_csv('ESG-Frameworks/Mapping-Standards/GRI-COH4B/GRI-COH4B-2.csv')

## Mapping Collected Dataframe (COH4B-GRI)

In [None]:
# Target worksheet and workbook for the COH4B -> GRI direction of the mapping.
sheet = "COH4B"
excelFilePath = 'ESG-Frameworks/Outputs/testing.xlsx'

# MapLinks2Excel comes from the project's Script module; it receives the
# extracted frame plus the sheet/workbook to work on.
mapp = MapLinks2Excel(df, sheet, excelFilePath)
mapp.MapCOH4B_GRI()

## Mapping Collected Dataframe (GRI-COH4B)

In [None]:
# Reverse direction (GRI -> COH4B): same workbook, but the "GRI 2016" worksheet.
sheet = "GRI 2016"
excelFilePath = 'ESG-Frameworks/Outputs/testing.xlsx'

# Uses the same `df` as the COH4B -> GRI cell; only the sheet and the mapping
# method differ.
mapp = MapLinks2Excel(df, sheet, excelFilePath)
mapp.MapGRI_COH4B()

# Table Extraction and Mapping (TCFD-GRI Link)

In [7]:
# Source PDF for the TCFD -> GRI linkage; pages 49-58 are requested
# (`range` excludes its end value).
pdf_path = 'ESG-Frameworks/Mapping-Standards/TCFD-GRI/TCFD to GRI-2.pdf'
page_range = [*range(49, 59)]

# From here on `df` holds the TCFD-GRI tables, replacing whatever frame an
# earlier section left in it.
tableTCFD_GRI = ExtractPDFTables(pdf_path, page_range)
df = tableTCFD_GRI.getTablesTCFD_GRI()

In [None]:
# df.to_csv('ESG-Frameworks/Mapping-Standards/TCFD-GRI/TCFD-GRI-2.csv')

In [4]:
# List the column labels of the frame currently bound to `df`; the extraction
# cells below refer to columns by these exact labels (including embedded "\n").
df.columns

Index(['Recommended \nDisclosures \n(TCFD Framework)',
       'Related \ncode/\nparagraph', 'Description',
       'Omissions and/or content difference(s)', 'Level of \nalignment'],
      dtype='object')

In [5]:
# Run the project's disclosure extractor on the 'Description' column, passing
# 'GRI Standards' as the third argument.
# NOTE(review): presumably that is the name of the output column, mirroring
# extractDisclosures in the COH4B section - confirm in Script. The result is
# bound to `df_`, which no later cell visible here reads.
df_ = tableTCFD_GRI.extractDisclosures2(df, 'Description', 'GRI Standards')

In [11]:
# Pull the first GRI disclosure code (e.g. "102-16" or "415-1") out of each
# row's 'Description' text and store it in a new 'GRI Standards' column.
#
# This cell needs the TCFD-GRI frame from the extraction cell above. `df` is
# reused for several datasets in this notebook, so running cells out of order
# can leave a different frame bound to it; fail early with a clear message
# instead of a bare KeyError from deep inside pandas.
if 'Description' not in df.columns:
    raise KeyError(
        "'Description' column not found in df - re-run the TCFD-GRI "
        "extraction cell before this one."
    )

# Pattern: three digits, a hyphen, then one or two digits.
# `expand=False` returns a Series aligned with df's index. Rows without a
# match come back as NaN and receive the placeholder, so the new column always
# has exactly one entry per row (the previous loop silently skipped rows and
# then failed on the length mismatch when assigning the column).
gri_codes = (
    df['Description']
    .astype(str)
    .str.extract(r'([0-9]{3}-[0-9]{1,2})', expand=False)
    .fillna('No value :(')
)

df['GRI Standards'] = gri_codes

# Kept as a plain list because the following cell displays `values`.
values = gri_codes.tolist()


KeyError: 'Description'

In [None]:
# Inspect the list of codes collected by the previous cell.
values

In [None]:
# Check the column labels again; 'GRI Standards' should now be listed if the
# extraction cell above succeeded.
df.columns

In [None]:
# Display the frame currently bound to `df` for a final visual check.
df