In [15]:
import os
import glob
import pandas as pd
from thefuzz import fuzz
from tqdm import tqdm

directory = '../data/txt/'

interesting_terms = [
    'Climate governance',
    'Board climate oversight',
    'Climate risk governance',
    'Integrating climate in corporate strategy',
    'Climate leadership',
    'Climate disclosure',
    'Climate management',
    'Climate adaptation governance',
    'Environmental leadership'
]

In [17]:
results = []

# Iterate through all text files in the directory
for filename in tqdm(glob.glob(os.path.join(directory, "*.txt"))):
    # Get the filename without the path and extension
    basename = os.path.splitext(os.path.basename(filename))[0]
    # Extract the integer part of the filename
    file_id = int(basename)

    # Read the contents of the file
    with open(filename, "r") as file:
        text = file.read()

    # Split the text into sentences
    sentences = text.split(". ")

    # Iterate through each sentence
    for sentence in sentences:
        sentence = sentence.replace('\n', '')
        # Iterate through each interesting term
        for term in interesting_terms:
            term = term.lower()
            # Use fuzzy matching to detect if the sentence contains the term
            # if fuzz.token_set_ratio(sentence, term) == 100:
                # Add the detection to the results list
            if sentence.lower().find(term) != -1:
                results.append({"file_id": file_id, "sentence": sentence, "term": term})


100%|██████████| 91/91 [00:01<00:00, 45.84it/s]


In [18]:
df = pd.DataFrame(results)
df

Unnamed: 0,file_id,sentence,term
0,100,Repsol publishes additional climate change in...,climate disclosure
1,117,Significant non-financial risk factors for the...,climate governance
2,116,27.0 6.201 9 C0 CLIMATE DISCLOSURE Soc...,climate disclosure
3,116,17 CLIMATE DISCLOSURE C0 │ 2 3.5.1 Ope...,climate disclosure
4,116,32 CLIMATE DISCLOSURE C0 │ 3 5.3.3 Car...,climate disclosure
...,...,...,...
74,122,Protect.2018 Financial ReportFinancial highlig...,climate governance
75,122,Reconciliation of AGM 2018 resolution for Boa...,climate governance
76,122,174 Swiss R e 2018 Financial ReportClimate gov...,climate governance
77,126,"In June 2017, TOTAL became a founding member o...",climate leadership


In [19]:
df.to_csv('../data/tcfd_term_detected.csv', index=False)

In [20]:
df.file_id.value_counts()

116    38
44      6
3       5
122     3
46      3
4       2
5       2
37      2
126     2
11      2
67      1
80      1
56      1
117     1
54      1
82      1
55      1
75      1
61      1
0       1
47      1
110     1
17      1
100     1
Name: file_id, dtype: int64

In [21]:
df.term.value_counts()

climate disclosure          55
climate governance          10
climate leadership           9
climate management           3
environmental leadership     2
Name: term, dtype: int64

In [22]:
df = pd.read_csv('../data/tcfd_term_detected.csv')

In [23]:
df2 = pd.read_csv('../data/tcfd.csv')

In [24]:
df2

Unnamed: 0,Company,Industry,Region,Year,Company Report,Recommended Disclosure,Report URL
0,A.P. Moller-Maersk Group,Transportation,Europe,2017,"2017 Sustainability Report, p. 21",Risk Management a),https://www.maersk.com/~/media_sc9/maersk/corp...
1,AES,Energy,North America,2018,"AES Climate Scenario Report, p. 6-7",Strategy c),https://www.aes.com/sites/default/files/2021-0...
2,Aker BP,Energy,Europe,2020,"Sustainability Report 2020, p. 25",Metrics and Targets a),https://mb.cision.com/Public/1629/3313545/bdef...
3,Allianz Group,Insurance; Asset Manager,Europe,2020,"Sustainability Report 2020, p. 85",Strategy c),https://www.allianz.com/content/dam/onemarketi...
4,Allianz Group,Insurance; Asset Manager,Europe,2019,"Sustainability Report 2019, p. 75",Risk Management a); Risk Management b),https://www.allianz.com/content/dam/onemarketi...
...,...,...,...,...,...,...,...
128,Unilever,Consumer Goods,Europe,2017,"Annual Report and Accounts 2017, pp. 9, 32, 64",Governance a); Governance b); Strategy b); Str...,https://assets.unilever.com/files/92ui5egz/pro...
129,Verizon,Technology and Media,North America,2019,"TCFD Report 2019, p. 7",Risk Management a); Risk Management b),https://www.verizon.com/about/sites/default/fi...
130,Walmart de México y Centroamérica,"Agriculture, Food, and Forest Products",North America,2019,Financial and Corporate Responsibility Report ...,Metrics and Targets b),https://informes.walmex.mx/2019/descargas/eng/...
131,Westpac Group,Banking,Asia Pacific,2017,2017 Westpac Group Sustainability Performance ...,Risk Management c),https://www.westpac.com.au/content/dam/public/...


In [25]:
df2['index_col'] = range(len(df2))
df2 = df2.set_index('index_col')
df2

Unnamed: 0_level_0,Company,Industry,Region,Year,Company Report,Recommended Disclosure,Report URL
index_col,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,A.P. Moller-Maersk Group,Transportation,Europe,2017,"2017 Sustainability Report, p. 21",Risk Management a),https://www.maersk.com/~/media_sc9/maersk/corp...
1,AES,Energy,North America,2018,"AES Climate Scenario Report, p. 6-7",Strategy c),https://www.aes.com/sites/default/files/2021-0...
2,Aker BP,Energy,Europe,2020,"Sustainability Report 2020, p. 25",Metrics and Targets a),https://mb.cision.com/Public/1629/3313545/bdef...
3,Allianz Group,Insurance; Asset Manager,Europe,2020,"Sustainability Report 2020, p. 85",Strategy c),https://www.allianz.com/content/dam/onemarketi...
4,Allianz Group,Insurance; Asset Manager,Europe,2019,"Sustainability Report 2019, p. 75",Risk Management a); Risk Management b),https://www.allianz.com/content/dam/onemarketi...
...,...,...,...,...,...,...,...
128,Unilever,Consumer Goods,Europe,2017,"Annual Report and Accounts 2017, pp. 9, 32, 64",Governance a); Governance b); Strategy b); Str...,https://assets.unilever.com/files/92ui5egz/pro...
129,Verizon,Technology and Media,North America,2019,"TCFD Report 2019, p. 7",Risk Management a); Risk Management b),https://www.verizon.com/about/sites/default/fi...
130,Walmart de México y Centroamérica,"Agriculture, Food, and Forest Products",North America,2019,Financial and Corporate Responsibility Report ...,Metrics and Targets b),https://informes.walmex.mx/2019/descargas/eng/...
131,Westpac Group,Banking,Asia Pacific,2017,2017 Westpac Group Sustainability Performance ...,Risk Management c),https://www.westpac.com.au/content/dam/public/...


In [26]:
df_merged = pd.merge(df, df2, left_on='file_id', right_on='index_col', how='left')

In [27]:
df_merged

Unnamed: 0,file_id,sentence,term,Company,Industry,Region,Year,Company Report,Recommended Disclosure,Report URL
0,100,Repsol publishes additional climate change in...,climate disclosure,Repsol,Energy,Europe,2018,"Integrated Management Report 2018, p. 68",Metrics and Targets a); Metrics and Targets b)...,https://www.repsol.com/content/dam/repsol-corp...
1,117,Significant non-financial risk factors for the...,climate governance,Société Générale,Banking,Europe,2020,"Universal Registration Document 2020, p. 284",Risk Management c),https://www.societegenerale.com/sites/default/...
2,116,27.0 6.201 9 C0 CLIMATE DISCLOSURE Soc...,climate disclosure,Société Générale,Banking,Europe,2019,Societe Generale’s Task Force on Climate-relat...,Strategy a),https://www.societegenerale.com/sites/default/...
3,116,17 CLIMATE DISCLOSURE C0 │ 2 3.5.1 Ope...,climate disclosure,Société Générale,Banking,Europe,2019,Societe Generale’s Task Force on Climate-relat...,Strategy a),https://www.societegenerale.com/sites/default/...
4,116,32 CLIMATE DISCLOSURE C0 │ 3 5.3.3 Car...,climate disclosure,Société Générale,Banking,Europe,2019,Societe Generale’s Task Force on Climate-relat...,Strategy a),https://www.societegenerale.com/sites/default/...
...,...,...,...,...,...,...,...,...,...,...
74,122,Protect.2018 Financial ReportFinancial highlig...,climate governance,Swiss Re,Asset Owner,Europe,2018,"2018 Financial Report, p. 177",Strategy a); Strategy b),https://reports.swissre.com/2018/servicepages/...
75,122,Reconciliation of AGM 2018 resolution for Boa...,climate governance,Swiss Re,Asset Owner,Europe,2018,"2018 Financial Report, p. 177",Strategy a); Strategy b),https://reports.swissre.com/2018/servicepages/...
76,122,174 Swiss R e 2018 Financial ReportClimate gov...,climate governance,Swiss Re,Asset Owner,Europe,2018,"2018 Financial Report, p. 177",Strategy a); Strategy b),https://reports.swissre.com/2018/servicepages/...
77,126,"In June 2017, TOTAL became a founding member o...",climate leadership,Total,Energy,Europe,2019,"Universal Registration Document 2019, p. 232",Metrics and Targets c),https://www.total.com/sites/g/files/nytnzq111/...


In [28]:
df_merged.to_csv('../data/tcfd_term_merged.csv', index=False)