In [2]:
import os
import glob
import signal
import sys
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA  # Import RetrievalQA
from langchain.schema import HumanMessage

  from .autonotebook import tqdm as notebook_tqdm


### Prepare the document: load the PDF and split it into chunks

In [17]:
# Report to be scored, given as a path relative to the notebook's working directory.
pdf_path = "pdf/China_CLP_Climate_Related_Disclosures_Report_2022_en.pdf.coredownload.pdf"

# Load the PDF into LangChain document objects.
loader = PyPDFLoader(pdf_path)
pdf_docs = loader.load()

# Split the loaded documents into overlapping chunks (chunk_size=500,
# chunk_overlap=50) that will be embedded into the vector store later on.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
documents = list(text_splitter.split_documents(pdf_docs))

print(f"Total loaded document chunks: {len(documents)}")

Total loaded document chunks: 286


### Set up the LLM, embeddings, and vector store

In [18]:
# Set up embeddings and LLM with Google Gemini API.
# The key is read from the environment so that no credential is ever stored in
# the notebook; export GEMINI_API_KEY before starting the kernel.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise RuntimeError(
        "GEMINI_API_KEY is not set. Export it in the environment before running this cell."
    )

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GEMINI_API_KEY)
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=GEMINI_API_KEY)

# Create FAISS vector database from documents and expose it as a retriever that
# returns the 5 most similar chunks for a query.
vector_db = FAISS.from_documents(documents, embeddings)
retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 5})

### Define the prompt builder

In [None]:
def generate_rag_prompt(query, template, context):
    """Build the two Indonesian-language prompt strings used by this notebook.

    Args:
        query: Question text; inserted after ``PERTANYAAN:`` in the scoring prompt.
        template: Accepted for interface compatibility only. The incoming value
            is never read; a freshly built string is returned in its place.
        context: Retrieved document text; inserted into both returned strings.

    Returns:
        A ``(prompt, template)`` tuple. ``prompt`` lists the 37 carbon
        disclosure indicators and the 0-3 label scale, then asks for a table
        with page references. ``template`` is a shorter instruction that wraps
        ``context`` only.
    """
    # Indicator wording is kept verbatim; numbering (1..37) is added below.
    # Entry 33 intentionally keeps its trailing space.
    indicator_names = (
        "Assessment/description/identification of climate change-related risk",
        "Assessment/description/identification of climate change-related opportunities",
        "Assessment of the impact of future climate-related risk and opportunities",
        "Description of scope1 emissions",
        "Comparison of scope1 emissions with the previous year",
        "Description of scope2 emissions",
        "Comparison of scope2 emissions with the previous year",
        "Description of scope3 emissions",
        "Comparison of scope3 emissions with the previous year",
        "Total GHG emissions",
        "Emission of Ozone depleting substances",
        "Nitrogen oxide, sulfur oxide and other air emissions",
        "Disclosure of emission intensity",
        "Disclosure of GHG emissions by sources",
        "Disclosure of GHG emission by facilities/segment",
        "Description of protocol/standard/methodology used to measure GHG emissions",
        "Disclosure of reduction in emissions",
        "Disclosure of initiatives taken to reduce emissions",
        "Description of targets set to reduce emissions",
        "Comparison of current year targets with the previous year",
        "Are emissions generated within permissible limits given by CPCB/SPCB",
        "Environment management system in the company",
        "Identify the person/management/committee responsible for climate policies/strategy",
        "Description of climate change-related business strategy",
        "Consumption of fuel",
        "Consumption of purchased/acquired electricity",
        "Quantification of total energy consumption",
        "Comparison of total energy consumption with the previous year",
        "Reduction in energy consumption",
        "Targets set to reduce energy consumption",
        "Consumption of renewable energy",
        "Participation in emission trading schemes",
        "Statement from CEO/chairman/head of sustainability committee of the company regarding climate change ",
        "Inclusion of words “climate change”/“global warming”",
        "Section devoted to climate change/natural capital/sustainability",
        "Investment in clean energy technologies",
        "Awards if any",
    )
    numbered_indicators = [
        f"{position}. {name}" for position, name in enumerate(indicator_names, start=1)
    ]

    # Label scale: the score, " :", a tab, then the meaning.
    label_meanings = (
        "Not Reported",
        "Qualitative",
        "Quantitative",
        "Qualitative and Quantitative",
    )
    label_scale = [f"{score} :\t{meaning}" for score, meaning in enumerate(label_meanings)]

    # The empty first and last entries give the leading and trailing newline
    # once the lines are joined.
    scoring_lines = [
        "",
        "Anda adalah bot yang berperan sebagai penilai indeks sustainability perusahaan yang dapat memberikan nilai sustainability perusahaan berdasarkan indikator carbon disclosure index berikut:",
        "",
        *numbered_indicators,
        "",
        "Setiap indikator mempunyai 4 kategori penilaian yaitu:",
        "",
        *label_scale,
        "",
        "Berikan label berdasarkan kategori yang ada di tiap indikatornya.",
        "",
        f"PERTANYAAN: '{query}'",
        f"KONTEKS: '{context}'",
        "",
        "Berikan juga halaman pada dokumen tersebut sesuai jawaban yang diberikan pada setiap label.",
        "Buat dalam bentuk tabel yang terdiri dari no, indikator, label, dan deskripsi.",
        "Sebutkan pada deskripsi bahwa ada di halaman berapa yang menunjukkan label pada indikator tersebut.",
        "",
    ]
    scoring_prompt = "\n".join(scoring_lines)

    # The final entry is four spaces: the returned text ends with a newline
    # followed by that indentation.
    explainer_lines = [
        "",
        "Anda adalah seorang ahli sustainability yang berpengalaman dalam menjelaskan jawaban akurat dari teks yang kompleks.",
        "Manfaatkan konteks yang diberikan untuk memberikan jawaban yang jelas dan terinci.",
        "",
        "Konteks:",
        f"{context}",
        "",
        "Berikan jawaban yang informatif dan mendalam berdasarkan konteks yang ada!",
        "    ",
    ]
    explainer_template = "\n".join(explainer_lines)

    return scoring_prompt, explainer_template

In [20]:
# Question sent to the model together with the retrieved context.
query = "Bagaimana hasil dari indikator pada dokumen tersebut?"

# Fetch the chunks most similar to the query. `invoke` replaces the deprecated
# `get_relevant_documents` call.
retrieved_docs = retriever.invoke(query)

# The prompt asks the model to cite page numbers, so each chunk is prefixed with
# the page it came from; page_content alone carries no page information.
# NOTE(review): assumes PyPDFLoader stores a 0-based index under
# metadata["page"] - confirm against the installed loader version.
context_parts = []
for doc in retrieved_docs:
    page_index = doc.metadata.get("page")
    page_label = f"[Halaman {page_index + 1}]\n" if isinstance(page_index, int) else ""
    context_parts.append(f"{page_label}{doc.page_content}")
context = "\n".join(context_parts)

# generate_rag_prompt ignores its `template` argument; only `prompt` is used below.
prompt, template = generate_rag_prompt(query=query, template='', context=context)

# Create a HumanMessage object with the generated prompt
messages = [HumanMessage(content=prompt)]

# Pass the messages to the llm (`invoke` replaces the deprecated direct call).
answer = llm.invoke(messages)
print("Answer:", answer.content)

output_file = "answer4.txt"

# Write as UTF-8 explicitly: the answer may contain non-ASCII characters and
# the platform default encoding is not guaranteed to handle them.
with open(output_file, "w", encoding="utf-8") as file:
    file.write(answer.content)

print(f"Answer saved to {output_file}")

Answer: Berdasarkan teks yang diberikan, sulit untuk memberikan penilaian yang komprehensif terhadap seluruh 27 indikator karena informasi yang tersedia sangat terbatas dan tidak memberikan data kuantitatif yang dibutuhkan untuk sebagian besar indikator.  Teks tersebut lebih berfokus pada gambaran umum risiko dan peluang iklim, serta strategi perusahaan, bukan pada data emisi dan inisiatif pengurangan emisi yang spesifik.

Oleh karena itu, penilaian berikut hanya didasarkan pada informasi yang tersedia di dalam teks yang diberikan dan banyak indikator akan diberi label "0: Not Reported" karena informasi yang dibutuhkan tidak ada.  Penilaian ini bersifat sementara dan bisa berubah jika diberikan dokumen lengkap.

| No | Indikator | Label | Deskripsi | Halaman |
|---|---|---|---|---|
| 1 | Assessment/description/identification of climate change-related risk | 1: Qualitative | Deskripsi kualitatif risiko perubahan iklim disebutkan, seperti dampak perubahan kecepatan angin terhadap aset an