# Large Language Modeling (LLM)

In [6]:
# !pip install openai #langchain-community

In [7]:
import os
# Fetch the GitHub secret key from the environment variables
LLM_Key = os.getenv("LLM_Key")
# print("GitHub Secret Key:", LLM_Key)  # Ensure it's correctly loaded (Optional)

In [8]:
from langchain.document_loaders import PyPDFLoader
from langchain.chains.summarize.chain import load_summarize_chain
from langchain import OpenAI

# import gradio as gr
loader = PyPDFLoader("papers/Wang et al., 2016.pdf") 
documents = loader.load()
docs = loader.load_and_split()

In [9]:

llm = OpenAI(temperature=0, openai_api_key=LLM_Key) # 
chain = load_summarize_chain(llm, chain_type="map_reduce")
summary = chain.run(docs)
summary

ValidationError: 1 validation error for OpenAI
  Value error, Did not find openai_api_key, please add an environment variable `OPENAI_API_KEY` which contains it, or pass `openai_api_key` as a named parameter. [type=value_error, input_value={'temperature': 0, 'opena...ne, 'http_client': None}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.9/v/value_error

# Natural Language Processing (NLP)

In [10]:
# !pip install bert-extractive-summarizer

In [11]:
from summarizer import Summarizer

# Input text to be summarized
input_text = """Since the areas close to the Sundarbans mangrove estuary, which is one of the most dynamic and productive ecosystems in the world, are very suitable for urban and industrial activities, the coastal areas of this ecosystem are constantly exposed to metal contamination. In this study, we analyzed the levels, spatial distributions, sources, pollution status, ecological risks, and health risks for recreational users of 16 metals in surface water collected from 18 sampling sites in the Sundarbans estuary. Considering the mean values of metals, Sr (2523 μg/L), Al (1731 μg/L), B (1692 μg/L) and Fe (1321 μg/L) were the most abundant metals in the coastal waters of the estuary, while Cd (0.977 μg/L), Ni (3.11 μg/L), Cu (5.98 μg/L) and Cr (9.77 μg/L) were the less abundant metals. All metals except Zr had the coefficient of variation (CV) values of over 35%, suggesting that other metals showed strong variation between sampling sites due to anthropogenic activities. Al, Fe and Pb levels of all sampling sites were above the limit values set for coastal and marine waters. Similarly, Pb levels of all sites exceeded the USEPA chronic criterion set for saltwater aquatic life. The results of pollution indices indicated that there was a serious metal pollution in almost all sampling sites. Low ecological risk (ER) at four sites, moderate ER at five sites and considerable ER at nine sites were recorded. Dual hierarchical clustering analysis grouped 16 metals into four clusters based on their potential sources and 18 sampling sites into three clusters based on their similar pollution characteristics. Health risk assessment results indicated that total hazard index (THI) values of all sites for recreational children were above the acceptable level of 1, indicating that water of all sites is not safe for health of children. However, THI values of all sites except ST8 (1.1) and ST11 (1.19) for recreational adults were below 1. Among the metals studied, Zr was found to be metal that contributes the most (75.89%) to total health risk in this coastal estuary. This finding reveals the necessity of monitoring of such less-studied metals such as Zr in the surface water of coastal estuaries. Carcinogenic risk values of As were within or below the acceptable range at all sites, indicating that carcinogenic risks would not be expected for recreational users."""

# Create a BERT extractive summarizer
summarizer = Summarizer()

# Generate the summary
summary = summarizer(input_text, min_length=50, max_length=150)  
# Output the summary
print("Original Text:")
print(input_text)

print("\nSummary:")
print(summary)



Original Text:
Since the areas close to the Sundarbans mangrove estuary, which is one of the most dynamic and productive ecosystems in the world, are very suitable for urban and industrial activities, the coastal areas of this ecosystem are constantly exposed to metal contamination. In this study, we analyzed the levels, spatial distributions, sources, pollution status, ecological risks, and health risks for recreational users of 16 metals in surface water collected from 18 sampling sites in the Sundarbans estuary. Considering the mean values of metals, Sr (2523 μg/L), Al (1731 μg/L), B (1692 μg/L) and Fe (1321 μg/L) were the most abundant metals in the coastal waters of the estuary, while Cd (0.977 μg/L), Ni (3.11 μg/L), Cu (5.98 μg/L) and Cr (9.77 μg/L) were the less abundant metals. All metals except Zr had the coefficient of variation (CV) values of over 35%, suggesting that other metals showed strong variation between sampling sites due to anthropogenic activities. Al, Fe and Pb l