In [2]:
from llama_cpp import Llama
import llama_cpp
from datasets import load_dataset
import evaluate
import requests
import json
from tqdm.notebook import tqdm  # Use tqdm.notebook for Jupyter Notebook
import time

In [4]:
print("GPU offload supported:", llama_cpp.llama_supports_gpu_offload())

# Path to your GGUF model (adjust path if needed)
model_path = "/home/ubuntu/fast_llm_inference/llama-3.1-8B-gguf/llama-3.1-8B-Q8_0.gguf"

# Initialize Llama with GPU layers offloaded
llm = Llama(
    model_path=model_path,
    n_ctx=8192,
    n_gpu_layers=80,     # Offload to GPU! Adjust as needed based on your VRAM
    verbose=False         # Prints backend info
)

GPU offload supported: True


In [5]:
# Load test data
dataset = load_dataset("gigaword", split="test[:100]")  # Limit to 100 for fast eval

# Initialize ROUGE metric
rouge = evaluate.load('rouge')

In [6]:
def summarize_with_llama_cpp(document, max_tokens=50):
    prompt_template = (
        "You are an AI assistant specialized in summarizing news articles. "
        "Summarize the following news sentence into a concise headline.\n\n"

        "Here is an example:\n"
        "News: Japan 's nec corp. and UNK computer corp. of the united states said wednesday they had agreed to join forces in supercomputer sales.\n"
        "Headline: Nec UNK in computer sales tie-up\n\n"

        "Now summarize the following news:\n\n"

        "News: {document}\n\n"
        "Headline:"
    )
    
    prompt = prompt_template.format(document=document)

    payload = {
        "prompt": prompt,
        "max_tokens": max_tokens,
        "temperature": 0.3,
        "stream": False,
        "seed": 0,
    }

    response = llm(**payload)

    summary = response['choices'][0]['text'].strip()
    return summary


In [7]:
# Generate summaries and evaluate
references = []
predictions = []

start = time.time()

for item in dataset:

    doc = item['document']
    ref_summary = item['summary']

    pred_summary = summarize_with_llama_cpp(doc)

    if pred_summary:
        references.append(ref_summary)
        predictions.append(pred_summary)
    
end = time.time()

In [9]:
# Evaluate with ROUGE
results = rouge.compute(predictions=predictions, references=references)

print("llama.cpp (Llama-3.1-8B) float16 Summarization Results:")

print(f"\nNumber of examples: {len(references)}")
print(f"\nElapsed time: {end - start:.2f} s")

print("\nROUGE Results:")
for key, value in results.items():
    print(f"{key}: {value:.4f}")

llama.cpp (Llama-3.1-8B) float16 Summarization Results:

Number of examples: 100

Elapsed time: 162.28 s

ROUGE Results:
rouge1: 0.1792
rouge2: 0.0601
rougeL: 0.1577
rougeLsum: 0.1629


### Now trying the 8bit quantization

In [None]:
model_path = "/home/ubuntu/fast_llm_inference/llama-3.1-8B-gguf/llama-3.1-8B-Q8_0.gguf"

llm = Llama(
    model_path=model_path,
    n_ctx=8192,         # 8K tokens context is fine unless you need more
    n_gpu_layers=80,    # L4 GPU 24 GB can usually handle **60-80 layers** for 8B models
    verbose=False
)

In [11]:
# Generate summaries and evaluate
references = []
predictions = []

start = time.time()

for item in dataset:

    doc = item['document']
    ref_summary = item['summary']

    pred_summary = summarize_with_llama_cpp(doc)

    if pred_summary:
        references.append(ref_summary)
        predictions.append(pred_summary)
    
end = time.time()

In [12]:
# Evaluate with ROUGE
results = rouge.compute(predictions=predictions, references=references)

print("llama.cpp (Llama-3.1-8B) 8Bit Summarization Results:")

print(f"\nNumber of examples: {len(references)}")
print(f"\nElapsed time: {end - start:.2f} s")

print("\nROUGE Results:")
for key, value in results.items():
    print(f"{key}: {value:.4f}")

llama.cpp (Llama-3.1-8B) 8Bit Summarization Results:

Number of examples: 100

Elapsed time: 161.84 s

ROUGE Results:
rouge1: 0.1788
rouge2: 0.0608
rougeL: 0.1580
rougeLsum: 0.1631


### Now 4Bit quantization

In [16]:
model_path = "/home/ubuntu/fast_llm_inference/llama-3.1-8B-gguf/llama-3.1-8B-Q4_K_M.gguf"

llm = Llama(
    model_path=model_path,
    n_ctx=8192,         # 8K tokens context is fine unless you need more
    n_gpu_layers=80,    # L4 GPU 24 GB can usually handle **60-80 layers** for 8B models
    verbose=False
)

In [17]:
# Generate summaries and evaluate
references = []
predictions = []

start = time.time()

for item in dataset:

    doc = item['document']
    ref_summary = item['summary']

    pred_summary = summarize_with_llama_cpp(doc)

    if pred_summary:
        references.append(ref_summary)
        predictions.append(pred_summary)
    
end = time.time()

In [18]:
# Evaluate with ROUGE
results = rouge.compute(predictions=predictions, references=references)

print("llama.cpp (Llama-3.1-8B) 4Bit Summarization Results:")

print(f"\nNumber of examples: {len(references)}")
print(f"\nElapsed time: {end - start:.2f} s")

print("\nROUGE Results:")
for key, value in results.items():
    print(f"{key}: {value:.4f}")

llama.cpp (Llama-3.1-8B) 4Bit Summarization Results:

Number of examples: 98

Elapsed time: 64.42 s

ROUGE Results:
rouge1: 0.2699
rouge2: 0.1160
rougeL: 0.2491
rougeLsum: 0.2501


### LLaMA 3.1 8B Summarization Benchmark

<small>

#### 📝 Tested Models
| **Model File**                      | **Precision** | **Quantization Scheme** | **Notes**                                     |
|-------------------------------------|---------------|-------------------------|-----------------------------------------------|
| llama-3.1-8B-f16.gguf               | float16       | Full Precision          | Largest model, highest theoretical accuracy, slowest inference |
| llama-3.1-8B-Q8_0.gguf              | 8-bit         | Q8_0                   | Reduced size, good balance of speed and quality |
| llama-3.1-8B-Q4_K_M.gguf            | 4-bit         | Q4_K_M                 | Highly optimized 4-bit quantization, best summarization quality in tests |

---

#### 📊 Summarization Results

| **Platform / Model**               | **Elapsed Time (s)** | **ROUGE-1** | **ROUGE-2** | **ROUGE-L** | **ROUGE-Lsum** |
|------------------------------------|----------------------|-------------|-------------|-------------|----------------|
| Ollama (LLaMA 3.1 8B Q4_K_M)       | 49.06                | 0.2886      | 0.1040      | 0.2632      | 0.2658         |
| llama.cpp (Q4_K_M)                 | 64.42                | 0.2699      | 0.1160      | 0.2491      | 0.2501         |
| llama.cpp (Q8_0)                   | 161.84               | 0.1788      | 0.0608      | 0.1580      | 0.1631         |
| llama.cpp (float16)                | 161.65               | 0.1801      | 0.0599      | 0.1591      | 0.1629         |

---

#### 🔍 Summary of Insights

- **4-bit quantized models (Q4_K_M)** in both **Ollama** and **llama.cpp** delivered **better summarization quality** and **faster inference** than higher-precision models.
- The **Q4_K_M quantization scheme** preserves summarization performance surprisingly well and matches Ollama's results.
- **8-bit (Q8_0)** and **float16** models performed worse in ROUGE scores, despite having more precision. This may be due to:
  - Differences in **prompt formatting**
  - **Sampling parameters**
  - Potential model variant differences (instruction-tuned vs base models)

---

#### ✅ Recommendations

1. Use **Q4_K_M quantized models** in llama.cpp for comparable performance to Ollama.
2. Match **prompt templates** used in Ollama:

</small>

### 🧠 Q4_K_M vs Q8_0 Quantization Comparison

<small>

#### What Are They?
Both **Q4_K_M** and **Q8_0** are quantization methods used to compress model weights for faster inference and lower memory usage.

---

#### Q4_K_M (4-bit Quantization, Optimized)

| Feature          | Description |
|------------------|-------------|
| **Precision**    | 4-bit |
| **Quantization Type** | "K" series, specifically **K_M** (multi-purpose optimized) |
| **Compression**  | Very high (significantly smaller than 8-bit) |
| **Speed**        | Extremely fast, ideal for CPU/GPU |
| **Memory Usage** | Very low (fits on smaller GPUs like 6-8GB VRAM) |
| **Accuracy**     | Preserves high accuracy in **instruction-tuned tasks** like **summarization**, **chat**, and **QA** |
| **Best Use Cases** | Chatbots, summarization, reasoning tasks |
| **Notes**        | Uses **groupwise quantization** and **per-channel scaling** for better accuracy retention despite low precision |

---

#### Q8_0 (8-bit Quantization, General Purpose)

| Feature          | Description |
|------------------|-------------|
| **Precision**    | 8-bit |
| **Quantization Type** | Uniform 8-bit |
| **Compression**  | Moderate (smaller than float16 but larger than 4-bit) |
| **Speed**        | Faster than float16, but slower than Q4_K_M |
| **Memory Usage** | Moderate (needs more VRAM, typically 12GB+) |
| **Accuracy**     | Higher precision retention in general, but not optimized for specific tasks |
| **Best Use Cases** | Complex reasoning, precision-sensitive tasks |
| **Notes**        | General-purpose quantization without task-specific optimizations |

---

#### ⚖️ Comparison Table: Q4_K_M vs Q8_0

| Feature          | **Q4_K_M**                  | **Q8_0**                  |
|------------------|-----------------------------|---------------------------|
| **Precision**    | 4-bit                       | 8-bit                    |
| **Size**         | Very small                  | Medium                   |
| **Speed**        | Very fast (low latency)     | Fast (higher latency than 4-bit) |
| **VRAM/Memory**  | Very low usage (fits on smaller GPUs/CPUs) | Medium (requires more VRAM) |
| **Accuracy**     | High for summarization, chat, reasoning (optimized quantization) | General higher precision (not task-optimized) |
| **Task Tuning**  | Task-specific optimizations (instruction-following, summarization) | General-purpose |
| **Best Use**     | Chatbots, summarization, QA tasks with constrained resources | Complex reasoning or precision-sensitive tasks |
| **Ollama Default?** | ✅ Frequently used (Q4_K_M or Q4_K_S) | ❌ Usually not used |

---

#### ✅ Why Q4_K_M Outperformed Q8_0 in Summarization
- **Q4_K_M** is optimized for **task-specific performance**, often giving better results for **instruction-tuned models**, **summarization**, and **chat** tasks.
- **Q8_0** retains more raw precision but isn't tuned for these tasks, leading to lower scores in ROUGE evaluation.
- **Q4_K_M** also runs significantly faster with less resource usage.

---

</small>

### 🧠 How Q4_K_M Is Optimized

<small>

Q4_K_M is part of the advanced **K series** quantization schemes, designed to balance **speed**, **size**, and **accuracy**. It introduces several optimizations to maintain high task performance despite being a 4-bit quantization.

#### Key Optimizations

| Optimization                   | Description |
|--------------------------------|-------------|
| **Groupwise Quantization**     | Weights are divided into small groups (e.g., 32 or 64) and quantized individually, improving precision retention. |
| **Per-Channel Scaling**        | Each group or channel has its own scale factor, ensuring finer control over the quantization process. |
| **Mixed Weight Packing (M)**   | Uses different packing strategies optimized for different layers (e.g., attention vs MLP layers). |
| **Dynamic Zero Points**        | Zero points are dynamically computed within groups, reducing quantization bias. |
| **Efficient SIMD Utilization** | The packed format is optimized for vectorized operations on CPU and GPU, increasing inference speed. |

#### Why It Works Well
- Optimized for **instruction-following**, **summarization**, and **chat** tasks.
- Preserves task-critical accuracy despite aggressive compression.
- Runs **very efficiently** on both CPU and GPU.

#### Q4_K_M vs Q8_0

| Feature            | Q4_K_M                   | Q8_0                 |
|--------------------|--------------------------|----------------------|
| Precision          | 4-bit                    | 8-bit               |
| Compression        | High                     | Medium              |
| Accuracy Retention | High (task-optimized)    | High (general)      |
| Speed              | Very fast                | Fast                |
| Memory Usage       | Very low                 | Medium              |
| Task Tuning        | Summarization, Chat, QA  | General-purpose     |
| Ollama Use         | ✅ Often used (default)  | ❌ Less common       |

</small>

In [1]:
from datasets import load_dataset
import random
import json

# Load SQuAD v2 dataset (validation split)
squad_v2 = load_dataset("squad_v2")

# Set random seed for reproducibility
random.seed(42)

# Convert the validation split to a list and sample 200 random questions
validation_list = list(squad_v2["validation"])
sampled_questions = random.sample(validation_list, 200)

print(sampled_questions[1])

{'id': '57111b95a58dae1900cd6c53', 'title': 'Huguenot', 'context': 'Frederick William, Elector of Brandenburg, invited Huguenots to settle in his realms, and a number of their descendants rose to positions of prominence in Prussia. Several prominent German military, cultural, and political figures were ethnic Huguenot, including poet Theodor Fontane, General Hermann von François, the hero of the First World War Battle of Tannenberg, Luftwaffe General and fighter ace Adolf Galland, Luftwaffe flying ace Hans-Joachim Marseille, and famed U-boat captain Lothar von Arnauld de la Perière. The last Prime Minister of the (East) German Democratic Republic, Lothar de Maizière, is also a descendant of a Huguenot family, as is the German Federal Minister of the Interior, Thomas de Maizière.', 'question': 'Who was the final Prime Minister of East Germany?', 'answers': {'text': ['Lothar de Maizière', 'Lothar de Maizière', 'Lothar de Maizière'], 'answer_start': [588, 588, 588]}}


In [2]:
# Count how many sampled questions are answerable
answerable_count = sum(1 for q in sampled_questions if len(q['answers']['text']) > 0)

# Optionally, count unanswerable too
unanswerable_count = len(sampled_questions) - answerable_count

# Print results
print(f"Total sampled questions: {len(sampled_questions)}")
print(f"Answerable questions: {answerable_count}")
print(f"Unanswerable questions: {unanswerable_count}")

Total sampled questions: 200
Answerable questions: 101
Unanswerable questions: 99


In [None]:
import time
from evaluate import load
import re
import string
import csv
import time
from evaluate import load

def normalize_answer(s):
    """Lower text and remove punctuation, articles, and extra whitespace."""
    
    def lower(text):
        return text.lower()

    def remove_punctuation(text):
        return text.translate(str.maketrans('', '', string.punctuation))

    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    return white_space_fix(remove_articles(remove_punctuation(lower(s))))

def clean_prediction(prediction):
    """
    Cleans the raw prediction output from llama.cpp.
    - Truncates at a new line, 'Context:', or other stop signals.
    - Normalizes the prediction.
    """
    # Split on common stop sequences
    stop_tokens = ["\n\n", "\nContext:", "Context:", "Question:"]
    for stop in stop_tokens:
        if stop in prediction:
            prediction = prediction.split(stop)[0]

    return normalize_answer(prediction)


def qa_with_llama_cpp(example, max_tokens=50, verbose=True):
    context = example['context']
    question = example['question']
    ground_truth_answers = example['answers']

    prompt_template = (
        "You are a question answering assistant. Given the context, answer the question. "
        "If the answer isn't in the context, say 'I don't know'.\n\n"

        "Here is an example:\n"
        "Context: The Normans (Norman: Nourmands; French: Normands; Latin: Normanni)...\n"
        "Question: What is the name of the region the Normans gave their name to?\n"
        "Answer: Normandy\n\n"

        "Context: {context}\n\n"
        "Question: {question}\n\n"
        "Answer:"
    )
    
    prompt = prompt_template.format(context=context, question=question)

    payload = {
        "prompt": prompt,
        "max_tokens": max_tokens,
        "temperature": 0.0,
        "stream": False,
        "seed": 0,
    }

    response = llm(**payload)
    raw_prediction = response['choices'][0]['text'].strip()

    # Clean and normalize the prediction
    prediction = clean_prediction(raw_prediction)

    return {
        "question": question,
        "prediction": prediction,
        "ground_truths": ground_truth_answers['text'],
    }



def compute_exact_match(prediction, ground_truths):
    """Exact match: 1 if prediction is in ground_truths, else 0."""
    return int(prediction in ground_truths)

def compute_f1(prediction, ground_truths):
    """Compute the maximum F1 over all ground truths."""
    def get_tokens(s):
        return normalize_answer(s).split()

    pred_tokens = get_tokens(prediction)
    if not pred_tokens:
        return int(not any(get_tokens(gt) for gt in ground_truths))

    scores = []
    for gt in ground_truths:
        gt_tokens = get_tokens(gt)
        common = set(pred_tokens) & set(gt_tokens)
        num_same = len(common)

        if num_same == 0:
            scores.append(0)
            continue

        precision = num_same / len(pred_tokens)
        recall = num_same / len(gt_tokens)
        f1 = 2 * precision * recall / (precision + recall)
        scores.append(f1)

    return max(scores)

def evaluate_qa_with_llama_cpp(dataset, qa_function, save_path=None, skip_unanswerable=True):
    squad_metric = load("squad")
    references = []
    predictions = []
    per_example_results = []

    start = time.time()

    for example in dataset:
        if skip_unanswerable and len(example['answers']['text']) == 0:
            continue

        result = qa_function(example)

        if result['prediction'] is not None:
            normalized_prediction = normalize_answer(result['prediction'])
            normalized_ground_truths = [normalize_answer(ans) for ans in result['ground_truths']]

            em = compute_exact_match(normalized_prediction, normalized_ground_truths)
            f1 = compute_f1(normalized_prediction, normalized_ground_truths)

            predictions.append({
                "id": example['id'],
                "prediction_text": normalized_prediction
            })

            references.append({
                "id": example['id'],
                "answers": {
                    "text": result['ground_truths'],
                    "answer_start": example['answers']['answer_start']
                }
            })

            per_example_results.append({
                'id': example['id'],
                'prediction_text': normalized_prediction,
                'ground_truth_text': "; ".join(normalized_ground_truths),
                'answer_start': "; ".join(map(str, example['answers']['answer_start'])),
                'exact_match': em,
                'f1_score': round(f1, 4)
            })

    end = time.time()

    # Compute overall metrics
    results = squad_metric.compute(predictions=predictions, references=references)
    elapsed_time = end - start

    # Save per-example results to CSV
    if save_path:
        with open(save_path, mode='w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['id', 'prediction_text', 'ground_truth_text', 'answer_start', 'exact_match', 'f1_score']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            writer.writeheader()

            for row in per_example_results:
                writer.writerow(row)

    return results, len(references), elapsed_time


In [None]:
from llama_cpp import Llama
import llama_cpp

print("GPU offload supported:", llama_cpp.llama_supports_gpu_offload())

# Path to your GGUF model (adjust path if needed)
model_path = "/home/ubuntu/fast_llm_inference/llama-3.1-8B-Instruct-gguf/llama-3.1-8B-Instruct-f16.gguf"

# Initialize Llama with GPU layers offloaded
llm = Llama(
    model_path=model_path,
    n_ctx=8192,
    n_gpu_layers=-1,     # Offload to GPU! Adjust as needed based on your VRAM
    verbose=False,         # Prints backend info
)

GPU offload supported: True


In [11]:
results, num_examples, elapsed_time = evaluate_qa_with_llama_cpp(
    sampled_questions,
    qa_with_llama_cpp,
    skip_unanswerable=True,
    save_path="evaluation_with_predictions_and_references_f16.csv"
)

# Print final report
print("llama.cpp (Llama-3.1-8B-Instruct) float16 QA Results:")

print(f"\nNumber of examples: {num_examples}")
print(f"Elapsed time: {elapsed_time:.2f} seconds")

print("\nQA Evaluation Results:")
for key, value in results.items():
    print(f"{key}: {value:.4f}")

llm.close()

llama.cpp (Llama-3.1-8B-Instruct) float16 QA Results:

Number of examples: 101
Elapsed time: 300.17 seconds

QA Evaluation Results:
exact_match: 68.3168
f1: 84.6438


In [None]:
from llama_cpp import Llama
import llama_cpp

print("GPU offload supported:", llama_cpp.llama_supports_gpu_offload())

# Path to your GGUF model (adjust path if needed)
model_path = "/home/ubuntu/fast_llm_inference/llama-3.1-8B-Instruct-gguf/llama-3.1-8B-Instruct-8bit.gguf"

# Initialize Llama with GPU layers offloaded
llm = Llama(
    model_path=model_path,
    n_ctx=8192,
    n_gpu_layers=-1,     # Offload to GPU! Adjust as needed based on your VRAM
    verbose=False         # Prints backend info
)

ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 CUDA devices:
  Device 0: NVIDIA L4, compute capability 8.9, VMM: yes


GPU offload supported: True


In [None]:
results, num_examples, elapsed_time = evaluate_qa_with_llama_cpp(
    sampled_questions,
    qa_with_llama_cpp,
    skip_unanswerable=True,
    save_path="evaluation_with_predictions_and_references_8bit.csv"
)

# Print final report
print("llama.cpp (Llama-3.1-8B-Instruct) 8Bit QA Results:")

print(f"\nNumber of examples: {num_examples}")
print(f"Elapsed time: {elapsed_time:.2f} seconds")

print("\nQA Evaluation Results:")
for key, value in results.items():
    print(f"{key}: {value:.4f}")

llm.close()

llama.cpp (Llama-3.1-8B-Instruct) 8Bit QA Results:

Number of examples: 101
Elapsed time: 178.62 seconds

QA Evaluation Results:
exact_match: 70.2970
f1: 85.1146


In [None]:
from llama_cpp import Llama
import llama_cpp

print("GPU offload supported:", llama_cpp.llama_supports_gpu_offload())

# Path to your GGUF model (adjust path if needed)
model_path = "/home/ubuntu/fast_llm_inference/llama-3.1-8B-Instruct-gguf/llama-3.1-8B-Instruct-Q4_K_M.gguf"

# Initialize Llama with GPU layers offloaded
llm = Llama(
    model_path=model_path,
    n_ctx=8192,
    n_gpu_layers=-1,     # Offload to GPU! Adjust as needed based on your VRAM
    verbose=False         # Prints backend info
)

GPU offload supported: True


In [None]:
results, num_examples, elapsed_time = evaluate_qa_with_llama_cpp(
    sampled_questions,
    qa_with_llama_cpp,
    skip_unanswerable=True,
    save_path="evaluation_with_predictions_and_references_4bit.csv"
)

# Print final report
print("llama.cpp (Llama-3.1-8B-Instruct) 4Bit QA Results:")

print(f"\nNumber of examples: {num_examples}")
print(f"Elapsed time: {elapsed_time:.2f} seconds")

print("\nQA Evaluation Results:")
for key, value in results.items():
    print(f"{key}: {value:.4f}")

llm.close()

llama.cpp (Llama-3.1-8B-Instruct) 4Bit QA Results:

Number of examples: 101
Elapsed time: 111.71 seconds

QA Evaluation Results:
exact_match: 67.3267
f1: 84.0383


In [21]:
# load csv results

import pandas as pd

# Load the CSV file
df = pd.read_csv("evaluation_with_predictions_and_references.csv")

df

Unnamed: 0,id,prediction_text,ground_truth_text,answer_start,exact_match,f1_score
0,57111b95a58dae1900cd6c53,lothar de maizière,lothar de maizière; lothar de maizière; lothar...,588; 588; 588,1,1.0
1,56e1c0f6cd28a01900c67b2c,complexity classes,complexity classes; complexity classes; some c...,16; 16; 11,1,1.0
2,57264228ec44d21400f3dcf9,gte,telenet was incorporated in 1973 and started o...,560; 669; 669,1,1.0
3,57263eaa38643c19005ad373,water flow through body cavity,water flow through body cavity; κτείς kteis co...,801; 90; 801,1,1.0
4,5710eca0a58dae1900cd6b3e,1705,12 may 1705; 1705; 12 may 1705,420; 427; 420,1,1.0
5,5733a6ac4776f41900660f5b,poet,poet; poet; poet,273; 273; 273,1,1.0
6,572fc6f204bcaa1900d76cf7,fact that there is no revising chamber,no revising chamber; no revising chamber; take...,313; 313; 400,0,0.6
7,56e1d9fee3433e14004231cb,npcomplete,npcomplete; npcomplete; npcomplete,244; 244; 244,1,1.0
8,57108d69b654c5140001f985,2 million,ca 2 million; 2 million; 2 million,367; 371; 371,1,1.0
9,572a020f6aef051400155199,destruction of forest,destruction of forest; destruction of forest; ...,81; 81; 81,1,1.0
