In [43]:
import csv
import pytesseract
from PIL import Image
import fitz  
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from llama_cpp import Llama
import os

# Path to the GGUF model file
MODEL_PATH = "llama3.2.1B.gguf"  

# Initialize LLaMA model
llm = Llama(model_path=MODEL_PATH)

# Initialize FinBERT ESG scoring model
finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-esg', num_labels=4)
tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-esg')
esg_pipeline = pipeline("text-classification", model=finbert, tokenizer=tokenizer)

def pdf_to_images(pdf_path):
    images = []
    pdf_document = fitz.open(pdf_path)  
    for page_number in range(len(pdf_document)):
        page = pdf_document[page_number]
        pix = page.get_pixmap(dpi=400)  
        image_path = f"page_{page_number + 1}.png"
        pix.save(image_path) 
        images.append(image_path)
    pdf_document.close()
    return images

# Function to extract text from an image
def extract_text_from_image(image_path):
    return pytesseract.image_to_string(Image.open(image_path))

# Function to get ESG category and score
def get_esg_score(text):
    results = esg_pipeline(text)
    if results:
        return results[0]['label'], round(results[0]['score'], 4)  
    return "Unknown", 0.0

# Function to interact with the LLM and ask questions
def ask_questions(text, questions):
    
    prompt = f"Here is some context:\n\n{text}\n\nI will now ask you some questions based on this context and answer in 2 to 3 words.\n\n"
    
    
    answers = []
    for i, question in enumerate(questions, 1):
        full_prompt = prompt + f"Question {i}: {question}\nAnswer:"
        response = llm(full_prompt, max_tokens=200, stop=["\n", "Question"])
        answers.append(response["choices"][0]["text"].strip())
    return answers

# Function to save answers and ESG data into a CSV
def save_to_csv(csv_filename, data, esg_data):
    
    file_exists = os.path.isfile(csv_filename)
    
    headers = [
        "Investment Amount",
        "ESG Category",
        "Project Type",
        "Project Location",
        "Project Duration",
        "Risk Status",
        "ESG Score"
    ]
    
    row = data + [esg_data[1]]  
    
    with open(csv_filename, mode="a", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        if not file_exists:
            writer.writerow(headers)  
        writer.writerow(row)


# Main function
def main(pdf_path, csv_filename, questions):
    # Step 1: Convert PDF to images
    images = pdf_to_images(pdf_path)
    
    # Step 2: Extract text from all images and combine
    full_text = ""
    for image_path in images:
        text = extract_text_from_image(image_path)
        full_text += text + "\n"  # Combine text from all pages
    
    # Step 3: Ask questions
    answers = ask_questions(full_text, questions)
    
    # Step 4: Get ESG score
    esg_category, esg_score = get_esg_score(full_text)
    
    # Step 5: Save answers and ESG score to CSV
    if len(answers) == 6:  # Ensure all questions are answered
        save_to_csv(csv_filename, answers, (esg_category, esg_score))
        print(f"Data successfully saved to {csv_filename}")
    else:
        print("Error: Not all answers were generated. Please check the model or input text.")

# Example Usage
pdf_path = "try.pdf"  
csv_filename = "project_details.csv" 
questions = [
    "What is the requested investment amount? (value alone)",
    "What is the ESG category mentioned in the document?",
    "What type of project is being described?",
    "Where is the project located?",
    "What is the duration of the project? (value alone)",
    "Based on ESG, what is the risk of this project? (low risk, medium risk, high risk)"
]

main(pdf_path, csv_filename, questions)


llama_model_loader: loaded meta data with 35 key-value pairs and 147 tensors from llama3.2.1B.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Llama 3.2 1B Instruct
llama_model_loader: - kv   3:                           general.finetune str              = Instruct
llama_model_loader: - kv   4:                           general.basename str              = Llama-3.2
llama_model_loader: - kv   5:                         general.size_label str              = 1B
llama_model_loader: - kv   6:                            general.license str              = llama3.2
llama_model_loader: - kv   7:                               general

Data successfully saved to project_details.csv


In [44]:
import csv
import pytesseract
from PIL import Image
import fitz 
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from llama_cpp import Llama
import os


MODEL_PATH = "llama3.2.1B.gguf" 

llm = Llama(model_path=MODEL_PATH)

finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-esg', num_labels=4)
tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-esg')
esg_pipeline = pipeline("text-classification", model=finbert, tokenizer=tokenizer)

def pdf_to_images(pdf_path):
    images = []
    pdf_document = fitz.open(pdf_path) 
    for page_number in range(len(pdf_document)):
        page = pdf_document[page_number]
        pix = page.get_pixmap(dpi=400)  
        image_path = f"page_{page_number + 1}.png"
        pix.save(image_path)  
        images.append(image_path)
    pdf_document.close()
    return images

def extract_text_from_image(image_path):
    return pytesseract.image_to_string(Image.open(image_path))

def get_esg_score(text):
    results = esg_pipeline(text)
    if results:
        return results[0]['label'], round(results[0]['score'], 4)  
    return "Unknown", 0.0

def ask_questions(text, questions):
    # Build the context for the model
    prompt = f"Here is some context:\n\n{text}\n\nI will now ask you some questions based on this context and answer in 2 to 3 words.\n\n"
    
    answers = []
    for i, question in enumerate(questions, 1):
        full_prompt = prompt + f"Question {i}: {question}\nAnswer:"
        response = llm(full_prompt, max_tokens=200, stop=["\n", "Question"])
        answers.append(response["choices"][0]["text"].strip())
    return answers

def save_to_csv(csv_filename, data, esg_data):
    # Check if file exists to avoid writing headers repeatedly
    file_exists = os.path.isfile(csv_filename)
    
    headers = [
        "Investment Amount",
        "ESG Category",
        "Project Type",
        "Project Location",
        "Project Duration",
        "Risk Status",
        "ESG Score"
    ]
    
    # Combine data with ESG data but exclude ESG Score Category
    row = data + [esg_data[1]]  
    
    with open(csv_filename, mode="a", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        if not file_exists:
            writer.writerow(headers) 
        writer.writerow(row)

def process_pdf(pdf_path, csv_filename, questions):
    # Step 1: Convert PDF to images
    images = pdf_to_images(pdf_path)
    
    # Step 2: Extract text from all images and combine
    full_text = ""
    for image_path in images:
        text = extract_text_from_image(image_path)
        full_text += text + "\n"  
    
    # Step 3: Ask questions
    answers = ask_questions(full_text, questions)
    
    # Step 4: Get ESG score
    esg_category, esg_score = get_esg_score(full_text)
    
    # Step 5: Save answers and ESG score to CSV
    if len(answers) == 6:  # Ensure all questions are answered
        save_to_csv(csv_filename, answers, (esg_category, esg_score))
        print(f"Data successfully saved to {csv_filename}")
    else:
        print(f"Error: Not all answers were generated for {pdf_path}. Please check the model or input text.")

# Main function to process all PDFs in a folder
def process_pdfs_in_folder(folder_path, csv_filename, questions):
    for filename in os.listdir(folder_path):
        if filename.lower().endswith('.pdf'):
            pdf_path = os.path.join(folder_path, filename)
            print(f"Processing {filename}...")
            process_pdf(pdf_path, csv_filename, questions)

folder_path = "pdf"  
csv_filename = "project_details.csv"  
questions = [
    "What is the requested investment amount? (value alone)",
    "What is the ESG category mentioned in the document?",
    "What type of project is being described?",
    "Where is the project located?",
    "What is the duration of the project? (value alone)",
    "Based on ESG, what is the risk of this project? (low risk, medium risk, high risk)"
]

process_pdfs_in_folder(folder_path, csv_filename, questions)


llama_model_loader: loaded meta data with 35 key-value pairs and 147 tensors from llama3.2.1B.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Llama 3.2 1B Instruct
llama_model_loader: - kv   3:                           general.finetune str              = Instruct
llama_model_loader: - kv   4:                           general.basename str              = Llama-3.2
llama_model_loader: - kv   5:                         general.size_label str              = 1B
llama_model_loader: - kv   6:                            general.license str              = llama3.2
llama_model_loader: - kv   7:                               general

Processing 1-1.pdf...


llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   118 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     4 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     760.89 ms /   122 tokens
Llama.generate: 103 prefix-match hit, remaining 15 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    15 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     150.43 ms /    16 tokens
Llama.generate: 103 prefix-match hit, remaining 12 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print:

Data successfully saved to project_details.csv
Processing 5.pdf...


Llama.generate: 6 prefix-match hit, remaining 105 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   105 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     4 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     670.54 ms /   109 tokens
Llama.generate: 96 prefix-match hit, remaining 15 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    15 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     4 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     193.41 ms /    19 tokens
Llama.generate: 96 prefix-match hit, remaining 12 prompt tokens to eval
llama_perf_con

Data successfully saved to project_details.csv
Processing 8.pdf...


Llama.generate: 6 prefix-match hit, remaining 107 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   107 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     4 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     699.13 ms /   111 tokens
Llama.generate: 98 prefix-match hit, remaining 15 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    15 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     2 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     146.80 ms /    17 tokens
Llama.generate: 98 prefix-match hit, remaining 12 prompt tokens to eval
llama_perf_con

Data successfully saved to project_details.csv
Processing 3.pdf...


Llama.generate: 6 prefix-match hit, remaining 110 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   110 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     4 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     721.06 ms /   114 tokens
Llama.generate: 101 prefix-match hit, remaining 15 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    15 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     158.32 ms /    16 tokens
Llama.generate: 101 prefix-match hit, remaining 12 prompt tokens to eval
llama_perf_c

Data successfully saved to project_details.csv
Processing 6.pdf...


Llama.generate: 6 prefix-match hit, remaining 117 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   117 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     4 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     772.55 ms /   121 tokens
Llama.generate: 108 prefix-match hit, remaining 15 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    15 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     3 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     191.13 ms /    18 tokens
Llama.generate: 108 prefix-match hit, remaining 12 prompt tokens to eval
llama_perf_c

Data successfully saved to project_details.csv
Processing 4-1.pdf...


Llama.generate: 6 prefix-match hit, remaining 113 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   113 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     4 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    1019.65 ms /   117 tokens
Llama.generate: 104 prefix-match hit, remaining 15 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    15 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     4 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     244.83 ms /    19 tokens
Llama.generate: 104 prefix-match hit, remaining 12 prompt tokens to eval
llama_perf_c

Data successfully saved to project_details.csv
Processing 9.pdf...


Llama.generate: 6 prefix-match hit, remaining 113 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   113 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     4 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     757.24 ms /   117 tokens
Llama.generate: 104 prefix-match hit, remaining 15 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    15 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     3 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     176.44 ms /    18 tokens
Llama.generate: 104 prefix-match hit, remaining 12 prompt tokens to eval
llama_perf_c

Data successfully saved to project_details.csv
Processing 2_1.pdf...


Llama.generate: 6 prefix-match hit, remaining 117 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   117 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     4 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     769.68 ms /   121 tokens
Llama.generate: 108 prefix-match hit, remaining 15 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    15 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     2 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     155.16 ms /    17 tokens
Llama.generate: 108 prefix-match hit, remaining 12 prompt tokens to eval
llama_perf_c

Data successfully saved to project_details.csv
Processing 10-2.pdf...


Llama.generate: 6 prefix-match hit, remaining 111 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   111 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     4 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     784.77 ms /   115 tokens
Llama.generate: 102 prefix-match hit, remaining 15 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    15 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     2 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     171.74 ms /    17 tokens
Llama.generate: 102 prefix-match hit, remaining 12 prompt tokens to eval
llama_perf_c

Data successfully saved to project_details.csv
Processing 7.pdf...


Llama.generate: 6 prefix-match hit, remaining 116 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   116 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     4 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     760.04 ms /   120 tokens
Llama.generate: 107 prefix-match hit, remaining 15 prompt tokens to eval
llama_perf_context_print:        load time =     677.91 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    15 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     5 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     221.85 ms /    20 tokens
Llama.generate: 107 prefix-match hit, remaining 12 prompt tokens to eval
llama_perf_c

Data successfully saved to project_details.csv


In [45]:
import pandas as pd

# Function to calculate ROI dynamically
def calculate_roi(row):
    # Base ROI based on Project Type
    if "Renewable" in row["Project Type"] or "Green" in row["ESG Category"]:
        base_roi = 15
    elif "Water" in row["Project Type"] or "Sustainable" in row["ESG Category"]:
        base_roi = 12
    elif "Infrastructure" in row["Project Type"] or "Urban" in row["ESG Category"]:
        base_roi = 10
    else:
        base_roi = 8  # Default base ROI

    # Risk Adjustment
    risk_adjustment = {"low risk": 10, "medium risk": 0, "high risk": -10}
    risk_status = row["Risk Status"].lower()  # Convert to lowercase for uniformity
    adjustment = risk_adjustment.get(risk_status, 0)

    # ESG Score Contribution
    esg_contribution = row["ESG Score"] * 3  # ESG score weighted 30% of ROI

    # Final ROI
    final_roi = base_roi + adjustment + esg_contribution
    return round(final_roi, 2)

# Function to process the CSV file
def process_csv(input_file, output_file):
    try:
        # Load the CSV file
        df = pd.read_csv(input_file)
        
        # Apply ROI calculation
        df["ROI"] = df.apply(calculate_roi, axis=1)
        
        # Save the updated DataFrame to a new CSV file
        df.to_csv(output_file, index=False)
        print(f"Updated CSV file saved to: {output_file}")
    except Exception as e:
        print(f"Error processing file: {e}")

# Example usage
input_file = "project_details.csv"  # Replace with the path to your input CSV file
output_file = "output_projects_with_roi.csv"  # Replace with the desired output file name

process_csv(input_file, output_file)


Updated CSV file saved to: output_projects_with_roi.csv


In [74]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Load dataset dynamically
def load_dataset(file_path):
    """
    Load the dataset from a CSV file dynamically.
    """
    try:
        return pd.read_csv(file_path)
    except Exception as e:
        print(f"Error loading dataset: {e}")
        return None

def preprocess_data(df, target_column):
    """
    Preprocess the dataset by handling categorical and numerical variables.
    """
    if 'Investment Amount' in df.columns:
        df['Investment Amount'] = (
            df['Investment Amount']
            .replace({'\$': '', ',': ''}, regex=True)
            .astype(float)
        )

    categorical_columns = df.select_dtypes(include=['object']).columns
    df[categorical_columns] = df[categorical_columns].apply(lambda x: x.str.strip().str.lower())

    X = df.drop(columns=[target_column])
    y = df[target_column]

    numerical_features = X.select_dtypes(include=['float64', 'int64']).columns
    categorical_features = X.select_dtypes(include=['object']).columns

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numerical_features),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
        ]
    )

    X_preprocessed = preprocessor.fit_transform(X)

    return X_preprocessed, y, preprocessor

def split_data(X, y, test_size=0.2):
    """
    Split the dataset into features (X) and target (y).
    """
    return train_test_split(X, y, test_size=test_size, random_state=42)

def train_model(X_train, y_train):
    """
    Train a Random Forest Regressor model.
    """
    model = RandomForestRegressor(random_state=42, n_estimators=100)
    model.fit(X_train, y_train)
    return model

def evaluate_model(model, X_test, y_test):
    """
    Evaluate the model using mean squared error and R^2 score.
    """
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    return mse, r2, y_pred

def connect_to_llama_model(input_data):
    """
    Simulate a connection to a Llama model.
    Replace with actual Llama integration logic.
    """
    return f"Llama processed input: {input_data}"

# Main function
def main(file_path, target_column):
    """
    Main workflow for ROI prediction dynamically.
    """
    # Step 1: Load the dataset
    df = load_dataset(file_path)
    if df is None:
        return

    print("Original Dataset Loaded:")
    print(df.head())

    # Step 2: Preprocess the dataset
    X_preprocessed, y, preprocessor = preprocess_data(df, target_column)
    print("\nData Preprocessed Successfully!")

    # Step 3: Split the dataset
    X_train, X_test, y_train, y_test = split_data(X_preprocessed, y)

    # Step 4: Train the predictive model
    model = train_model(X_train, y_train)
    print("\nModel Trained Successfully!")

    # Step 5: Evaluate the model
    mse, r2, y_pred = evaluate_model(model, X_test, y_test)
    print("\nModel Evaluation:")
    print(f"Mean Squared Error: {mse}")
    print(f"R^2 Score: {r2}")

    # Step 6: Simulate Llama model connection
    llama_response = connect_to_llama_model(X_test)
    print("\nLlama Model Response:")
    print(llama_response)
    print("\nModel Evaluation:")
    print(f"Mean Squared Error: {mse:.4f}")
    print(f"R² Score: {r2:.4f}")


# Execute the main function
if __name__ == "__main__":
    file_path = "output_projects_with_roi.csv" 
    target_column = "ROI" 
    main(file_path, target_column)



Original Dataset Loaded:
  Investment Amount                    ESG Category  \
0            67,275                           Green   
1           500,000  Sustainable Urban Development.   
2         $250,000               Water Conservation   
3         $200,000                      Environment   
4         $450,000          Renewable Energy Source   

                                Project Type Project Location  \
0                 Geothermal Energy Project.   United States.   
1                      Building construction           Canada   
2  Water Conservation and Recycling Project.       Australia.   
3                    Waste Management System          Germany   
4                         Offshore wind farm               UK   

  Project Duration  Risk Status  ESG Score    ROI  
0        19 months  Medium Risk     0.9922  17.98  
1               30    High risk     0.9890   4.97  
2        18 months  medium risk     0.9797  14.94  
3        24 months    High risk     0.9922   

  .replace({'\$': '', ',': ''}, regex=True)


In [None]:
#Data Visualization


