In [None]:

!pip install ibm-watson-machine-learning ibm_watson ibm-cloud-sdk-core


In [None]:
import json
import os

from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson import DiscoveryV2
from ibm_watson_machine_learning import APIClient
from ibm_watson_machine_learning.foundation_models import Model

In [None]:


# Credentials and endpoints are read from environment variables so that real
# secrets never need to be typed into (or committed with) this notebook.
# Recognised variables: IBM_CLOUD_API_KEY, DISCOVERY_URL, DISCOVERY_PROJECT_ID,
# WML_URL and WML_PROJECT_ID. When a variable is not set, the placeholder /
# default below is used, so the cell still runs before access has been granted.
# The API key has to be issued by an admin (ask Seana, who had admin access).
API_KEY = os.environ.get("IBM_CLOUD_API_KEY", "your_api_key_here")
DISCOVERY_URL = os.environ.get("DISCOVERY_URL", "your_discovery_url")
DISCOVERY_PROJECT_ID = os.environ.get("DISCOVERY_PROJECT_ID", "your_discovery_project_id")

# Set up Watson Discovery for document retrieval
discovery_authenticator = IAMAuthenticator(API_KEY)
discovery = DiscoveryV2(
    version='2023-06-15',
    authenticator=discovery_authenticator
)
discovery.set_service_url(DISCOVERY_URL)

# Set up Watson Machine Learning for access to foundation models.
# The same IAM API key is used for both Discovery and Machine Learning.
wml_credentials = {
    "url": os.environ.get("WML_URL", "https://us-south.ml.cloud.ibm.com"),
    "apikey": API_KEY,
}

wml_client = APIClient(wml_credentials)
project_id = os.environ.get("WML_PROJECT_ID", "b10e6698-f168-48c2-afb3-ec6bb6f93d80")
wml_client.set.default_project(project_id)

# Carbon footprint specific model - using a foundation model well-suited for scientific analysis
model_id = "ibm/granite-13b-instruct-v2"  # Or another appropriate IBM model


In [None]:
# Discovery Query Language uses `|` for OR and `,` for AND. A document should be
# retrieved when it has ANY of these types, so the alternatives are OR-ed.
_DOCUMENT_TYPE_FILTER = (
    "document_type:carbon_report"
    "|document_type:emission_data"
    "|document_type:sustainability"
)


def _as_text_chunks(value):
    """Normalize a field value (None, a scalar, or a list) into a list of non-blank strings."""
    if value is None:
        return []
    items = value if isinstance(value, (list, tuple)) else [value]
    return [str(item) for item in items if item is not None and str(item).strip()]


def _collect_context(discovery_response):
    """Extract (retrieved_passages, carbon_metrics) from a Discovery query result dict."""
    retrieved_passages = []
    carbon_metrics = []

    for result in discovery_response.get('results', []):
        # Main document content; the field may hold one string or a list of strings
        retrieved_passages.extend(_as_text_chunks(result.get('text')))

        # Structured carbon metrics, when the document carries them
        if 'emission_metrics' in result:
            carbon_metrics.append(result['emission_metrics'])

        # With per_document=True the passages are returned per result under
        # 'document_passages'; 'passages' is still accepted for compatibility.
        for passages_key in ('document_passages', 'passages'):
            for passage in result.get(passages_key) or []:
                retrieved_passages.extend(_as_text_chunks(passage.get('passage_text')))

    return retrieved_passages, carbon_metrics


def carbon_footprint_rag(query, top_k=5):
    """
    Carbon Footprint Analysis RAG pipeline using IBM Watson services

    Retrieves documents and passages from Watson Discovery, assembles them
    (plus any structured emission metrics) into a prompt, and asks a Watson
    Machine Learning foundation model for an analysis.

    Relies on the module-level names `discovery`, `DISCOVERY_PROJECT_ID`,
    `wml_credentials`, `project_id` and `model_id` set up earlier in the notebook.

    Args:
        query (str): User query about carbon footprint
        top_k (int): Number of documents to retrieve

    Returns:
        dict: Response with keys
            'query' (str): the original query,
            'context_sources' (int): number of text chunks placed in the context,
            'carbon_metrics_found' (bool): whether any result had 'emission_metrics',
            'analysis' (str): the generated text.

    Exceptions raised by the Discovery or Machine Learning SDK calls are not
    caught here and propagate to the caller.
    """
    # Step 1: Retrieve relevant carbon footprint documents from Watson Discovery
    discovery_response = discovery.query(
        project_id=DISCOVERY_PROJECT_ID,
        natural_language_query=query,
        count=top_k,
        filter=_DOCUMENT_TYPE_FILTER,
        passages={
            "enabled": True,
            "count": 5,
            "fields": ["text", "emission_metrics", "carbon_data"],
            "characters": 500,
            "per_document": True
        }
    ).get_result()

    retrieved_passages, carbon_metrics = _collect_context(discovery_response)

    # Create context with focus on carbon data
    context = "\n\n".join(retrieved_passages)

    # Add any structured carbon metrics data
    if carbon_metrics:
        metrics_summary = "\nCARBON METRICS SUMMARY:\n" + json.dumps(carbon_metrics, indent=2)
        context += metrics_summary

    # Step 2: Generate carbon analysis using Watson Machine Learning foundation model
    carbon_analysis_prompt = f"""
    You are a carbon footprint analysis expert. Use the following retrieved information to provide
    detailed analysis about the carbon footprint question.

    CONTEXT INFORMATION:
    {context}

    USER QUESTION: {query}

    Provide a comprehensive analysis that includes:
    1. Direct answer to the question with specific carbon metrics when available
    2. Recommendations for carbon footprint reduction if applicable
    3. Sources of the information from the context
    4. Any limitations in the analysis based on the available data

    ANALYSIS:
    """

    parameters = {
        "decoding_method": "greedy",
        "max_new_tokens": 500,
        "min_new_tokens": 100,
        # NOTE(review): temperature is a sampling parameter and is expected to have
        # no effect while decoding_method is "greedy" - confirm before relying on it.
        "temperature": 0.5,
        "repetition_penalty": 1.2,  # Discourage repetition
    }

    # Text generation goes through the foundation-models `Model` class; the
    # `foundation_models` attribute of APIClient only exposes model specifications.
    model = Model(
        model_id=model_id,
        params=parameters,
        credentials=wml_credentials,
        project_id=project_id,
    )
    generated_analysis = model.generate_text(prompt=carbon_analysis_prompt)

    return {
        'query': query,
        'context_sources': len(retrieved_passages),
        'carbon_metrics_found': len(carbon_metrics) > 0,
        'analysis': generated_analysis
    }



In [None]:
# Main entry point: run the RAG pipeline over a few example questions
if __name__ == "__main__":
    sample_queries = [
        "What is the carbon footprint of cloud computing?",
        "How can our company reduce emissions from data centers?",
        "Compare the environmental impact of on-premise vs cloud infrastructure",
        "What metrics should we track for our corporate carbon footprint report?",
    ]

    for query in sample_queries:
        print(f"\n\nAnalyzing: {query}")
        try:
            # A failure on one question is reported and the loop moves on to the next
            result = carbon_footprint_rag(query)
            print(f"Analysis: {result['analysis']}")
        except Exception as e:
            print(f"Error processing query: {e!s}")