In [None]:
import requests

def query_prometheus(prom_ql, url="http://167.71.227.138:9090/api/v1/query", timeout=10):
    """Run an instant PromQL query against the Prometheus HTTP API.

    Args:
        prom_ql: PromQL expression, sent as the ``query`` request parameter.
        url: Instant-query endpoint to call. The default is the address this
            notebook was written against; pass your own Prometheus URL
            (usually port 9090) to override it.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        On HTTP 200 with a well-formed body, the ``data.result`` value of the
        JSON response. On failure a human-readable string is returned instead
        of raising:

        * ``"Connection Failed: ..."`` when the request itself fails,
        * ``"Error: <status> - <body>"`` for a non-200 response,
        * ``"Error: unexpected response format - ..."`` when the body is not
          JSON or lacks ``data.result``.

        Callers should therefore check the type of the result before using it
        as metrics.
    """
    try:
        response = requests.get(url, params={'query': prom_ql}, timeout=timeout)
    except requests.RequestException as exc:
        # Transport-level problems only (DNS, refused connection, timeout, ...).
        return f"Connection Failed: {exc}"

    if response.status_code != 200:
        return f"Error: {response.status_code} - {response.text}"

    try:
        return response.json()['data']['result']
    except (ValueError, KeyError, TypeError) as exc:
        # The server answered, but not with the expected JSON shape. Reporting
        # this as a connection failure would point the reader at the network
        # instead of at the response body.
        return f"Error: unexpected response format - {exc!r}"

# Example: per-instance CPU usage (%) over the last 5 minutes, computed as
# 100 minus the average idle rate expressed as a percentage.
cpu_query = (
    '100 - (avg by (instance) '
    '(rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)'
)
data = query_prometheus(cpu_query)
print(data)

[{'metric': {'instance': '134.209.146.26:9100'}, 'value': [1770638972.341, '3.4562500000053404']}, {'metric': {'instance': 'SaltClient:9100'}, 'value': [1770638972.341, '1.4112676408270772']}]


In [None]:
pip install langchain_huggingface

: 

In [None]:
import os

from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace

# The Hugging Face token is read from the HUGGINGFACEHUB_API_TOKEN environment
# variable instead of being written into the notebook, so the credential is
# never stored in a saved or shared copy. When the variable is unset this
# falls back to the empty string that was previously hardcoded here.
llm_model = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen2.5-7B-Instruct",
    huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN", ""),
    task="text-generation",
    max_new_tokens=128,
)


# Chat wrapper around the endpoint; a later cell calls `llm.invoke(prompt)`.
llm = ChatHuggingFace(llm=llm_model)

: 

In [None]:
import requests
import datetime

def send_to_alertmanager(summary, description, url="http://167.71.227.138:9093/api/v2/alerts", timeout=10):
    """Post a single informational alert to the Alertmanager v2 API.

    Args:
        summary: Text placed in the alert's ``summary`` annotation.
        description: Text placed in the alert's ``description`` annotation.
        url: Alertmanager alerts endpoint. The default is the address this
            notebook was written against; pass your own URL to override it.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled Alertmanager cannot hang the notebook indefinitely.

    Returns:
        A status string, never an exception for request failures:
        ``"Success: ..."`` on HTTP 200, ``"Error: <status> - <body>"`` for any
        other status code, or ``"Connection Failed: ..."`` when the request
        itself fails.
    """
    # The trailing "Z" declares the timestamp to be UTC, so the clock reading
    # must actually be taken in UTC. A naive local `now()` with "Z" appended
    # would be wrong by the machine's UTC offset.
    starts_at = datetime.datetime.now(datetime.timezone.utc).strftime(
        "%Y-%m-%dT%H:%M:%S.%fZ"
    )

    # Standard Alertmanager JSON format: a list of alert objects.
    payload = [{
        "labels": {
            "alertname": "AIAgentResponse",
            "severity": "info",
            "source": "ai-bot"
        },
        "annotations": {
            "summary": summary,
            "description": description
        },
        "startsAt": starts_at
    }]

    try:
        response = requests.post(url, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        return f"Connection Failed: {exc}"

    if response.status_code == 200:
        return "Success: Message routed through Alertmanager."
    return f"Error: {response.status_code} - {response.text}"

In [None]:
# Integration: LLM Analysis of Prometheus Data -> AlertManager

# query_prometheus() hands back its result on success but a plain error string
# on failure. A non-empty string is truthy, so testing truthiness alone would
# forward the error text to the LLM as if it were metrics; require a non-empty
# list instead.
if 'data' in locals() and isinstance(data, list) and data:
    # 1. Prepare the prompt with the data from Cell 1
    # Note: the query computes `100 - idle%`, so each value is already the CPU
    # *usage* percentage, not the idle percentage.
    # Low value (near 0) = Low Usage. High value (near 100) = High Usage.
    prompt = f"""
    Analyze the following Prometheus metrics for CPU usage.
    Data: {data}
    
    Interpret the values:
    - Only look at the 'value' field. The second element in the list is the percentage.
    - Each percentage is CPU usage (100 minus the average idle percentage over the last 5 minutes):
      - Value near 0 means the CPU is free (Good/Low Load).
      - Value near 100 means the CPU is busy (High Load).
    
    Provide a concise summary of the health of the instances.
    """

    print("Sending prompt to LLM...")
    try:
        # 2. Invoke LLM (defined in Cell 2)
        response = llm.invoke(prompt)
        ai_analysis = response.content
        print(f"LLM Analysis:\n{ai_analysis}\n")
        
        # 3. Send to Alertmanager (defined in Cell 3)
        print("Routing to Alertmanager...")
        alert_status = send_to_alertmanager(
            summary="AI Analysis of CPU Metrics", 
            description=ai_analysis
        )
        print(alert_status)

    except Exception as e:
        print(f"Error during AI processing: {e}")
elif 'data' in locals() and isinstance(data, str) and data:
    # The Prometheus query itself failed; surface its message and stop here.
    print(f"Prometheus query failed, nothing to analyze: {data}")
else:
    print("No data found. Please run the first cell to query Prometheus.")