import torch

# Environment check: ask PyTorch whether a CUDA device is usable on this machine.
# NOTE(review): nothing else in the visible cells uses torch — confirm this check is still needed.
torch.cuda.is_available()

In [1]:
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder
from langchain.agents import create_structured_chat_agent, AgentExecutor, AgentOutputParser
from langchain_ollama.llms import OllamaLLM
import pandas as pd
from langchain_core.tools import BaseTool  # Use langchain_core.tools instead of langchain.tools (newer)
from typing import Type
from pydantic import BaseModel, Field
# from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.tools import Tool

In [3]:
# Single-turn prompt: the caller supplies {question}; the trailing sentence
# nudges the model to reason step by step before answering.
template = (
    "Question: {question}\n"
    "\n"
    "Answer: Let's think step by step."
)

prompt = ChatPromptTemplate.from_template(template)

In [4]:
# LLM handle for the "gemma3:1b" model.
# NOTE: the name `model` is rebound to a different model in a later cell.
model = OllamaLLM(model="gemma3:1b")

# Pipe the prompt into the model: invoking `chain` renders the prompt and sends it to the LLM.
chain = prompt | model

In [5]:
# Smoke-test the chain; the dict key must match the {question} placeholder in the template.
chain.invoke({"question": "What is LangChain?"})

'Okay, let\'s think step by step about what LangChain is.\n\n1. **What is a Language Model?** First, we need to understand what a language model is. A language model is a type of AI that\'s trained to predict the next word in a sequence. Think of it as a really good autocomplete.  It\'s built on large datasets of text and learns patterns from that data.\n\n2. **What is a Chain?** Now, a "chain" refers to a series of interconnected components.  It\'s a way to combine multiple AI models or tools to accomplish a more complex task.  Instead of relying on just one model, you can build a chain to handle different parts of the process.\n\n3. **LangChain\'s Purpose:** LangChain is a framework designed to make building and using these "chains" of AI models easier. It provides tools and components to help developers:\n    * **Design applications:**  It allows you to structure your application logically.\n    * **Connect models:**  It allows you to easily link different language models (like GPT-

# Create a custom tool that reads a CSV file and returns a summary of its contents

In [2]:
# Argument schema for the CSV summary tool: one required string field.
# (Documented with comments rather than a docstring so the generated
# JSON schema of the model is left exactly as it was.)
class CSVSummaryInput(BaseModel):
    file_path: str = Field(
        ...,  # required, no default
        description="Path to the CSV file",
    )

# LangChain tool that loads a CSV file with pandas and returns a plain-text
# summary: row/column counts, null count per column, and the names of the
# numeric and object-dtype ("categorical") columns.
class CSVSummaryTool(BaseTool):
    name: str = "csv_summary_tool"
    description: str = "Reads a CSV file and summarizes null counts, numeric and categorical columns."
    args_schema: Type[BaseModel] = CSVSummaryInput

    def _run(self, file_path: str) -> str:
        """Summarize the CSV file at ``file_path``.

        Args:
            file_path: Path handed to ``pandas.read_csv``.

        Returns:
            A multi-line text summary. If the file cannot be read, a
            "Failed to read CSV file: ..." message is returned instead of
            raising, so the agent receives the error as an observation.
        """
        try:
            df = pd.read_csv(file_path)
        except Exception as e:
            return f"Failed to read CSV file: {e}"

        total_rows, total_columns = df.shape

        null_counts = df.isnull().sum()
        null_summary = "\n".join(f"- {col}: {count} nulls" for col, count in null_counts.items())

        # Labels are stringified so that ', '.join(...) cannot fail on non-str column labels.
        numeric_cols = [str(col) for col in df.select_dtypes(include='number').columns]
        categorical_cols = [str(col) for col in df.select_dtypes(include='object').columns]

        summary = (
            f"CSV File Summary:\n"
            f"- Total Rows: {total_rows}\n"
            f"- Total Columns: {total_columns}\n\n"
            f"Null values per column:\n{null_summary}\n\n"
            f"Column Type Summary:\n"
            f"- Numeric Columns ({len(numeric_cols)}): {', '.join(numeric_cols)}\n"
            f"- Categorical Columns ({len(categorical_cols)}): {', '.join(categorical_cols)}"
        )
        return summary

    async def _arun(self, file_path: str) -> str:
        """Async counterpart of ``_run``.

        The previous implementation was a plain function that raised
        ``NotImplementedError``, which broke every async agent call. The
        blocking pandas work is now executed in a worker thread so the event
        loop is not stalled while the file is read.
        """
        import asyncio  # local import keeps this block self-contained

        return await asyncio.to_thread(self._run, file_path)


In [3]:
# Rebind `model` to the "mistral" model; the agent constructed below is built with this instance.
model = OllamaLLM(model="mistral")

In [4]:
# Human-turn template: the user's request, then the agent scratchpad
# (prior tool calls and observations), then a formatting reminder.
human = "\n\n".join(
    [
        "{input}",
        "{agent_scratchpad}",
        "(reminder to respond in a JSON blob no matter what)",
    ]
)

In [8]:
# System-turn template for the structured chat agent.
# Placeholders:
#   {tools}, {tool_names} - filled in by the agent constructor from the tool list
#   {file_path}           - supplied by the caller in the invoke() input dict
# Literal JSON braces are doubled ({{ }}) so the template engine leaves them alone.
# Changes from the previous version: {tools} is rendered only once, the format
# section now includes the "Action:" label and the Observation/repeat step that the
# closing reminder already refers to, the example action_input is valid JSON, and
# the final example's code fence is closed.
system = '''Respond to the human as helpfully and accurately as possible. You have access to the following tools:

{tools}

You are a smart assistant that can use tools when needed. You must first call {tool_names} to read the contents of the {file_path} file, then give your final answer based on that content.

Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).

Valid "action" values: {tool_names} first, then "Final Answer"

Provide only ONE action per $JSON_BLOB, as shown:

```
{{
  "action": $TOOL_NAME,
  "action_input": "{file_path}"
}}
```

Follow this format:

Question: input question to answer
Thought: consider previous and subsequent steps
Action:
```
$JSON_BLOB
```
Observation: action result
... (repeat Thought/Action/Observation N times)
Thought: I know what to respond
Action:
```
{{
  "action": "Final Answer",
  "action_input": "Final response to human"
}}
```

Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation'''

In [9]:
# 1. Instantiate the custom CSV tool.
csv_tool = CSVSummaryTool()

# 2. Two-message prompt: the system instructions followed by the human turn
#    (the human template already carries the {agent_scratchpad} placeholder).
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", human),
    ]
)

# 3. Structured-chat agent driven by the Ollama model.
agent = create_structured_chat_agent(llm=model, tools=[csv_tool], prompt=prompt)

# 4. Executor that runs the agent loop. verbose=True prints each step;
#    handle_parsing_errors=False lets output-parsing failures raise.
agent_executor = AgentExecutor(
    agent=agent,
    tools=[csv_tool],
    verbose=True,
    handle_parsing_errors=False,
)

In [11]:
# Run the agent; "file_path" fills the {file_path} placeholder in the system prompt
response = agent_executor.invoke(
    dict(
        input="use the tools you have to read this csv file and analyze it",
        file_path="./data_hourly.csv",  # CSV to summarize, relative to the working directory
    )
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m ```
{
  "action": "csv_summary_tool",
  "action_input": "./data_hourly.csv"
}

[36;1m[1;3mCSV File Summary:he result from csv_summary_tool.[0m
- Total Rows: 13515600
- Total Columns: 12

Null values per column:
- ID: 0 nulls
- From: 0 nulls
- Date: 0 nulls
- Hour: 0 nulls
- Participation_Phase: 0 nulls
- Demand_kWh: 0 nulls
- Price_signal: 11986392 nulls
- Experiment_price_NOK_kWh: 11986392 nulls
- Temperature: 0 nulls
- Temperature24: 0 nulls
- Temperature48: 0 nulls
- Temperature72: 0 nulls

Column Type Summary:
- Numeric Columns (7): Hour, Demand_kWh, Experiment_price_NOK_kWh, Temperature, Temperature24, Temperature48, Temperature72
[32;1m[1;3m ```umns (5): ID, From, Date, Participation_Phase, Price_signal[0m
{
  "action": "Final Answer",
  "action_input": "The CSV file contains 13515600 rows and 12 columns. There are 7 numeric columns (Hour, Demand_kWh, Experiment_price_NOK_kWh, Temperature, Temperature24, Temperat

In [12]:
# Show only the agent's final answer, stored under the "output" key of the result dict.
print(response["output"])

The CSV file contains 13515600 rows and 12 columns. There are 7 numeric columns (Hour, Demand_kWh, Experiment_price_NOK_kWh, Temperature, Temperature24, Temperature48, Temperature72) and 5 categorical columns (ID, From, Date, Participation_Phase, Price_signal). The Price_signal column has a high number of null values.
