### Dependencies

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import os
from dotenv import load_dotenv

from typing import Annotated, TypedDict

In [2]:
# from langchain_experimental.agents import create_pandas_dataframe_agent
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage

In [3]:
from langgraph.graph import StateGraph
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition

from langchain.agents import Tool # required to convert a function to a tool

In [4]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key for the cells below comes from — confirm.
load_dotenv()

True

### Test LangChain OpenAI Integration

In [6]:
# Create an LLM instance
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Make a simple inference
user_prompt = """
What is the capital of the US

"""

# The system instruction is listed first so it frames the conversation before
# the user's turn; chat models expect system messages to lead the message list.
messages = [
    SystemMessage(content="Make sure to present your response in bullet point"),
    HumanMessage(content=user_prompt),
]

# Single round-trip to the model; this is a network call and needs valid API credentials.
response = llm.invoke(messages)

print(response.content)

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [19]:
# Sends the same `messages` list to the model again and keeps the reply in `x`.
# NOTE(review): `x` is not used anywhere in the visible notebook — this looks like a leftover experiment cell; confirm before removing.
x = llm.invoke(messages)

### Data

In [7]:
# Read the customer table from Customer.csv (path is relative to the working directory).
df = pd.read_csv("Customer.csv")

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,customer_Id,DOB,Gender,city_code
0,268408,02-01-1970,M,4.0
1,269696,07-01-1970,F,8.0
2,268159,08-01-1970,F,8.0
3,270181,10-01-1970,F,2.0
4,268073,11-01-1970,M,1.0


### Nodes: Customised Tools

In [9]:
def data_summary(df: pd.DataFrame) -> str:
    """
    Build a plain-text summary of a DataFrame.

    The report contains, in order:
    - Shape (rows, columns)
    - Per-column dtype, missing-value count and non-null count
    - describe() statistics for float/int columns
    - describe() summary (count, unique, top, freq) for object/category columns

    The numeric and categorical sections are each emitted only when the frame
    has at least one column of the corresponding kind.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to summarize. It is only read, never modified.

    Returns
    -------
    str
        The sections joined with newlines, ready to print or hand to an LLM.
    """
    summary = []
    summary.append(f"Dataset contains {df.shape[0]} rows and {df.shape[1]} columns.\n")

    # Column info
    col_info = pd.DataFrame({
        "dtype": df.dtypes.astype(str),
        "missing_values": df.isnull().sum(),
        "non_null_count": df.notnull().sum()
    })
    summary.append("Column Information:\n")
    summary.append(col_info.to_string())
    summary.append("\n")

    # describe(include=...) raises ValueError when no column matches the
    # requested dtypes, so check for matching columns before calling it.

    # Numeric stats
    numeric_kinds = [float, int]
    if df.select_dtypes(include=numeric_kinds).shape[1] > 0:
        numeric_desc = df.describe(include=numeric_kinds).transpose()
        summary.append("Numeric Column Statistics:\n")
        summary.append(numeric_desc.to_string())
        summary.append("\n")

    # Categorical stats
    categorical_kinds = [object, "category"]
    if df.select_dtypes(include=categorical_kinds).shape[1] > 0:
        cat_desc = df.describe(include=categorical_kinds).transpose()
        summary.append("Categorical Column Summary:\n")
        summary.append(cat_desc.to_string())
        summary.append("\n")

    return "\n".join(summary)


# Print the plain-text summary of the loaded customer frame as a quick check of data_summary.
print(data_summary(df))


Dataset contains 5647 rows and 4 columns.

Column Information:

               dtype  missing_values  non_null_count
customer_Id    int64               0            5647
DOB           object               0            5647
Gender        object               2            5645
city_code    float64               2            5645


Numeric Column Statistics:

              count           mean          std       min       25%       50%       75%       max
customer_Id  5647.0  271037.281034  2451.261711  266783.0  268912.0  271028.0  273180.0  275265.0
city_code    5645.0       5.472631     2.859918       1.0       3.0       5.0       8.0      10.0


Categorical Column Summary:

       count unique         top  freq
DOB     5647   4056  27-12-1988     7
Gender  5645      2           M  2892




In [32]:
def _summarize_loaded_data(_query: str = "") -> str:
    """Tool adapter: summarize the notebook-level `df`.

    `Tool` is a single-input tool whose function receives one plain-text
    argument from the model. `data_summary` needs a DataFrame, so the text
    argument is accepted and ignored, and the frame loaded earlier in the
    notebook is summarized instead.
    """
    return data_summary(df)


tool_summary = Tool(name="summarize_data",
                    func=_summarize_loaded_data,
                    description="Returns the summary of uploaded data including statistics for numerical and non-numerical fields.")

### Agents and List of Tools

In [40]:
# Tools made available to the agent; currently only the data-summary tool.
tools = [tool_summary]

In [41]:
# Chat model used by the agent (same model name and temperature as the test cell above).
agent_llm =  ChatOpenAI(model="gpt-4o-mini", temperature=0)
# Bind the tool list to the model; the tool-enabled model is stored under a separate name.
agent_llm__with_tools = agent_llm.bind_tools(tools)

### Nodes

In [None]:
def chatbot(state):
    """Agent node: send the conversation so far to the tool-enabled model.

    NOTE(review): the original cell held only a dangling `def`, which is a
    syntax error; this body and the name `chatbot` are a proposed minimal
    node — adjust to the intended design.

    Parameters
    ----------
    state : mapping
        Graph state; only its "messages" entry is read.

    Returns
    -------
    dict
        A state update whose "messages" entry holds the model's single reply.
    """
    return {"messages": [agent_llm__with_tools.invoke(state["messages"])]}

### State Graph

In [42]:
class State(TypedDict):
    """Graph state schema shared by all nodes.

    Declared as a TypedDict so the state is an ordinary dict at runtime and
    nodes can read it with `state["messages"]` and return partial dict updates.
    """

    # Conversation history. The add_messages annotation is the reducer for this
    # key: node updates are merged into the existing list rather than replacing it.
    messages: Annotated[list, add_messages]

In [43]:
# Start building the graph, passing State as its state schema.
graph_builder = StateGraph(State)