In [10]:
import os

from dotenv import load_dotenv
import pandas as pd

from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler



In [11]:
load_dotenv()
open_api_key = os.environ['OPENAI_API_KEY']

In [12]:
df = pd.read_csv("data/train/fraudTrain.csv").sample(1000)

In [13]:
analysis_template = """
You are an expert in data analysis with 20 years of professional experience, holding multiple PhDs, and who excels at exploratory 
data analysis (EDA). Based on this schema: {schema}. Provide a detailed and well documented Python code for each EDA, and do not 
display the expected code output. Just output the code without labeling it as python. Provide as many helpful visualizations as 
possible and follow these steps:
- Load the data
- Perform initial explorations to get a high-level understanding of our dataset
"""

# Prompt Template
prompt_template = PromptTemplate(
    input_variables=['schema'],
    template=analysis_template
)

# Define the LLM to use
llm = OpenAI(model_name="gpt-3.5-turbo",
             temperature=0.2,
             streaming=True, 
             callbacks=[StreamingStdOutCallbackHandler()],
             verbose=True
            )
chain = LLMChain(llm=llm, prompt=prompt_template, verbose=True)



In [14]:
schema = """
Data columns (total 23 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Unnamed: 0             1000 non-null   int64  
 1   trans_date_trans_time  1000 non-null   object 
 2   cc_num                 1000 non-null   int64  
 3   merchant               1000 non-null   object 
 4   category               1000 non-null   object 
 5   amt                    1000 non-null   float64
 6   first                  1000 non-null   object 
 7   last                   1000 non-null   object 
 8   gender                 1000 non-null   object 
 9   street                 1000 non-null   object 
 10  city                   1000 non-null   object 
 11  state                  1000 non-null   object 
 12  zip                    1000 non-null   int64  
 13  lat                    1000 non-null   float64
 14  long                   1000 non-null   float64
 15  city_pop               1000 non-null   int64  
 16  job                    1000 non-null   object 
 17  dob                    1000 non-null   object 
 18  trans_num              1000 non-null   object 
 19  unix_time              1000 non-null   int64  
 20  merch_lat              1000 non-null   float64
 21  merch_long             1000 non-null   float64
 22  is_fraud               1000 non-null   int64  
dtypes: float64(5), int64(6), object(12)
"""

In [15]:
chain = LLMChain(llm=llm, prompt=prompt_template, verbose=True)
response = chain.run(schema=schema)




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
You are an expert in data analysis with 20 years of professional experience, holding multiple PhDs, and who excels at exploratory 
data analysis (EDA). Based on this schema: 
Data columns (total 23 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Unnamed: 0             1000 non-null   int64  
 1   trans_date_trans_time  1000 non-null   object 
 2   cc_num                 1000 non-null   int64  
 3   merchant               1000 non-null   object 
 4   category               1000 non-null   object 
 5   amt                    1000 non-null   float64
 6   first                  1000 non-null   object 
 7   last                   1000 non-null   object 
 8   gender                 1000 non-null   object 
 9   street                 1000 non-null   object 
 10  city                   1000 non-null   object 
 11  state                  1000 n

In [None]:
from langchain import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

chat = ChatOpenAI(temperature=0)

template = """
You are an expert in data analysis with 20 years of professional experience, holding multiple PhDs, and who excels at exploratory 
data analysis (EDA). Based on this schema: {schema}. Provide a detailed and well documented Python code for each EDA, and do not 
display the expected code output. Just output the code without labeling it as python. Provide as many helpful visualizations as 
possible and follow these steps:
- Load the data
- Perform initial explorations to get a high-level understanding of our dataset
- Data types, number or rows and columns, descriptive statistics
- Check for missing values 
- Let's do it step by step.
"""
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_template = "{text}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

chain = LLMChain(llm=chat, prompt=chat_prompt)
chain.run(input_language="English", output_language="French", text="I love programming.")

In [5]:
from langchain.agents.agent_toolkits import create_python_agent
from langchain.tools.python.tool import PythonREPLTool
from langchain.python import PythonREPL
from langchain.llms.openai import OpenAI
from langchain.agents import AgentType
from langchain.chat_models import ChatOpenAI

In [8]:
agent_executor = create_python_agent(
    llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613"),
    tool=PythonREPLTool(),
    verbose=True,
    agent_type="openai-functions",
    agent_executor_kwargs={"handle_parsing_errors": True},
)

In [16]:
agent_executor

All Rights Reserved.

Copyright (c) 2000 BeOpen.com.
All Rights Reserved.

Copyright (c) 1995-2001 Corporation for National Research Initiatives.
All Rights Reserved.

Copyright (c) 1991-1995 Stichting Mathematisch Centrum, Amsterdam.
All Rights Reserved., 'credits':     Thanks to CWI, CNRI, BeOpen.com, Zope Corporation and a cast of thousands
    for supporting Python development.  See www.python.org for more information., 'license': Type license() to see the full license text, 'help': Type help() for interactive help, or help(object) for help about object., 'execfile': <function execfile at 0x10d6d5ee0>, 'runfile': <function runfile at 0x10d7c89a0>, '__IPYTHON__': True, 'display': <function display at 0x10c29b6a0>, 'get_ipython': <bound method InteractiveShell.get_ipython of <ipykernel.zmqshell.ZMQInteractiveShell object at 0x10dadd690>>}, 'ast': <module 'ast' from '/opt/anaconda3/envs/augmented_analytics/lib/python3.11/ast.py'>, 're': <module 're' from '/opt/anaconda3/envs/augmented

In [9]:
agent_executor.run("What is the 10th fibonacci number?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI can write a function to calculate the nth Fibonacci number and then call it with n=10.
Action: Python REPL
Action Input: 
```python
def fibonacci(n):
    if n <= 0:
        return 0
    elif n == 1:
        return 1
    else:
        return fibonacci(n-1) + fibonacci(n-2)

fibonacci(10)
```[0m
Observation: [36;1m[1;3m[0m
Thought:[32;1m[1;3mI now know the final answer
Final Answer: 55[0m

[1m> Finished chain.[0m


'55'