In [2]:
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain_openai import ChatOpenAI
import pandas as pd
from langchain.tools import Tool
from crewai import Agent, Task, Crew, Process

import os
from utils import get_openai_api_key
# Fetch the API key through the project helper and record the model name in
# the environment (presumably picked up by CrewAI for its agents -- confirm).
openai_api_key = get_openai_api_key()
os.environ["OPENAI_MODEL_NAME"] = 'gpt-4-turbo'

# LangChain agent over the CSV-backed dataframe. It answers questions by
# running generated Python, which is why allow_dangerous_code must be enabled.
# The model name is read back from the variable set just above so the two
# settings cannot drift apart.
pandas_T = create_pandas_dataframe_agent(
    ChatOpenAI(model=os.environ["OPENAI_MODEL_NAME"], temperature=0),
    pd.read_csv("concatenated_df.csv"),
    agent_type=AgentType.OPENAI_FUNCTIONS,
    allow_dangerous_code=True,
    verbose=True,
)

# Expose the dataframe agent as a generic LangChain tool so that a CrewAI
# task can call it with a plain-text question.
pandas_tool = Tool(
    description="useful to analyze pandas dataframe",
    func=pandas_T.run,
    name="pandas dataframe agent",
)


# Create agents
# Analyst agent: no tools are attached here; the dataframe tool is supplied
# on the analysis task instead.
analyst_agent = Agent(
    role="Data Analyst",
    backstory='you are expert data scientist with well knowledge of python and pandas package to manipulate dataframes',
    goal="Analyze data and provide insights starting from the input query from the user",
    verbose=True,
    allow_delegation=False,
)

# Writer agent that turns the analyst's findings into a LinkedIn-style post.
post_writer = Agent(
    role="Content Writer",
    goal="Write short summary of the analysit_agent results. Use a Linkedin style post for  both techincal and non techical audience",
    # Adjacent string literals are concatenated with no separator, so the
    # first fragment must end the sentence itself; without the trailing ". "
    # the prompt would read "...Linkedin postsyou are in charge...".
    backstory=(
        "you are social media manager with great expertise to write Linkedin posts. "
        "you are in charge to write Linkedin posts based on analyst_agent results for both technical and non-techical audience audience"
    ),
    allow_delegation=False,
    verbose=True,
)

# Create tasks
# Analysis task: "{user_query}" is a placeholder filled from the inputs passed
# to crew.kickoff(); the dataframe tool is made available for this task.
data_analysis_task = Task(
    agent=analyst_agent,
    tools=[pandas_tool],
    description="Analyze the data and provide insights on the basis of the {user_query} from the user",
    expected_output="a clear insight descriptions of the operation results",
)

# Writing task: the result is also written to linkedin_post.txt.
# NOTE(review): expected_output repeats the description verbatim -- confirm
# whether a description of the desired output format was intended instead.
write_task = Task(
    agent=post_writer,
    description="write Linkedin posts based on analyst_agent results for both technical and non-techical audience audience",
    expected_output="write Linkedin posts based on analyst_agent results for both technical and non-techical audience audience",
    output_file="linkedin_post.txt",
)

# Create the crew
# Run the two tasks in list order: analysis first, then the write-up.
crew = Crew(
    process=Process.sequential,
    agents=[analyst_agent, post_writer],
    tasks=[data_analysis_task, write_task],
    verbose=True,
)

# Kick off the crew; the "user_query" entry fills the {user_query} placeholder
# in the analysis task description.
user_query = dict(user_query="what is the bank with the highest Consolidated Assets?")
result = crew.kickoff(inputs=user_query)
print(result)

[1m[95m [2024-09-10 00:07:14][DEBUG]: == Working Agent: Data Analyst[00m
[1m[95m [2024-09-10 00:07:14][INFO]: == Starting Task: Analyze the data and provide insights on the basis of the what is the bank with the highest Consolidated Assets? from the user[00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mThought: To determine which bank has the highest consolidated assets, I will need to analyze the data regarding the assets of various banks. This will involve identifying the column that represents the consolidated assets in the dataset and then finding the maximum value within that column.

Action: pandas dataframe agent
Action Input: {"tool_input": "Identify the bank with the highest consolidated assets"}[0m

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "import pandas as pd\ndf = pd.DataFrame({\n    'Name': ['FARMERS BK/FARMERS BHC', 'SOLERA NB/SOLERA NAT BC', 'APEX BK/APEX BC', 'PROVIDENCE BK/PB FC',