# In [1]:
import openai
import pandas as pd


class python_agent:
    """Answer questions about a CSV dataset by generating and running code.

    The agent describes the dataset's columns to an OpenAI chat model, asks
    the model for a Python script that answers the user's question, executes
    that script, and, when execution fails, appends the failing code and the
    error to the prompt and asks again.
    """

    def __init__(
        self,
        csv_file_path: str,
        openai_api_key: str,
        model: str = "gpt-3.5-turbo-16k",
        max_attempts: int = 3,
    ):
        """Load the dataset and store the settings used for later requests.

        Args:
            csv_file_path: Path of the CSV dataset; it is read immediately
                with ``pandas.read_csv`` and also quoted in the prompt.
            openai_api_key: API key assigned to ``openai.api_key`` before
                each request.
            model: Chat model name passed to the completion endpoint.
            max_attempts: How many generate-and-run rounds ``ask_question``
                performs before giving up.
        """
        self.csv_file_path = csv_file_path
        self.dataframe = pd.read_csv(self.csv_file_path)
        self.open_ai_api_key = openai_api_key
        self.model = model
        self.max_attempts = max_attempts

    def df_prompt(self, df) -> str:
        """Describe ``df`` as a ``column | sample_data | datatype`` table.

        Args:
            df: pandas DataFrame to summarise.

        Returns:
            A header followed by one line per column, listing up to five
            distinct values of the column and its dtype.
        """
        # Working values stay local so that calling this method never
        # overwrites ``self.prompt``, which ``ask_question`` also uses.
        lines = ["The data is in this format: \ncolumn | sample_data | datatype \n"]
        for column in df.columns:
            series = df[column]
            sample_data = ",".join(str(value) for value in series.unique()[:5])
            lines.append(f"{column} | {sample_data} | {series.dtype} \n")
        return "".join(lines)

    @staticmethod
    def _extract_code(reply: str) -> str:
        """Pull the Python source out of a chat reply.

        Text after the last "```python" fence is preferred.  When the reply
        only uses generic "```" fences, the first fenced block is used and a
        language tag sharing the opening fence line is dropped.  A reply
        without any fence is returned unchanged.
        """
        fence = "```"
        tagged_fence = fence + "python"
        if tagged_fence in reply:
            body = reply.rsplit(tagged_fence, 1)[-1]
        elif fence in reply:
            body = reply.split(fence, 1)[1]
            first_line, newline, remainder = body.partition("\n")
            tag = first_line.strip()
            # Skip the rest of the opening fence line when it is blank or
            # holds only a language tag such as "py".
            if newline and (not tag or tag.isalnum()):
                body = remainder
        else:
            return reply
        return body.split(fence, 1)[0]

    def generate_code(self, prompt):
        """Send ``prompt`` to the chat model and return the code it replies with.

        Args:
            prompt: Full prompt text; it is sent as a single system message.

        Returns:
            The Python source extracted from the model's reply.
        """
        openai.api_key = self.open_ai_api_key
        self.messages = [{"role": "system", "content": prompt}]

        # NOTE(review): ``openai.ChatCompletion`` appears to be the pre-1.0
        # interface of the openai package - confirm the installed version.
        self.response = openai.ChatCompletion.create(
            model=self.model,
            messages=self.messages,
        )

        self.ChatGPT_reply = self.response["choices"][0]["message"]["content"]
        self.python_code = self._extract_code(self.ChatGPT_reply)
        return self.python_code

    def run_code(self, code):
        """Execute ``code`` and report whether it ran without raising.

        Args:
            code: Python source to execute.

        Returns:
            ``(True, code)`` when execution finishes, otherwise
            ``(False, exception)`` with the exception that was raised.
        """
        # SECURITY: this executes model-generated code with the full
        # privileges of the current process; only use it in an environment
        # where that is acceptable.
        #
        # A single dict serves as both globals and locals.  With a bare
        # ``exec(code)`` inside a method, names bound by the script (imports,
        # helper functions) land in a separate locals mapping, so functions
        # defined by the script cannot see them and fail with NameError.
        # ``__name__`` is set so that an ``if __name__ == "__main__":`` guard
        # in the script runs.
        namespace = {**globals(), "__name__": "__main__"}
        try:
            exec(code, namespace)
        except Exception as error:
            return False, error
        return True, code

    def learn_from_feedback(self, code, error_message):
        """Format a failed attempt so it can be appended to the prompt.

        Args:
            code: Python source that failed.
            error_message: Exception encountered while executing ``code``.

        Returns:
            Text block containing the code and the error.
        """
        self.feedback = f""" 
        python_code: {code}
        error: {error_message}
        """
        return self.feedback

    def ask_question(self, question):
        """Generate and run code for ``question``, retrying on failure.

        Each round requests code from the model and executes it.  When the
        code raises, the code and the error are appended to the prompt and a
        new round starts, up to ``self.max_attempts`` rounds.

        Args:
            question: Question about the dataset.

        Returns:
            The source of the first script that executed without raising
            (not the script's output), or the string
            "Unable to generate a working solution." when every round fails.
        """
        self.attempt = 0
        self.dfprompt = self.df_prompt(self.dataframe)
        self.prompt = self.initial_prompt(self.dfprompt, question)

        while self.attempt < self.max_attempts:
            self.code = self.generate_code(self.prompt)
            succeeded, self.output = self.run_code(self.code)
            if succeeded:
                return self.output

            # On failure ``self.output`` holds the raised exception.
            self.prompt += self.learn_from_feedback(self.code, self.output)
            self.attempt += 1

        return "Unable to generate a working solution."

    def initial_prompt(self, df_prompt, question):
        """Build the initial prompt sent to the model.

        Args:
            df_prompt: Dataset description, as produced by ``df_prompt``.
            question: Question asked by the user.

        Returns:
            The instructions, followed by the dataset description and the
            question.
        """
        self.base_prompt = f""" 
        You are an agent designed to write and execute python code to answer questions. 
        You will provide only python code, nothing else. Your role is to interpret user queries based on 
        the data  given below and respond with precise, executable Python scripts. Each response should be 
        a self-contained code snippet, tailored to the specific requirements of the question. This includes 
        appropriate function definitions, necessary library imports, and brief inline comments for clarity. 
        The code should be written in a clean, efficient manner, following Python's best practices. 
        For more complex problems, the code may include modular functions or classes to demonstrate a 
        structured approach to problem-solving. Your responses should not include any additional 
        explanations or non-code elements. Your primary objective is to provide clear, direct, and 
        practical Python solutions that users can readily execute to obtain their desired results.
        No need to create any dataframe just use pandas read csv and read the filename: {self.csv_file_path} You
        will answer from the below data example:\n
        """
        self.base_prompt += df_prompt
        self.base_prompt += f"\n\nQuestion: {question}"
        return self.base_prompt


# In [2]:
# Both values are left blank here: supply the CSV dataset path and the
# OpenAI API key before building the agent.
df_path = ""
openai_api_key = ""
agentpy = python_agent(
    csv_file_path=df_path,
    openai_api_key=openai_api_key,
)

# In [None]:
# The question is left blank here: put the question about the dataset in
# ``question``; ``var`` receives whatever ``ask_question`` returns.
question = ""
var = agentpy.ask_question(question=question)