# Analisando dados com Assistants Code Interpreter 

### Lendo arquivo e extraindo algumas informações

In [2]:
import pandas as pd

# Read the supermarket sales CSV into a DataFrame and preview its first rows.
sales_csv_path = 'arquivos/supermarket_sales.csv'
dataset = pd.read_csv(sales_csv_path)
dataset.head()

Unnamed: 0,Invoice ID,Branch,City,Customer type,Gender,Product line,Unit price,Quantity,Tax 5%,Total,Date,Time,Payment,cogs,gross margin percentage,gross income,Rating
0,750-67-8428,A,Yangon,Member,Female,Health and beauty,74.69,7,26.1415,548.9715,1/5/2019,13:08,Ewallet,522.83,4.761905,26.1415,9.1
1,226-31-3081,C,Naypyitaw,Normal,Female,Electronic accessories,15.28,5,3.82,80.22,3/8/2019,10:29,Cash,76.4,4.761905,3.82,9.6
2,631-41-3108,A,Yangon,Normal,Male,Home and lifestyle,46.33,7,16.2155,340.5255,3/3/2019,13:23,Credit card,324.31,4.761905,16.2155,7.4
3,123-19-1176,A,Yangon,Member,Male,Health and beauty,58.22,8,23.288,489.048,1/27/2019,20:33,Ewallet,465.76,4.761905,23.288,8.4
4,373-73-7910,A,Yangon,Normal,Male,Sports and travel,86.31,7,30.2085,634.3785,2/8/2019,10:37,Ewallet,604.17,4.761905,30.2085,5.3


#### Verificando o Rating médio

In [3]:
# Arithmetic mean of the 'Rating' column, computed locally with pandas.
rating_column = dataset['Rating']
rating_column.mean()

6.9727

### Instanciando Client

In [4]:
import openai
from dotenv import load_dotenv, find_dotenv

# Load variables from the .env file located by find_dotenv(); the boolean
# result is bound to `_` so it is not echoed.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)

# Client is built with no explicit arguments.
# NOTE(review): presumably picks the API key up from the environment — confirm.
client = openai.Client()

In [5]:
# Independent second read of the same CSV; the bare name on the last line
# makes the notebook render the frame.
teste = pd.read_csv(filepath_or_buffer='arquivos/supermarket_sales.csv')
teste

Unnamed: 0,Invoice ID,Branch,City,Customer type,Gender,Product line,Unit price,Quantity,Tax 5%,Total,Date,Time,Payment,cogs,gross margin percentage,gross income,Rating
0,750-67-8428,A,Yangon,Member,Female,Health and beauty,74.69,7,26.1415,548.9715,1/5/2019,13:08,Ewallet,522.83,4.761905,26.1415,9.1
1,226-31-3081,C,Naypyitaw,Normal,Female,Electronic accessories,15.28,5,3.8200,80.2200,3/8/2019,10:29,Cash,76.40,4.761905,3.8200,9.6
2,631-41-3108,A,Yangon,Normal,Male,Home and lifestyle,46.33,7,16.2155,340.5255,3/3/2019,13:23,Credit card,324.31,4.761905,16.2155,7.4
3,123-19-1176,A,Yangon,Member,Male,Health and beauty,58.22,8,23.2880,489.0480,1/27/2019,20:33,Ewallet,465.76,4.761905,23.2880,8.4
4,373-73-7910,A,Yangon,Normal,Male,Sports and travel,86.31,7,30.2085,634.3785,2/8/2019,10:37,Ewallet,604.17,4.761905,30.2085,5.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,233-67-5758,C,Naypyitaw,Normal,Male,Health and beauty,40.35,1,2.0175,42.3675,1/29/2019,13:46,Ewallet,40.35,4.761905,2.0175,6.2
996,303-96-2227,B,Mandalay,Normal,Female,Home and lifestyle,97.38,10,48.6900,1022.4900,3/2/2019,17:16,Ewallet,973.80,4.761905,48.6900,4.4
997,727-02-1313,A,Yangon,Member,Male,Food and beverages,31.84,1,1.5920,33.4320,2/9/2019,13:22,Cash,31.84,4.761905,1.5920,7.7
998,347-56-2442,A,Yangon,Normal,Male,Home and lifestyle,65.82,1,3.2910,69.1110,2/22/2019,15:33,Cash,65.82,4.761905,3.2910,4.1


## Criando o Assistant

In [6]:
# Upload the CSV so it can be attached to the assistant / thread messages.
# The context manager closes the local file handle as soon as the upload
# call returns (the original left the handle open for the kernel's lifetime).
with open('arquivos/supermarket_sales.csv', 'rb') as csv_handle:  # binary mode
    file = client.files.create(
        file=csv_handle,
        purpose='assistants',  # purpose tag sent with the upload
    )

In [7]:
# Create the assistant with the code interpreter tool and the uploaded file.
# The instructions are built with implicit string concatenation: the original
# used a backslash continuation *inside* the literal, which embedded the next
# line's indentation (a run of spaces) into the prompt text.
assistant = client.beta.assistants.create(
    name="Analista Financeiro Supermercados",
    instructions=(
        "Você é um analista financeiro de um supermercado. "
        "Você utiliza os dados do formato csv relativo às vendas "
        "do supermercado para realizar as suas análises"
    ),
    tools=[{'type': 'code_interpreter'}],
    tool_resources={
        "code_interpreter": {
            "file_ids": [file.id],  # list of files made available to the tool
        },
    },
    model='gpt-4o',
)

## Cria uma thread

In [13]:
# Create the thread already seeded with the first user question; the uploaded
# CSV is attached to that message for use by the code interpreter tool.
thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            # Stray trailing apostrophe removed from the question text.
            "content": "Qual é o rating médio das vendas do nosso supermercado?",
            "attachments": [
                {
                    "file_id": file.id,
                    "tools": [{"type": "code_interpreter"}],
                }
            ],
        }
    ]
)

## Adiciona mensagem à thread criada

# Follow-up question appended to the existing thread as a new user message.
# (The original assigned `pergunta` twice in a row; the first value was
# overwritten before use, so only the effective assignment is kept.)
pergunta = 'Gere um gráfico pizza com o percentual de vendas por meio de pagamento'

messages = client.beta.threads.messages.create(
    thread_id=thread.id,
    role='user',
    content=pergunta,
)

## Roda a thread no assistant

In [14]:
# Start a run of the assistant on the thread.
# `additional_instructions` appends to the assistant's own instructions; the
# run-level `instructions` parameter used originally *replaces* them, which
# discarded the "analista financeiro" persona configured on the assistant.
run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant.id,
    additional_instructions='O formato do arquivo é csv.',
)

## Aguarda a thread rodar

In [15]:
import time

# Poll once per second, refreshing `run`, until its status is no longer one
# of the listed in-flight values; then report the final status.
while True:
    if run.status not in ['queued', 'in_progress', 'cancelling']:
        break
    time.sleep(1)
    run = client.beta.threads.runs.retrieve(run_id=run.id, thread_id=thread.id)

print(run.status)

completed


## Verifica a resposta

In [16]:
# Anything other than a completed run is reported as an error; otherwise
# fetch and print every message currently in the thread.
if run.status != 'completed':
    print('Erro', run.status)
else:
    messages = client.beta.threads.messages.list(thread_id=thread.id)
    print(messages)

SyncCursorPage[Message](data=[Message(id='msg_0On1qpRS4qd9FtiI1l1L1BLX', assistant_id='asst_9oU3JDRPWOxPyjVzinIz289v', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value='O rating médio das vendas do seu supermercado é aproximadamente 6.97.'), type='text')], created_at=1716567880, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='assistant', run_id='run_eRsSTLPSsA1sIeMG3HTKPPfj', status=None, thread_id='thread_Mm6ioF9dU0zwsbnplapdV1wb'), Message(id='msg_bH1QaPSRzFk0cIUQ96lvZpaN', assistant_id=None, attachments=[Attachment(file_id='file-6KXhCCYPWLrLY7JNeBxtI8sO', tools=[CodeInterpreterTool(type='code_interpreter')])], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value="Qual é o rating médio das vendas do nosso supermercado?'"), type='text')], created_at=1716567871, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='user', run_id=None, status=No

### Verificando somente o conteúdo

O primeiro item da lista (`messages.data[0]`) é a mensagem mais recente, ou seja, a resposta do modelo.

In [17]:
# First content block of the first message in the listing.
latest_message = messages.data[0]
latest_message.content[0]

TextContentBlock(text=Text(annotations=[], value='O rating médio das vendas do seu supermercado é aproximadamente 6.97.'), type='text')

# Analisando os passos do modelo

In [18]:
# Fetch the list of steps recorded for this run on this thread.
run_steps = client.beta.threads.runs.steps.list(run_id=run.id, thread_id=thread.id)

### Iterando sobre cada passo e detalhando-os.

In [19]:
def _show_image_file(file_id):
    """Download an image produced by the assistant, save it as PNG and display it.

    Args:
        file_id: ID taken from an `image_file` content block of a message.
    """
    # Imported lazily so matplotlib is only required when an image shows up.
    import matplotlib.image as mpimg
    import matplotlib.pyplot as plt

    image_path = f'arquivos/{file_id}.png'
    image_data = client.files.content(file_id)
    # The handle is deliberately not called `file`: that global holds the
    # uploaded-CSV object whose `.id` is used by the assistant/thread cells.
    with open(image_path, 'wb') as image_file:
        image_file.write(image_data.read())

    img = mpimg.imread(image_path)
    fig, ax = plt.subplots()
    ax.set_axis_off()
    ax.imshow(img)
    plt.show()


# Walk the steps in the reverse of the listing order and detail each one.
for step in run_steps.data[::-1]:
    details = step.step_details
    print('======= Step >', details.type)

    if details.type == 'tool_calls':
        for tool_call in details.tool_calls:
            # Only code interpreter calls expose `.code_interpreter`.
            if tool_call.type != 'code_interpreter':
                continue
            print('```')
            print(tool_call.code_interpreter.input)
            print('```')
            # A call may produce zero or several outputs; indexing [0]
            # unconditionally raised IndexError on calls without output.
            for output in tool_call.code_interpreter.outputs:
                if output.type == 'logs':
                    print('Result')
                    print(output.logs)

    elif details.type == 'message_creation':
        message = client.beta.threads.messages.retrieve(
            thread_id=thread.id,
            message_id=details.message_creation.message_id
        )
        # A message can carry several blocks (e.g. an image followed by
        # text), so every block is handled rather than only the first.
        for block in message.content:
            if block.type == 'text':
                print(block.text.value)
            elif block.type == 'image_file':
                _show_image_file(block.image_file.file_id)
        

```
import pandas as pd

# Carregar o arquivo CSV
file_path = '/mnt/data/file-6KXhCCYPWLrLY7JNeBxtI8sO'
data = pd.read_csv(file_path)

# Exibir as primeiras linhas do dataframe para entender sua estrutura
data.head()
```
Result
    Invoice ID Branch       City Customer type  Gender  \
0  750-67-8428      A     Yangon        Member  Female   
1  226-31-3081      C  Naypyitaw        Normal  Female   
2  631-41-3108      A     Yangon        Normal    Male   
3  123-19-1176      A     Yangon        Member    Male   
4  373-73-7910      A     Yangon        Normal    Male   

             Product line  Unit price  Quantity   Tax 5%     Total       Date  \
0       Health and beauty       74.69         7  26.1415  548.9715   1/5/2019   
1  Electronic accessories       15.28         5   3.8200   80.2200   3/8/2019   
2      Home and lifestyle       46.33         7  16.2155  340.5255   3/3/2019   
3       Health and beauty       58.22         8  23.2880  489.0480  1/27/2019   
4       Sports and