In [1]:
import io
import sys
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import json

from openai.types.chat.chat_completion import ChatCompletion
from openai.types.beta.threads import ImageFileContentBlock

sys.path.append(str(Path(".").resolve().parent))

from config.openai_client import client

In [2]:
def show_json(obj):
    display(json.loads(obj.model_dump_json()))

# Запрос в Chat

Дока для API: https://platform.openai.com/docs/api-reference/chat

In [2]:
text: str = "What is this 'ML'?"
response: ChatCompletion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": text}],
    max_tokens=1024,
    temperature=0.5,
    # logprobs=True,
)

In [3]:
response.choices[0].message.content

'ML stands for machine learning, which is a subset of artificial intelligence that focuses on the development of algorithms and models that allow computers to learn and make decisions based on data without being explicitly programmed. Machine learning algorithms can analyze and interpret complex data, identify patterns, and make predictions or recommendations.'

In [4]:
tools = [
  {
    "type": "function",
    "function": {
      "name": "get_current_weather",
      "description": "Get the current weather in a given location",
      "parameters": {
        "type": "object",
        "properties": {
          "location": {
            "type": "string",
            "description": "The city and state, e.g. San Francisco, CA",
          },
          "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
        },
        "required": ["location", "unit"],
      },
    }
  }
]
messages = [{"role": "user", "content": "What's the weather like in Moscow today?"}]
completion = client.chat.completions.create(
  model="gpt-3.5-turbo",
  messages=messages,
  tools=tools,
  tool_choice="auto"
)

print(completion)


ChatCompletion(id='chatcmpl-9S8kULJHXeF7iGasrCtGBJ8U7RvLF', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_SDl1hpegqRut9dZgNyrZxPyQ', function=Function(arguments='{"location":"Moscow","unit":"celsius"}', name='get_current_weather'), type='function')]))], created=1716493842, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=21, prompt_tokens=83, total_tokens=104))


In [5]:
completion.choices[0].message

ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_SDl1hpegqRut9dZgNyrZxPyQ', function=Function(arguments='{"location":"Moscow","unit":"celsius"}', name='get_current_weather'), type='function')])

In [6]:
completion.choices[0].message.tool_calls[0].function

Function(arguments='{"location":"Moscow","unit":"celsius"}', name='get_current_weather')

# Code interpreter

In [3]:
stream_dataset = io.BytesIO()
df: pd.DataFrame = sns.load_dataset('titanic')
df.to_csv(stream_dataset, mode="wb")
stream_dataset.seek(0)

0

In [4]:
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [5]:
dataset_file = client.files.create(
  file=stream_dataset,
  purpose="assistants"
)
dataset_file

FileObject(id='file-dWFpHXGJeY2hBAnUiy224Ot6', bytes=60473, created_at=1716500733, filename='upload', object='file', purpose='assistants', status='processed', status_details=None)

In [6]:
thread = client.beta.threads.create()

In [7]:
message = client.beta.threads.messages.create(
  thread_id=thread.id,
  role="user",
  content="Target: 'survived'. How it connected with feature 'age'? Plot pairplot graph for them.",
  # attachments=[{"file_id": dataset_file.id, "tools": [{"type": "code_interpreter"}]}],
)

In [8]:
features_charts_painter = client.beta.assistants.create(
  instructions="You are an excellent senior Data Scientist with 10 years of experience. You write and run code to describe connection between features and target, when you get table with data.",
  model="gpt-4-turbo",
  name="Features charts painter",
  tools=[{"type": "code_interpreter"}],
  tool_resources={
    "code_interpreter": {
      "file_ids": [dataset_file.id]
    }
  }
)

In [9]:
run = client.beta.threads.runs.create_and_poll(
  thread_id=thread.id,
  assistant_id=features_charts_painter.id,
  instructions="Please address the user as Jane Doe. The user has a premium account."
)

In [10]:
if run.status == 'completed': 
  messages = client.beta.threads.messages.list(
    thread_id=thread.id
  )
  print(messages)
else:
  print(run.status)

SyncCursorPage[Message](data=[Message(id='msg_trU30twoe7QT1yBM2M58TG1J', assistant_id='asst_DEggcxZgypY0KrxXCyAqOaAc', attachments=[], completed_at=None, content=[ImageFileContentBlock(image_file=ImageFile(file_id='file-aDb6543gGRQOJlAJiWAbvfIF', detail=None), type='image_file'), TextContentBlock(text=Text(annotations=[], value="Here's a scatter plot showing the relationship between 'survived' (as a binary categorical variable) and 'age'. Survivors are designated by the value 1 (Yes) and non-survivors by 0 (No), along the y-axis against 'age' on the x-axis.\n\nIn this plot:\n- Each point represents a passenger.\n- The distribution of ages can be seen along the horizontal rugplot at the bottom.\n\nIf you need any other type of visualization or further analysis on how 'age' affects the survival rate, feel free to ask!"), type='text')], created_at=1716500779, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='assistant', run_id='run_sKzparb39vMOuZc3zX

In [11]:
for d in messages.data[::-1]:
    # if isinstance(d.content[0], ImageFileContentBlock):
    #     pass
        # print(f"{d.role}: {d.content[1].text.value}\n")
    # else:
    print(f"{d.role}: {d.content[0].text.value}\n")

user: Target: 'survived'. How it connected with feature 'age'? Plot pairplot graph for them.

assistant: To assist you better, I will first examine the contents and format of the uploaded file to understand the data structure. This is necessary to know how to accurately read the data and plot the requested pairplot showing the relationship between the 'survived' target and the 'age' feature. Let's start by examining the file.

assistant: The dataset appears to be related to Titanic passenger information with various features such as 'survived', 'pclass', 'sex', 'age', and others. Now, I'll plot a pairplot to see how the 'survived' target is connected with the 'age' feature. This type of plot will help visualize the distribution of 'age' relative to the 'survived' status.

Let's create the plot now.

assistant: It seems there was an issue plotting the pairplot due to some data type conflicts or other internal errors. I'll try a different approach to visualize the relationship between 's

AttributeError: 'ImageFileContentBlock' object has no attribute 'text'

In [16]:
d.content[0]

ImageFileContentBlock(image_file=ImageFile(file_id='file-aDb6543gGRQOJlAJiWAbvfIF', detail=None), type='image_file')

In [12]:
d.content[0].image_file

ImageFile(file_id='file-aDb6543gGRQOJlAJiWAbvfIF', detail=None)

In [13]:
d.content[1].text.value

"Here's a scatter plot showing the relationship between 'survived' (as a binary categorical variable) and 'age'. Survivors are designated by the value 1 (Yes) and non-survivors by 0 (No), along the y-axis against 'age' on the x-axis.\n\nIn this plot:\n- Each point represents a passenger.\n- The distribution of ages can be seen along the horizontal rugplot at the bottom.\n\nIf you need any other type of visualization or further analysis on how 'age' affects the survival rate, feel free to ask!"

In [18]:
# handle image file
api_response = client.files.with_raw_response.retrieve_content(file_id=d.content[0].image_file.file_id)

if api_response.status_code == 200:
    content = api_response.content
    image_data = BytesIO(content)
    img = Image.open(image_data)
  # with open('image.png', 'wb') as f:
  #   f.write(content)
    print('File downloaded successfully.')

NotFoundError: Error code: 404 - {'error': {'message': 'No such File object: file-aDb6543gGRQOJlAJiWAbvfIF', 'type': 'invalid_request_error', 'param': 'id', 'code': None}}

In [None]:
content

In [15]:
content = client.files.retrieve_content(file_id=d.content[0].image_file.file_id)

  content = client.files.retrieve_content(file_id=d.content[0].image_file.file_id)


NotFoundError: Error code: 404 - {'error': {'message': 'No such File object: file-aDb6543gGRQOJlAJiWAbvfIF', 'type': 'invalid_request_error', 'param': 'id', 'code': None}}

In [114]:
show_json(d)

{'id': 'msg_HChrDxaU2rn5GdojnHLlRts9',
 'assistant_id': 'asst_D9mN7oNM8g9BXilOYJOSXFmj',
 'attachments': [],
 'completed_at': None,
 'content': [{'image_file': {'file_id': 'file-EG563dcnmTP7fdDLCcyF7awQ',
    'detail': None},
   'type': 'image_file'},
  {'text': {'annotations': [],
    'value': "Here is the scatter plot depicting the relationship between 'age' and 'survived' from the dataset you provided. Each point represents a passenger, where the x-axis is the age of the passenger and the y-axis indicates survival status (1 for survived and 0 for did not survive).\n\nFrom the plot, we can observe the spread of ages among those who survived and those who did not, though it's challenging to discern a clear trend or pattern just from this visualization. A more detailed statistical analysis or a different type of visual approach might help evaluate any correlations more thoroughly.\n\nIf you would like additional analysis or another type of visualization to explore this relationship, pl

In [100]:
show_json(messages)

{'data': [{'id': 'msg_OMZylN4YBG7WTvglHEKdTjm5',
   'assistant_id': 'asst_lrekOolN6pqYoZGKEN4leWOT',
   'attachments': [],
   'completed_at': None,
   'content': [{'text': {'annotations': [],
      'value': 'The dataset contains several features along with the target "survived". It appears that some values in the "age" column are missing, as there are only 714 non-null entries out of 891 total entries. We\'ll need to handle these missing values appropriately before analyzing the relationship between "age" and "survived".\n\nNow, let\'s visualize the relationship between "age" and "survived" using some statistical and graphical methods. I\'ll start by handling missing age values, after which we can produce some plots and summary statistics. Let\'s proceed with handling missing data. Would you prefer removing the rows with missing \'age\' values, imputing them with a statistic (such as the mean or median), or using another approach?'},
     'type': 'text'}],
   'created_at': 1716499232,


In [94]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     891 non-null    int64   
 1   pclass       891 non-null    int64   
 2   sex          891 non-null    object  
 3   age          714 non-null    float64 
 4   sibsp        891 non-null    int64   
 5   parch        891 non-null    int64   
 6   fare         891 non-null    float64 
 7   embarked     889 non-null    object  
 8   class        891 non-null    category
 9   who          891 non-null    object  
 10  adult_male   891 non-null    bool    
 11  deck         203 non-null    category
 12  embark_town  889 non-null    object  
 13  alive        891 non-null    object  
 14  alone        891 non-null    bool    
dtypes: bool(2), category(2), float64(2), int64(4), object(5)
memory usage: 80.7+ KB


In [105]:
client.beta.threads.delete(thread_id=thread.id)

ThreadDeleted(id='thread_M3vhzW0OKeXG5amZO7nRt4Xr', deleted=True, object='thread.deleted')

In [106]:
client.beta.assistants.delete(assistant_id=features_charts_painter.id)

AssistantDeleted(id='asst_lrekOolN6pqYoZGKEN4leWOT', deleted=True, object='assistant.deleted')

In [82]:
show_json(features_charts_painter)

{'id': 'asst_tt3HkjXRtQmfPdENVUMF7fGt',
 'created_at': 1716497124,
 'description': None,
 'instructions': 'You are an excellent senior Data Scientist with 10 years of experience. You write and run code to describe connection between features and target, when you get table with data.',
 'metadata': {},
 'model': 'gpt-4-turbo',
 'name': 'Features charts painter',
 'object': 'assistant',
 'tools': [{'type': 'code_interpreter'}],
 'response_format': 'auto',
 'temperature': 1.0,
 'tool_resources': {'code_interpreter': {'file_ids': ['file-NlxDcmNsaRh7J0kVbJOiFC9z']},
  'file_search': None},
 'top_p': 1.0}

In [59]:
message = client.beta.threads.messages.create(
  thread_id=thread.id,
  role="user",
  content="I need to solve the equation `3x + 11 = 14`. Can you help me?"
)

In [60]:
assistant = client.beta.assistants.create(
  name="Math Tutor",
  instructions="You are a personal math tutor. Write and run code to answer math questions.",
  tools=[{"type": "code_interpreter"}],
  model="gpt-4o",
)

In [61]:
run = client.beta.threads.runs.create_and_poll(
  thread_id=thread.id,
  assistant_id=assistant.id,
  instructions="Please address the user as Jane Doe. The user has a premium account."
)

In [87]:
client.beta.assistants.delete(assistant_id=assistant.id)

AssistantDeleted(id='asst_iQzczi6vumXzHzJOblRB4kZw', deleted=True, object='assistant.deleted')