In [1]:
from sdkgenerator.generate import generate_sdk
from sdkgenerator.constants import DATA_DIR, GENERATED_SDK_DIR
from sdkgenerator.utils import get_code_from_model_response
from sdkgenerator.db import db
import pandas as pd

Database connected successfully. (MongoDB)


In [3]:
# Directory for the evaluation fixtures used by this notebook.
TEST_DATA_DIR = DATA_DIR / 'test'
# parents=True: on a fresh checkout DATA_DIR itself may not exist yet, and
# mkdir() would otherwise raise FileNotFoundError.
TEST_DATA_DIR.mkdir(parents=True, exist_ok=True)
# Directory that is searched for OpenAPI specification files further down.
SPECIFICATIONS_DIR = DATA_DIR / 'openapi-specifications'

# Load test data

In [4]:
# The fixture is read as JSON Lines (one JSON document per line), hence lines=True.
test_pipeline_data = TEST_DATA_DIR / 'test_data.jsonl'
test_data = pd.read_json(path_or_buf=test_pipeline_data, lines=True)

In [5]:
# Preview the first rows of the loaded frame.
test_data.head()

Unnamed: 0,messages
0,"[{'role': 'system', 'content': 'You are a pyth..."
1,"[{'role': 'system', 'content': 'You are a pyth..."
2,"[{'role': 'system', 'content': 'You are a pyth..."
3,"[{'role': 'system', 'content': 'You are a pyth..."
4,"[{'role': 'system', 'content': 'You are a pyth..."


We can see that our test data is currently stored as lines; each line is an array of objects that contain the following fields:
- `role`: the role of the message sender, e.g. `system`, `user`
- `content`: the content of the message

In [6]:
# Store the final message of each conversation in its own column.
# The 'messages' column is selected by name instead of "last column of the row":
# once 'sdk' has been added it becomes the last column, so a positional lookup
# would break as soon as this cell is executed a second time.
test_data['sdk'] = test_data['messages'].apply(lambda messages: messages[-1])

In [7]:
# Display the frame, now including the 'sdk' column.
test_data

Unnamed: 0,messages,sdk
0,"[{'role': 'system', 'content': 'You are a pyth...","{'role': 'assistant', 'content': '```python im..."
1,"[{'role': 'system', 'content': 'You are a pyth...","{'role': 'assistant', 'content': '```python im..."
2,"[{'role': 'system', 'content': 'You are a pyth...","{'role': 'assistant', 'content': '```python im..."
3,"[{'role': 'system', 'content': 'You are a pyth...","{'role': 'assistant', 'content': '```python im..."
4,"[{'role': 'system', 'content': 'You are a pyth...","{'role': 'assistant', 'content': '```python im..."
5,"[{'role': 'system', 'content': 'You are a pyth...","{'role': 'assistant', 'content': '```python im..."
6,"[{'role': 'system', 'content': 'You are a pyth...","{'role': 'assistant', 'content': '```python im..."
7,"[{'role': 'system', 'content': 'You are a pyth...","{'role': 'assistant', 'content': '```python im..."
8,"[{'role': 'system', 'content': 'You are a pyth...","{'role': 'assistant', 'content': '```python fr..."
9,"[{'role': 'system', 'content': 'You are a pyth...","{'role': 'assistant', 'content': '```python im..."


## Query testing data

In [8]:
# Per SDK, fetch the generated text of the last successful "final_code" document.
sdk_code_test_data_pipeline = [
    {
        "$match": {
            "step": "final_code",
            "response.openai.status": "success",
        }
    },
    # Sorting by _id orders documents by insertion, so $last below picks the
    # newest one when an SDK accidentally has several final_code entries.
    {"$sort": {"_id": 1}},
    {
        "$group": {
            "_id": "$sdk_name",
            "last_response_text": {"$last": "$response.openai.generated_text"},
        }
    },
    {
        "$project": {
            "sdk_name": "$_id",
            "last_response_text": 1,
            "_id": 0,
        }
    },
]

data = list(db["train_data"].aggregate(sdk_code_test_data_pipeline))
df_test_data = pd.DataFrame(data)
df_test_data.head()

Unnamed: 0,last_response_text,sdk_name
0,```python\nimport json\nimport requests\n\ncla...,affinity
1,```python\nimport requests\nfrom typing import...,bity
2,```python\nimport requests\nfrom typing import...,2-c-2-p
3,```python\nimport requests\nimport json\nfrom ...,adatree_consent
4,```python\nimport requests\nfrom types import ...,finley


In [9]:
# Replace the raw model response with the first element returned by
# get_code_from_model_response for that response.
df_test_data = (
    df_test_data
    .assign(
        sdk_code=df_test_data['last_response_text'].apply(
            lambda response_text: get_code_from_model_response(response_text)[0]
        )
    )
    .drop(columns=['last_response_text'])
)
df_test_data.head()

Unnamed: 0,sdk_name,sdk_code
0,affinity,import json\nimport requests\n\nclass Affinity...
1,bity,"import requests\nfrom typing import Optional, ..."
2,2-c-2-p,"import requests\nfrom typing import Dict, Opti..."
3,adatree_consent,import requests\nimport json\nfrom types impor...
4,finley,import requests\nfrom types import *\nfrom typ...


In [10]:
# Drop every row that has a missing value in any column.
df_test_data = df_test_data.dropna(axis=0, how='any')
df_test_data.head()

Unnamed: 0,sdk_name,sdk_code
0,affinity,import json\nimport requests\n\nclass Affinity...
1,bity,"import requests\nfrom typing import Optional, ..."
2,2-c-2-p,"import requests\nfrom typing import Dict, Opti..."
3,adatree_consent,import requests\nimport json\nfrom types impor...
4,finley,import requests\nfrom types import *\nfrom typ...


In [11]:
# Per SDK, fetch the generated text of the last successful "types" document.
types_code_test_data_pipeline = [
    {
        "$match": {
            "step": "types",
            "response.openai.status": "success",
        }
    },
    # Sorting by _id orders documents by insertion, so $last below picks the
    # newest one when an SDK has several "types" entries.
    {"$sort": {"_id": 1}},
    {
        "$group": {
            "_id": "$sdk_name",
            "last_response_text": {"$last": "$response.openai.generated_text"},
        }
    },
    {
        "$project": {
            "sdk_name": "$_id",
            "last_response_text": 1,
            "_id": 0,
        }
    },
]

data = list(db["train_data"].aggregate(types_code_test_data_pipeline))
df_types = (
    pd.DataFrame(data)
    .assign(
        types_code=lambda frame: frame['last_response_text'].apply(
            lambda response_text: get_code_from_model_response(response_text)[0]
        )
    )
    .drop(columns=['last_response_text'])
)
df_types.head()

Unnamed: 0,sdk_name,types_code
0,bity,"from typing import TypedDict, Union, List, Lit..."
1,dev,"from typing import TypedDict, Optional, Litera..."
2,finshark,"from typing import Literal, TypedDict, Optiona..."
3,bluesnap,"from typing import TypedDict, List, Literal, U..."
4,baseten,"from typing import TypedDict, Optional\n\nclas..."


In [12]:
# Left-join the types onto the SDK rows; an SDK without types ends up with an
# empty string in types_code.
df_test_data = (
    df_test_data
    .merge(df_types, how='left', on='sdk_name')
    .fillna('')
)

df_test_data.head()

Unnamed: 0,sdk_name,sdk_code,types_code
0,affinity,import json\nimport requests\n\nclass Affinity...,
1,bity,"import requests\nfrom typing import Optional, ...","from typing import TypedDict, Union, List, Lit..."
2,2-c-2-p,"import requests\nfrom typing import Dict, Opti...","from typing import TypedDict, Optional, List, ..."
3,adatree_consent,import requests\nimport json\nfrom types impor...,"from typing import TypedDict, Literal, Optiona..."
4,finley,import requests\nfrom types import *\nfrom typ...,"from typing import TypedDict, Optional\n\nclas..."


In [13]:
def merge_code(sdk_code: str, types_code: str) -> str:
    """Combine SDK code with its optional types code into a single string.

    When ``types_code`` is non-empty, the result is the types code, a line of
    ten dashes, and the SDK code, joined by newlines. When it is empty, the
    SDK code is returned unchanged.
    """
    if not types_code:
        return sdk_code
    separator = '-' * 10
    return '\n'.join((types_code, separator, sdk_code))

In [14]:
# The reference answer is the types code (when there is any) followed by the SDK code.
df_test_data['answer'] = df_test_data.apply(
    lambda row: merge_code(sdk_code=row['sdk_code'], types_code=row['types_code']),
    axis=1,
)

df_test_data.head()

Unnamed: 0,sdk_name,sdk_code,types_code,answer
0,affinity,import json\nimport requests\n\nclass Affinity...,,import json\nimport requests\n\nclass Affinity...
1,bity,"import requests\nfrom typing import Optional, ...","from typing import TypedDict, Union, List, Lit...","from typing import TypedDict, Union, List, Lit..."
2,2-c-2-p,"import requests\nfrom typing import Dict, Opti...","from typing import TypedDict, Optional, List, ...","from typing import TypedDict, Optional, List, ..."
3,adatree_consent,import requests\nimport json\nfrom types impor...,"from typing import TypedDict, Literal, Optiona...","from typing import TypedDict, Literal, Optiona..."
4,finley,import requests\nfrom types import *\nfrom typ...,"from typing import TypedDict, Optional\n\nclas...","from typing import TypedDict, Optional\n\nclas..."


Now we get the generated code for each SDK.

In [15]:
# The separate code columns are already combined in 'answer', so drop them.
df_test_data = df_test_data.drop(columns=['sdk_code', 'types_code'])
df_test_data.head()

Unnamed: 0,sdk_name,answer
0,affinity,import json\nimport requests\n\nclass Affinity...
1,bity,"from typing import TypedDict, Union, List, Lit..."
2,2-c-2-p,"from typing import TypedDict, Optional, List, ..."
3,adatree_consent,"from typing import TypedDict, Literal, Optiona..."
4,finley,"from typing import TypedDict, Optional\n\nclas..."


In [19]:
# Rules passed to generate_sdk; a single string with one numbered rule per line.
user_rules = (
    "1. Use the requests library: All HTTP requests within the SDK must be made using the 'requests' library.\n"
    "2. Class structure: The SDK must be a class, with each method representing an endpoint in the API. Choose method names that reflect the action or resource they interact with.\n"
    "3. Authenticated requests: Implement a method '_make_authenticated_request' to handle authenticated requests.\n"
    "4. JSON request body: Use JSON format for the body of all requests.\n"
    "5. Return type: All methods must return the 'Response' object from the 'requests' library."
)

def get_generated_code(sdk_name):
    """Return the generated code for ``sdk_name``, generating it when missing.

    If ``GENERATED_SDK_DIR/<sdk_name>/<sdk_name>.py`` exists, its content is
    combined via ``merge_code`` with the sibling ``types.py`` (when present).
    Otherwise a specification file whose name contains ``sdk_name`` is looked
    up in ``SPECIFICATIONS_DIR`` and handed to ``generate_sdk`` together with
    the module-level ``user_rules``.

    Args:
        sdk_name: Name of the SDK; used as directory name, file stem and as
            search term for the specification file.

    Returns:
        The merged code as a string, or ``None`` when ``generate_sdk`` raised
        or returned no SDK output file.

    Raises:
        FileNotFoundError: If no specification file matches ``sdk_name``.
    """
    sdk_dir = GENERATED_SDK_DIR / sdk_name
    sdk_name_path = sdk_dir / f"{sdk_name}.py"
    types_code_path = sdk_dir / "types.py"

    # Explicit existence check instead of catching FileNotFoundError, so that
    # errors raised while generating are not chained to an unrelated read error.
    if sdk_name_path.exists():
        sdk_code = sdk_name_path.read_text()
        types_code = types_code_path.read_text() if types_code_path.exists() else ""
        return merge_code(sdk_code, types_code)

    print(f"Generating SDK for {sdk_name}")
    # iterdir() yields entries in arbitrary order and the match is a substring
    # test, so rank candidates: an exact stem match first, then by file name.
    # This makes the chosen specification reproducible between runs.
    candidates = sorted(
        (file for file in SPECIFICATIONS_DIR.iterdir() if sdk_name in file.name),
        key=lambda file: (file.stem != sdk_name, file.name),
    )
    if not candidates:
        raise FileNotFoundError(f"Specification file for {sdk_name} not found")
    spec_path = candidates[0]

    # Best effort: a failed generation is reported and yields None so that the
    # remaining SDKs are still processed.
    try:
        _, sdk_output_file, types_output_file = generate_sdk(spec_path, user_rules=user_rules)
    except Exception as e:
        print(f"Error generating SDK for {sdk_name}: {e}")
        return None
    if sdk_output_file is None:
        return None

    sdk_code = sdk_output_file.read_text()
    types_code = types_output_file.read_text() if types_output_file else ""
    return merge_code(sdk_code, types_code)

# Look up (or generate) the code for every SDK; entries are None where generation failed.
df_test_data['llm_answer'] = df_test_data['sdk_name'].map(get_generated_code)

In [21]:
# Preview the reference answers next to the generated answers.
df_test_data.head()

Unnamed: 0,sdk_name,answer,llm_answer
0,affinity,import json\nimport requests\n\nclass Affinity...,import requests\n\n\nclass AffinityClient:\n ...
1,bity,"from typing import TypedDict, Union, List, Lit...","from typing import TypedDict, Union, List, Lit..."
2,2-c-2-p,"from typing import TypedDict, Optional, List, ...","from typing import TypedDict, Optional, List, ..."
3,adatree_consent,"from typing import TypedDict, Literal, Optiona...","from typing import TypedDict, Literal, Optiona..."
4,finley,"from typing import TypedDict, Optional\n\nclas...","from typing import TypedDict, Optional\n\n\ncl..."


# Evaluation