In [1]:
import ast
import os

from dotenv import load_dotenv
from openai import OpenAI

In [2]:
# Load variables from a local .env file into the process environment, then
# build the single OpenAI client that get_response() uses for every request.
# The API key is read from the OPENAI_API_KEY environment variable (None if unset).
load_dotenv()
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [3]:
def get_response(messages, model="gpt-3.5-turbo", temperature=0.7):
    """Send a chat conversation through the module-level ``client`` and return the reply text.

    Args:
        messages: Chat messages forwarded unchanged to the chat completions
            endpoint (callers in this notebook pass dicts with "role" and
            "content" keys).
        model: Model identifier passed to the endpoint.
        temperature: Sampling temperature passed to the endpoint.

    Returns:
        The ``content`` attribute of the first choice's message.
    """
    request_kwargs = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
    }
    completion = client.chat.completions.create(**request_kwargs)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [4]:
# Work out which of the essential pieces of information are missing from a request.

def get_missing_parts(query):
    """Ask the model which essential training details are absent from ``query``.

    A few-shot prompt pairs example requests with the list of details each one
    lacks ('task', 'model name', 'dataset'); the model is expected to complete
    the final line with a Python-style list literal, which is then parsed.

    Args:
        query: The user's free-text request.

    Returns:
        list[str]: The missing parts named in the model's reply (empty when the
        reply is ``[]``).

    Raises:
        ValueError: If the reply is not text, contains no ``[...]`` span, or the
            span is not a list literal made up only of strings.
    """
    content = f"Q: I want to train AI Model. // missing parts: ['task', 'model name', 'dataset'] \
        Q: I want to train AI Model for text classification. // missing parts: ['model name', 'dataset'] \
        Q: Give me the code for training Roberta model. // missing parts: ['task', 'dataset'] \
        Q: I want to train Bert model with klue dataset // missing parts: ['task'] \
        Q: I need a code to train resnet18 for image classification with cifar10 dataset. // missing parts: [] \
        Q: {query} // missing parts:"

    messages = [
        # The system prompt states which pieces of information are essential.
        {"role": "system", "content": "Essential parts for training AI model are task, model name, dataset."},
        {"role": "user", "content": content},
    ]

    reply = get_response(messages)
    if not isinstance(reply, str):
        raise ValueError(f"Expected a text reply listing the missing parts, got {reply!r}")

    # Tolerate stray text around the list by parsing only the outermost [...] span.
    start, end = reply.find("["), reply.rfind("]")
    if start == -1 or end < start:
        raise ValueError(f"No list found in model reply: {reply!r}")

    # The reply is model-generated text influenced by user input, so it must
    # never be executed: literal_eval only accepts plain Python literals.
    try:
        parts = ast.literal_eval(reply[start:end + 1])
    except (ValueError, SyntaxError, MemoryError, RecursionError) as exc:
        raise ValueError(f"Model reply is not a valid list literal: {reply!r}") from exc

    if not isinstance(parts, list) or not all(isinstance(part, str) for part in parts):
        raise ValueError(f"Model reply is not a list of strings: {reply!r}")
    return parts

In [5]:
def get_last_answer(query, additional_info):
    """Build the final prompt from the request plus the collected details and query the model.

    The final prompt is printed (prefixed with "Final query: ") before it is
    sent as a single user message via ``get_response``.

    Args:
        query: The user's original request.
        additional_info: Extra sentences gathered from the user, appended after
            the request.

    Returns:
        Whatever ``get_response`` returns for the final prompt.
    """
    final_prompt = "{} {} Give me a code snippet.".format(query, additional_info)
    print("Final query:", final_prompt)
    user_message = {"role": "user", "content": final_prompt}
    return get_response([user_message])

In [6]:
def get_answer(query):
    """Run the interactive flow: find missing details, ask for each, then print the answer.

    The request is passed to ``get_missing_parts``; for every part it returns,
    the user is prompted on stdin and the reply is turned into an
    "I need ..." sentence. The request and the collected sentences are then
    handed to ``get_last_answer`` and the result is printed.

    Args:
        query: The user's original request.

    Returns:
        None. All results are written to stdout.
    """
    print(f"User: {query}")
    parts_to_ask = get_missing_parts(query)
    print(f"Missing Parts: {parts_to_ask}")

    # One "I need ..." sentence per answered question, concatenated in order.
    clarifications = []
    for part in parts_to_ask:
        print(f"Bot: Which {part} you need?")
        reply = input()
        print(f"User: {reply}")
        clarifications.append(f"I need {reply}. ")

    final_answer = get_last_answer(query, "".join(clarifications))
    print(f"Bot: {final_answer}")

In [19]:
# Example: only the model name is given. In the recorded run below, the bot
# asks for the task and the dataset (answers are typed interactively).
get_answer("I want to train vgg19 model")

User: I want to train vgg19 model
Missing Parts: ['task', 'dataset']
Bot: Which task you need?
User: image classification
Bot: Which dataset you need?
User: cifar10 dataset
Final query: I want to train vgg19 model I need image classification. I need cifar10 dataset.  Give me a code snippet.
Bot: Sure! Here's a code snippet that demonstrates how to train a VGG19 model for image classification using the CIFAR-10 dataset in PyTorch:

```python
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torchvision.models import vgg19

# Define transform to normalize the data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load CIFAR-10 dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)

tes

In [21]:
# Example: model name and task are given. In the recorded run below, only the
# dataset is asked for.
get_answer("I want to train Bert model for text classification.")

User: I want to train Bert model for text classification.
Missing Parts: ['dataset']
Bot: Which dataset you need?
User: klue dataset
Final query: I want to train Bert model for text classification. I need klue dataset.  Give me a code snippet.
Bot: Here's a code snippet to train a BERT model for text classification using the KLUE dataset:

```python
import torch
from torch.utils.data import DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, AdamW

# Load KLUE dataset
# Assuming you have downloaded the KLUE dataset and saved it as a CSV file
# You will need to modify this part to load the dataset according to your specific configuration
def load_klue_dataset():
    # Load the dataset using pandas or any other library
    # For example:
    import pandas as pd
    df = pd.read_csv('klue_dataset.csv')
    texts = df['text'].tolist()
    labels = df['label'].tolist()
    return texts, labels
    
# Tokenize the input texts
def tokenize_inputs(texts):
    toke

In [22]:
# Example: none of the essential details is given. In the recorded run below,
# the bot asks for the task, the model name, and the dataset in turn.
get_answer("I want to train AI model")

User: I want to train AI model
Missing Parts: ['task', 'model name', 'dataset']
Bot: Which task you need?
User: token classification
Bot: Which model name you need?
User: Roberta
Bot: Which dataset you need?
User: klue dataset
Final query: I want to train AI model I need token classification. I need Roberta. I need klue dataset.  Give me a code snippet.
Bot: Sure! Here's a code snippet that demonstrates how to train a token classification model using the Hugging Face Transformers library with the Roberta model and the KLUE dataset:

```python
import torch
from torch.utils.data import DataLoader
from transformers import RobertaForTokenClassification, RobertaTokenizer, AdamW
from transformers import get_linear_schedule_with_warmup
from transformers import Trainer, TrainingArguments
from datasets import load_dataset, load_metric

# Load KLUE dataset
dataset = load_dataset("klue", "klue_ner")

# Load Roberta tokenizer
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

# Tokenize

In [12]:
# The model's answer is sometimes inaccurate: in the recorded run below the
# request already names the task (text generation), yet 'task' is still
# reported as missing.

get_answer("Give me the code for training GPT2 for text generation with klue dataset.")

User: Give me the code for training GPT2 for text generation with klue dataset.
Missing Parts: ['task']
Bot: Which task you need?
User: text generation
Final query: Give me the code for training GPT2 for text generation with klue dataset. I need text generation.  Give me a code snippet.
Bot: Sure! Here's a code snippet to train GPT-2 for text generation with the KLUE dataset using the Hugging Face's Transformers library:

```python
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2LMHeadModel, GPT2Tokenizer

class KLUEDataset(Dataset):
    def __init__(self, data_path, tokenizer, max_length):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.input_ids = []
        
        with open(data_path, 'r', encoding='utf-8') as file:
            lines = file.readlines()
        
        for line in lines:
            line = line.strip()
            encoded = tokenizer.encode_plus(line, 
                                 