In [1]:

import json
import os
import re
import subprocess
import sys
from importlib import metadata

import openai
import pkg_resources

def install_if_needed(package):
    """Install ``package`` with pip unless it is already installed.

    The installed version is looked up with ``importlib.metadata`` (the
    standard-library replacement for the deprecated ``pkg_resources`` API).
    When the package is missing, pip is run through the interpreter that is
    executing this code, so the package lands in the kernel's own
    environment rather than in whatever ``pip3`` happens to be on PATH.

    Args:
        package: Distribution name as it would be passed to ``pip install``.

    Returns:
        None. Progress and the outcome are reported with ``print``; a failed
        installation is reported but does not raise.
    """
    try:
        version = metadata.version(package)
    except metadata.PackageNotFoundError:
        print(f"{package} is not installed. Installing...")
        # `python -m pip` guarantees that pip targets this interpreter.
        return_code = subprocess.call([sys.executable, '-m', 'pip', 'install', package])
        if return_code == 0:
            print(f"{package} has been installed.")
        else:
            # Do not claim success when pip reported an error.
            print(f"Failed to install {package} (pip exited with code {return_code}).")
    else:
        print(f"{package} is already installed (version {version}).")
        
# Make sure the openai package is available.
# NOTE: `import openai` earlier in this cell runs before this call, so on an
# environment without openai the import fails before the install can happen.
install_if_needed('openai')

openai is already installed (version 0.27.8).


### USEFUL LINKS

- https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/chatgpt?pivots=programming-language-chat-completions
- https://www.youtube.com/watch?v=uCKH8bmPgFs&t=502s
- https://github.com/openai/openai-cookbook

In [2]:
openai.api_type = "azure"
openai.api_version = "2023-05-15"

# Credentials are read from the environment so the secret key is never stored
# in (or committed with) the notebook. Any key that was previously written
# into this cell should be treated as leaked and rotated.
#   AZURE_OPENAI_ENDPOINT - your Azure OpenAI resource's endpoint value
#   AZURE_OPENAI_API_KEY  - one of the resource's API keys
openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT", "https://gpt4resource.openai.azure.com/")
openai.api_key = os.environ.get("AZURE_OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError("Set the AZURE_OPENAI_API_KEY environment variable before running this notebook.")

In [39]:
# Subject of the generated training material; both values are interpolated
# into the user prompt.
language = "Python"
topic = "Data exploration"
# difficulty = 'intermediate'  (disabled)

In [94]:
# difficulty_q1 = input(f'On a scale of 0 to 5, where 0 = no experience and 5 = expert, what is your current experience with {language}?')
# difficulty_q2 = input(f'On a scale of 0 to 5, where 0 = no experience and 5 = expert, what is your experience with {topic}?')
# difficulty_q3 = input(f'On a scale of 0 to 5, where 0 = no experience and 5 = expert, what is your experience with programming in general?')

# difficulty_q1 = '5'
# difficulty_q2 = '5'
# difficulty_q3 = '5'

# system_message = f"""Assistant is designed to help users create programming training materials for their own specific experience level. 
#     On a scale of 0 to 5, where 0 is no experience and 5 is expert, these users have an experience rating of {difficulty_q1} in {language}."""

# system_message = f"""Assistant is designed to help users create programming training materials for their own specific experience level. 
#     On a scale of 0 to 5, where 0 is no experience and 5 is expert, these users have an experience rating of {difficulty_q1} in {language}.
#     On a scale of 0 to 5, where 0 is no experience and 5 is expert, these users have an experience rating of {difficulty_q2} in {topic}.
#     On a scale of 0 to 5, where 0 is no experience and 5 is expert, these users have an experience rating of {difficulty_q3} in programming generally."""

In [113]:
# System prompt sent as the first message of every chat request.
system_message = (
    "Assistant is an intelligent chatbot designed to help users "
    "create programming training materials."
)

### NOTES:
- Don't try to change the instances of ``` in the response: they already mark the beginning/end of code snippets, and leaving them alone keeps the prompt simpler, which gives more consistent output.

In [114]:
# Prompt for the first request. It interpolates `topic` and `language` and asks
# for three snippets (incorrect code, a unit test that fails, corrected code),
# each with a file path wrapped in '£' symbols, and no explanations.
# The wording is deliberately left untouched: the reply format depends on it.
user_message = f"""The area of interest for the training material is {topic}.
        Could you please provide me with incorrect code in {language}, followed by a corresponding unit test that will fail?
        Then provide corrected code that will pass the unit test.
        There should be 3 code snippets, one for the incorrect code, one for the unit test and one for the corrected code.
        For each code snippet, provide an appropriate file name within an absolute file path.
        The file paths should be structured such that the files adhere to the best practices of a project in {language},
        Surround the file paths with the symbols '£'. Example: £src/main/index.html£
        There should be no explanations of the code."""

In [115]:
# First request: send the system prompt and the user prompt.
response1 = openai.ChatCompletion.create(
    # "engine" is the deployment name chosen when the ChatGPT or GPT-4 model
    # was deployed (gpt-35-turbo max tokens = 4,096).
    engine="testdeploy",
    messages=[
        dict(role="system", content=system_message),
        dict(role="user", content=user_message),
    ],
    temperature=0,  # degree of randomness of the model's output
)

# Text of the first (and only requested) completion choice.
content = response1['choices'][0]['message']['content']

separator = 100*'-'
print('tokens: ', response1['usage'])
print(separator)
print(separator)
print(content)

tokens:  {
  "completion_tokens": 233,
  "prompt_tokens": 196,
  "total_tokens": 429
}
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
Here are the requested code snippets:

1. Incorrect code in file £src/data_exploration/incorrect_code.py£:

```
def get_unique_values(data):
    unique_values = []
    for value in data:
        if value not in unique_values:
            unique_values.append(value)
    return unique_values
```

2. Unit test in file £tests/data_exploration/test_incorrect_code.py£:

```
import unittest
from data_exploration.incorrect_code import get_unique_values

class TestGetUniqueValues(unittest.TestCase):
    def test_get_unique_values(self):
        data = [1, 2, 3, 1, 2, 3, 4, 5]
        self.assertEqual(get_unique_values(data), [1, 2, 3, 4, 5, 6])
```

3. Corrected code in file £src/data_exploration/corrected_code

In [116]:
# Follow-up prompt: ask for a README wrapped in '$$' markers.
user_message2 = """Could you create a separate README.md file that addresses the code generated?
            Surround the contents of the README.md file with '$$'."""

# The first exchange is replayed as conversation context; the model only
# answers the final user message.
conversation2 = [
    dict(role="system", content=system_message),
    dict(role="user", content=user_message),
    dict(role="assistant", content=response1['choices'][0]['message']['content']),
    dict(role="user", content=user_message2),
]

response2 = openai.ChatCompletion.create(
    engine="testdeploy",
    messages=conversation2,
    temperature=0,
)

content = response2['choices'][0]['message']['content']

separator = 100*'-'
print('tokens: ', response2['usage'])
print(separator)
print(separator)
print(content)

tokens:  {
  "completion_tokens": 534,
  "prompt_tokens": 466,
  "total_tokens": 1000
}
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
Sure, here's the ReadMe file:

$$
# Data Exploration

This module contains functions for exploring data.

## get_unique_values

This function takes a list of values and returns a sorted list of unique values.

### Usage

```python
from data_exploration import get_unique_values

data = [1, 2, 3, 1, 2, 3, 4, 5]
unique_values = get_unique_values(data)
print(unique_values)  # [1, 2, 3, 4, 5]
```

## Testing

To run the unit tests, use the following command:

```bash
python -m unittest discover -s tests
```

### Incorrect Code

The original implementation of `get_unique_values` did not sort the list of unique values before returning it. This is incorrect because the order of the values is not guaranteed to

## <font color='red'>LET THE USER DECIDE WHETHER THEY WANT MORE/LESS DIFFICULTY: it might not be possible to set the difficulty from the beginning, but asking for more/less complexity afterwards seems to work</font>

In [117]:
# Follow-up prompt: ask for a harder example plus a matching README in '$$' markers.
user_message3 = """Could you make the code example more difficult?
            And could you create an updated README.md file below that addresses this newly generated code?
            Surround the contents of the README.md file with '$$'."""

# Both earlier exchanges are replayed as context; only the last user message
# is answered.
conversation3 = [
    dict(role="system", content=system_message),
    dict(role="user", content=user_message),
    dict(role="assistant", content=response1['choices'][0]['message']['content']),
    dict(role="user", content=user_message2),
    dict(role="assistant", content=response2['choices'][0]['message']['content']),
    dict(role="user", content=user_message3),
]

response3 = openai.ChatCompletion.create(
    engine="testdeploy",
    messages=conversation3,
    temperature=0,
)

content = response3['choices'][0]['message']['content']

separator = 100*'-'
print('tokens: ', response3['usage'])
print(separator)
print(separator)
print(content)

tokens:  {
  "completion_tokens": 811,
  "prompt_tokens": 1050,
  "total_tokens": 1861
}
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
Sure, here's an updated code example:

1. Incorrect code in file £src/data_exploration/incorrect_code.py£:

```
def get_top_n_values(data, n):
    top_n_values = []
    for i in range(n):
        max_value = max(data)
        top_n_values.append(max_value)
        data.remove(max_value)
    return top_n_values
```

2. Unit test in file £tests/data_exploration/test_incorrect_code.py£:

```
import unittest
from data_exploration.incorrect_code import get_top_n_values

class TestGetTopNValues(unittest.TestCase):
    def test_get_top_n_values(self):
        data = [1, 2, 3, 4, 5]
        self.assertEqual(get_top_n_values(data, 3), [5, 4, 3])
```

3. Corrected code in file £src/data_exploration/corrected_c

In [77]:
def extract_languages(text):
    """Return the language tags that follow ``` fences in ``text``.

    Whatever sits between a ``` marker and the end of that line is captured;
    captures that are empty or whitespace-only (for example closing fences)
    are skipped. Tags are returned in order of appearance, unstripped.
    """
    fence_tails = re.findall(r"```(.*)\n", text)
    return [tail for tail in fence_tails if tail.strip()]

def extract_filenames(text):
    """Return the file names/paths that are wrapped in '£' markers in ``text``.

    A name may be delimited by one or two '£' characters on each side, so both
    ``£src/app.py£`` and ``££app.js££`` are recognised. A match consisting of a
    single backtick is ignored.

    The last extracted entry is replaced by the first entry with its base name
    prefixed by ``corrected-`` (``app.js`` -> ``corrected-app.js``,
    ``src/app.py`` -> ``src/corrected-app.py``). The full base name is kept,
    including names with several dots or with none.

    Args:
        text: Model reply to scan.

    Returns:
        List of names in order of appearance, or an empty list when the reply
        contains no delimited name.
    """
    # [^£\n]+ keeps every name on one line and stops at the closing marker, so
    # two names on the same line are not merged into one match.
    pattern = r"£{1,2}([^£\n]+)£{1,2}"
    matches = [match for match in re.findall(pattern, text) if match != '`']
    if not matches:
        # Nothing to rename; avoids IndexError on matches[-1].
        return matches
    directory, separator, base_name = matches[0].rpartition('/')
    matches[-1] = directory + separator + 'corrected-' + base_name
    return matches

def remove_before_second_newline(text):
    """Return ``text`` with its first line removed.

    Everything up to and including the first newline is dropped; a string
    that contains no newline yields an empty string.
    """
    _first_line, _newline, remainder = text.partition('\n')
    return remainder

def extract_code(text):
    """Return the contents of every ``` ... ``` fenced block in ``text``.

    Each block's first line (the remainder of the opening fence line, e.g. a
    language tag) is removed with ``remove_before_second_newline``. Blocks are
    returned in order of appearance; a fence that is never closed is not matched.
    """
    fenced_blocks = re.findall(r"```([\s\S]*?)```", text)
    return [remove_before_second_newline(block) for block in fenced_blocks]

In [78]:
# Parse the reply currently held in `content` (each request cell above reassigns
# it, so this is whichever response cell ran last) into fence language tags,
# '£'-delimited file names and code snippet bodies.
languages = extract_languages(content)
filenames = extract_filenames(content)
code = extract_code(content)

In [79]:
languages

['javascript', 'javascript', 'javascript']

In [80]:
filenames

['app.js', 'app.test.js', 'corrected-app.js']

In [81]:
code

["const app = express();\n\napp.get('/', (req, res) => {\n  res.send('Hello World');\n});\n\napp.listen(3000);\n",
 'const app = require(\'./app\');\n\ndescribe(\'GET /\', () => {\n  it(\'responds with "Hello World"\', (done) => {\n    request(app)\n      .get(\'/\')\n      .expect(200, \'Hello\')\n      .end(done);\n  });\n});\n',
 "const app = express();\n\napp.get('/', (req, res) => {\n  res.send('Hello World');\n});\n\napp.listen(3000);\n\nmodule.exports = app;\n"]

In [None]:
def run_checks():
    """Placeholder: no checks are implemented yet, so this does nothing."""
    return None

In [71]:
# Bundle the parsed pieces under parallel keys and write them to a JSON file.
data = dict(
    labels=['Incorrect code', 'Unit test', 'Corrected code'],
    languages=languages,
    filenames=filenames,
    code=code,
)

json_filename = 'label_language_filename_code.json'
with open(json_filename, 'w') as file:
    file.write(json.dumps(data))

In [28]:
#RUN CELL TO SAVE CONTENT IN A TXT FILE
import os

folder_path = 'txt_outputs'
# Create the folder on first use instead of failing in os.listdir().
os.makedirs(folder_path, exist_ok=True)

# Only files named output_<number>.txt take part in the numbering. The whole
# number is parsed (not just the character before ".txt"), so output_10.txt
# counts as 10 and is not overwritten by the next run; unrelated .txt files
# are ignored instead of crashing int().
text_files = [f for f in os.listdir(folder_path) if re.fullmatch(r'output_[0-9]+\.txt', f)]
txt_number_list = [int(file_name[len('output_'):-len('.txt')]) for file_name in text_files]
# default=0 covers the "no txt files yet" case, giving output_1.txt.
new_txt_number = str(max(txt_number_list, default=0) + 1)

output_path = os.path.join(folder_path, 'output_' + new_txt_number + '.txt')
# The context manager closes the file even if a write fails; UTF-8 keeps
# non-ASCII characters in the prompts/reply (such as '£') portable.
with open(output_path, 'w', encoding='utf-8') as text_file:
    text_file.write(system_message + '\n')
    text_file.write(50*'-' + '\n')
    text_file.write(user_message + '\n')
    text_file.write(50*'-' + '\n')
    text_file.write(content + '\n')