<a href="https://colab.research.google.com/github/alanntl/SELGO-LITE/blob/alanntl/SLEGO_LITE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SLEGO Project: UNSW CSE PhD Research - Alan Siu Lung Ng
https://github.com/alanntl/SELGO-LITE

# Connect this Notebook to your GoogleDrive
paste this code on terminal if you wanna run colab using local runtime:

  jupyter notebook --NotebookApp.allow_origin='https://colab.research.google.com' --port=8888 --NotebookApp.port_retries=0

For running SLEGO locally, you need to download Google Drive on your local computer to make the local path available.

IF you are collaborating data analytics using slego, please share your slego folder to your teammate, and the one being shared need to make this SLEGO folder a "shortcut" to google drive's main folder.

In [123]:
%%time
import sys

# Function to check if running in Google Colab
def is_colab_runtime():
    return 'google.colab' in sys.modules

# Define main folder path based on runtime environment
if is_colab_runtime():
    print("This is running in Google Colab.")
    drive_mainfolder = '/content/drive/MyDrive/SLEGO'
    drive_folder= '/content/drive/MyDrive/'
    # Import necessary Colab modules here
    from google.colab import drive
    from google.colab import files
    # Mount Google Drive
    drive.mount('/content/drive', force_remount=True)
else:
    print("This is running in a local or other remote runtime.")
    gmailaccount = 'alann5157@gmail.com'
    drive_mainfolder = f"/Users/an/Library/CloudStorage/GoogleDrive-{gmailaccount}/My Drive/SLEGO"
    drive_folder= f"/Users/an/Library/CloudStorage/GoogleDrive-{gmailaccount}/My Drive/"

# This %%time magic command only works in IPython/Jupyter environments, and its placement should be at the start of a cell.
# If needed for timing in a script, use the time module or other Python profiling tools.


This is running in Google Colab.
Mounted at /content/drive
CPU times: user 249 ms, sys: 24.3 ms, total: 274 ms
Wall time: 6.77 s


# Clone SLEGO repo

In [124]:
%%time
import os
import sys
from IPython.display import clear_output
import subprocess

# Path to your repository
repo_path = drive_mainfolder
repo_url = 'https://github.com/alanntl/SELGO-LITE.git'

# Function to run subprocess commands with error handling
def run_command(command, check=True, **kwargs):
    try:
        subprocess.run(command, check=check, **kwargs)
    except subprocess.CalledProcessError as e:
        print(f"Error running command {' '.join(command)}: {e}")
        return None
    return True

# Ensure the repository path exists
if not os.path.exists(repo_path):
    if run_command(['git', 'clone', repo_url, repo_path]):
        print("Repository cloned.")
else:
    # Change to the repository directory
    os.chdir(repo_path)

    # Fetch latest changes from the repository
    if run_command(['git', 'fetch']):
        # Check for new or changed directories
        result = subprocess.run(['git', 'diff', '--name-only', 'HEAD', 'origin/master'], capture_output=True, text=True)
        changed_files = result.stdout.splitlines()
        directory_changes = any(os.path.dirname(f) for f in changed_files)

        if directory_changes:
            # If there are directory changes, pull the updates
            if run_command(['git', 'pull']):
                print("Repository updated with new directory changes.")
        else:
            print("No directory changes detected; no update necessary.")


Error running command git fetch: Command '['git', 'fetch']' returned non-zero exit status 1.
CPU times: user 10.5 ms, sys: 1.77 ms, total: 12.3 ms
Wall time: 1.44 s


# Setup virtural library

In [125]:
%%time
import os
import sys

# Define the path to the virtual environment
slego_env = f"{drive_folder}/slego_env_v0_0_1"
requirements_file = f"{drive_mainfolder}/requirements.txt"

# Check if the virtual environment directory does not exist
if not os.path.exists(slego_env):
    # Install virtualenv if it's not installed
    !pip install virtualenv
    # Create the virtual environment
    !virtualenv "{slego_env}"

    # Activate the virtual environment
    !source "{slego_env}/bin/activate"

    # Check if the requirements file exists
    if os.path.exists(requirements_file):
        # Install the requirements from the file
        !pip install -r "{requirements_file}"

# Append the path to sys.path
if is_colab_runtime()==True:
    sys.path.append(f"{slego_env}/lib/python3.10/site-packages")
else:
    !pip install -r "{requirements_file}"


CPU times: user 221 µs, sys: 0 ns, total: 221 µs
Wall time: 1.56 ms


# Setup the app foldersystem for slego

In [126]:
# This will prompt for authorization and mount your Google Drive.
if is_colab_runtime()==True:
    drive.mount('/content/drive', force_remount=True)

folder_path = f'{drive_mainfolder}/slegospace'

# Now you can reference subfolders and files relative to the top-level folder
dataspace = '/dataspace/'  # This is equivalent to '/content/drive/MyDrive/SLEGO/slegospace/dataspace'
recordspace = '/recordspace/'  # Equivalent to '/content/drive/MyDrive/SLEGO/slegospace/recordspace'
functionspace = '/functionspace/'  # Equivalent to '/content/drive/MyDrive/SLEGO/slegospace/functionspace'
knowledgespace = '/knowledgespace/'  # Equivalent to '/content/drive/MyDrive/SLEGO/slegospace/knowledgespace'

if is_colab_runtime()==True:
  from google.colab import files
  files.view(folder_path)
os.chdir(folder_path)

Mounted at /content/drive


<IPython.core.display.Javascript object>

# Import Libraries
Import all the related libraries

In [127]:
%%time
import panel as pn
import inspect
import ast  # For safely evaluating the input string
import re
import importlib
import json
import io
import time
import param
import json
from datetime import datetime
import itertools
import pandas as pd
pn.extension()
pn.extension(sizing_mode = 'stretch_both')
pn.extension('ace', 'jsoneditor')
pn.extension('tabulator')


CPU times: user 212 ms, sys: 7.22 ms, total: 219 ms
Wall time: 374 ms


# Select which modules to import

In [128]:
%%time
def delete_func_file(func_file_path):
    # Check if the file exists
    if os.path.exists(func_file_path):
        # Delete the file
        os.remove(func_file_path)
        #print(f"File {func_file_path} has been deleted.")
    else:
        print(f"No file named {func_file_path} found.")


# Step 3: Get the list of .py files in the folder
py_files = [file for file in os.listdir(folder_path+functionspace) if file.endswith('.py')]

# New Step: Check if func.py exists and delete it
func_file_path = 'func.py'
delete_func_file(func_file_path)

funcfilecombo = pn.widgets.MultiChoice(name='Select Function',
                                       value=['util.py',
                                              'func_data_preprocss.py',
                                              'func_moving_avg_plot.py',
                                              'llm.py',
                                              'func_eda.py',
                                              'func_uci_dataset.py',
                                              'webscrape.py',
                                              'func_arxiv.py',
                                              'func_backtest.py',
                                              'func_autogluon.py'],
                                       options=py_files, height=200)

# create funcfilecombo_change function
def funcfilecombo_change(event):
    delete_func_file(func_file_path)

    py_files = funcfilecombo.value
    # Step 4 (Modified): Create a new file named func.py in the current repository
    with open(func_file_path, 'w') as func_file:
        # Step 5: Iterate over each .py file and append its content to func.py
        for py_file in py_files:
            print(py_file)
            file_path = os.path.join(folder_path+functionspace, py_file)
            with open(file_path, 'r') as file:
                func_file.write(file.read() + '\n')
    import func
    importlib.reload(func)


# param watch
funcfilecombo.param.watch(funcfilecombo_change, 'value')

display(funcfilecombo)

# call the event
funcfilecombo_change(None)
# !ls

No file named func.py found.
util.py
func_data_preprocss.py
func_moving_avg_plot.py
llm.py
func_eda.py
func_uci_dataset.py
webscrape.py
func_arxiv.py
func_backtest.py
func_autogluon.py
CPU times: user 105 ms, sys: 9.43 ms, total: 114 ms
Wall time: 312 ms


# SLEGO APP

In [148]:
%%time
import func
importlib.reload(func)

funccombo = func._create_multi_select_combobox(func)

compute_btn = pn.widgets.Button(name='Compute', height =50,  button_type='primary')
savepipe_btn = pn.widgets.Button(name='Save Pipeline', height =35)
pipeline_text= pn.widgets.TextInput(value='', placeholder='Input Pipeline Name', height=35)
json_toggle = pn.widgets.Toggle(name='Input mode: text or form', height =35, button_type='warning')

pipeline_dict = {}
json_editor = pn.widgets.JSONEditor(value=pipeline_dict, height=400, mode='form')
input_text = pn.widgets.TextAreaInput(value='', placeholder='input the parameters', height=400)
progress_text = pn.widgets.TextAreaInput(value='',placeholder='Input your analytics query here', name='Progress msg and query inputs for recommendation:',height=150)
output_text= pn.widgets.TextAreaInput(value='',placeholder= 'Results will be shown here', name='System output message:')

recommendation_btn = pn.widgets.Button(name='Get Recommendation', height =35, button_type='success')
recomAPI_text= pn.widgets.TextInput(value='', placeholder='Your AI API key', height=35)


file_text = pn.widgets.TextInput(value='/dataspace', placeholder='Input the file name')
folder_select= pn.widgets.Select(name= 'Select Folder', options=[item for item in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, item))] +['/'], value= 'dataspace', height=50)

filefolder_confirm_btn = pn.widgets.Button(name='Confirm')

file_view = pn.widgets.Button(name='View')
file_download = pn.widgets.Button(name='Download')
file_upload = pn.widgets.Button(name='Upload')
file_input = pn.widgets.FileInput( name='Upload file')
file_delete = pn.widgets.Button(name='Delete')

selected_folder_path = folder_path + str(file_text.value)
file_list= os.listdir(selected_folder_path)
df_file = pd.DataFrame(file_list, columns=['Filter Files :'])
file_table = pn.widgets.Tabulator(df_file, theme = 'semantic-ui', header_filters=True,layout='fit_data_table', show_index=False,margin=(0,0,30,0))


def folder_select_changed(event):
    file_text.value = '/'+ str(folder_select.value)
    on_filefolder_confirm_btn_click(None)


def json_editor_change(event):
    #print(json_editor.value)
    input_text.value = str(json_editor.value)


# recommendation_btn param watch
def recommendation_btn_clicked(event):
    if event.new:
        output_text.value = 'recommenda clicked'

        func.chatgpt_chat(model='gpt-3.5-turbo',
                  user_input_file='dataspace/user_text_input.txt',
                  output_text_file='dataspace/gpt_output_text.txt',
                  output_json_file='dataspace/gpt_output_full.json',
                  temperature=1,
                  max_tokens=256,
                  top_p=1,
                  frequency_penalty=0,
                  presence_penalty=0,
                  api_key='sk-CiO5GzpXbxZQsMuKEQEkT3BlbkFJz4LS3FuI3f5NqmF1BXO',
                  user_message='[# your query# ], \n generate the pipeline dictionary for me base on my query and input:')


def json_toggle_clicked(event):
    if event.new:
        widget_tab.active=1
    else:
        widget_tab.active=0

#
def get_doc_string(pipeline):
    text = input_text.value
    output=''
    data = ast.literal_eval(text)
    data.keys()
    # loop keys
    for key in data.keys():
        output +='#######'+str(key)+'#######\n'
        try:
            output += eval(f'func.{key}.__doc__')+'\n'
        except:
            output += 'No docstring found for this function\n'
    return output

def input_text_change(event):

    text = re.sub(r'\bfalse\b', 'False', input_text.value, flags=re.IGNORECASE)
    try:
        pipeline_dict = ast.literal_eval(text)
        json_editor.value = ast.literal_eval(text)

    except ValueError as e:
        output_text.value = f'Error parsing input: {e}'


def funccombo_change(event):
    output_text.value = ''
    list_funcs = funccombo.value
    list_params =[]

    for funcchoice in funccombo.value:
      function=  eval('func.'+funcchoice)
      list_params.append(func._extract_parameter(function))

    funcs_params = dict(zip(list_funcs,list_params))
    formatted_data = json.dumps(funcs_params, indent=5)

    json_editor.value = funcs_params

    input_text.value = str(formatted_data)
    output_text.value = get_doc_string(input_text.value)
    progress_text.value = f'selected {funccombo.value}!'



def save_record(space, data, pipeline_name=None):
    """
    Saves the given data structure as a JSON file named with the current datetime.

    Parameters:
    - data: The data structure to be saved as JSON.
    """
    if pipeline_name is None:
      # Generate a filename based on the current datetime, e.g., "record_20230405_153000.json"
      filename = datetime.now().strftime("record_%Y%m%d_%H%M%S.json")
    else:
      filename = pipeline_name+'.json'

    # Define the full path where you want to save the file, here assuming current directory
    full_path = './'+space+'/' + filename

    # Serialize and save the data structure to a file
    with open(full_path, "w") as file:
        json.dump(data, file, indent=5)


def compute_btn_clicked(event):
    progress_text.value = 'Computing...'

    if widget_tab.active==0:
        widget_tab.active=1
        widget_tab.active=0
    else:
        widget_tab.active=0
        widget_tab.active=1
    time.sleep(1)
    # widget_tab.active=0

    pipeline_dict = json_editor.value
    output_text.value = ''

    for function_name, parameters in pipeline_dict.items():
        progress_text.value = f'Computing {function_name}...'
        module = 'func.'
        start_time = time.time()  # Start the timer

        # Dynamically execute the function
        function = eval(module + function_name)
        result = function(**parameters)
        # Convert result to string
        result_string = str(result)
        # Create an iterator for words
        words_iterator = iter(result_string.split())
        # Use itertools.islice to get the first 1000 words without creating a full list
        first_x_words = itertools.islice(words_iterator, 500)
        # Join the words and append to output_text.value
        # Computation time
        compute_time = time.time() - start_time

        output_text.value += f"\n===================={function_name}====================\n\n"
        output_text.value += f"Function computation Time: {compute_time:.4f} seconds\n"
        output_text.value += f"\n"
        output_text.value += " ".join(first_x_words)

    save_record('recordspace', pipeline_dict)
    progress_text.value = 'Done!'

def save_pipeline(event):
    if pipeline_text.value == '':
      pipeline_name = '__'
    else:
      pipeline_name = pipeline_text.value
    text = input_text.value
    text = re.sub(r'\bfalse\b', 'False', input_text.value, flags=re.IGNORECASE)
    data = ast.literal_eval(text)
    save_record('knowledgespace', data, pipeline_name)

def on_file_buttons_click(event):
    output_text.value=''
    file_lsit =file_table.selected_dataframe.values.tolist()

    if len(file_lsit) != 0:
        if event.obj.name == 'View':
            output_text.value = ''
            for filename in file_lsit:
                output_text.value +=f"\n\n===================={str(filename)}====================\n\n"
                file_path = folder_path + str(file_text.value) + '/' + filename[0]
                with open(file_path, 'r') as file:
                    output_text.value += file.read()
        elif event.obj.name == 'Download':
            output_text.value = 'The file is already saved to your google drive folder!'
        elif event.obj.name == 'Upload':
            output_text.value = 'Please put it into the google drive folder!'
        elif event.obj.name == 'Delete':
            output_text.value = 'Please delete it from your google drive folder!'
    else:
        output_text.value = 'Please select a file to view, download, upload or delete!'

def on_filefolder_confirm_btn_click(event):
    selected_folder_path = folder_path + str(file_text.value)
    file_list= os.listdir(selected_folder_path)
    df_file = pd.DataFrame(file_list, columns=['Filter Files :'])
    file_table.value = df_file



funccombo.param.watch(funccombo_change, 'value')
input_text.param.watch(input_text_change, 'value')
json_toggle.param.watch(json_toggle_clicked, 'value')
json_editor.param.watch(json_editor_change, 'value')
recommendation_btn.param.watch(recommendation_btn_clicked, 'value')
compute_btn.on_click(compute_btn_clicked)
savepipe_btn.on_click(save_pipeline)
widget_funcsel= pn.Column(funccombo, compute_btn,widget_btns )
widget_recom = pn.Row(recommendation_btn,recomAPI_text)
filefolder_confirm_btn.on_click(on_filefolder_confirm_btn_click)
file_view.on_click(on_file_buttons_click)
file_download.on_click(on_file_buttons_click)
file_upload.on_click(on_file_buttons_click)
file_delete.on_click(on_file_buttons_click)
folder_select.param.watch(folder_select_changed, 'value')

widget_tab = pn.Tabs(('json input',json_editor),('text input',input_text))
widget_input =pn.Column(pn.layout.Divider(height=10,margin=(5)),widget_tab)
widget_btns = pn.Row(savepipe_btn,pipeline_text,json_toggle )
widget_updownload = pn.Column(pn.Row(file_view, file_download, ),file_input,pn.Row(file_upload, file_delete), height=150)
widget_files = pn.Column(folder_select,pn.Row( file_text,filefolder_confirm_btn, height=55), file_table, widget_updownload, width=250, margin=(0,20,0,0))


app = pn.Row(widget_files, pn.Column(widget_funcsel,widget_input), pn.Column(widget_recom, progress_text, pn.layout.Divider(height=10,margin=(10)),output_text))

CPU times: user 57.7 ms, sys: 1.06 ms, total: 58.8 ms
Wall time: 75.1 ms


# Display the App

In [149]:
if is_colab_runtime()==False:
    # Instantiate the template with widgets displayed in the sidebar
    template = pn.template.MaterialTemplate(
        title='SLEGO - Software Lego: A Collaborative and Modular Architecture for Data Analytics',
        sidebar=[],
    )
    # Append a layout to the main area, to demonstrate the list-like API
    template.main.append(app)
    template.show()
else:
    display(app)

In [131]:
import json
import os

# Set the directory containing the JSON files
json_directory = 'knowledgespace'

# Set the path for the single output text file
output_file_path = 'knowledgespace/knowledge.txt'

# Ensure the output directory exists (output_file_path contains the full path, so extract the directory part)
output_directory = os.path.dirname(output_file_path)
os.makedirs(output_directory, exist_ok=True)

# Open the output file (creates the file if it doesn't exist)
with open(output_file_path, 'w') as output_file:
    # Iterate through each file in the JSON directory
    for filename in os.listdir(json_directory):
        if filename.endswith('.json'):
            # Construct the full path to the JSON file
            json_file_path = os.path.join(json_directory, filename)

            # Read JSON data from the file
            with open(json_file_path, 'r') as file:
                json_data = json.load(file)

            # Convert JSON data to a string with formatting
            text_data = json.dumps(json_data, indent=4)

            # Write the formatted JSON data to the output file, followed by a newline for separation
            output_file.write(text_data + "\n\n")

print("All JSON files have been combined into one text file at:", output_file_path)


All JSON files have been combined into one text file at: knowledgespace/knowledge.txt


In [132]:
import json
import os
import openai
def combine_json_to_text(json_directory:str= 'knowledgespace',
                         output_file_path:str ='knowledgespace/knowledge.txt'):
    """
    Combines all JSON files in the specified directory into a single text file.

    Parameters:
    - json_directory: str, path to the directory containing JSON files.
    - output_file_path: str, path to the output text file where combined JSON data will be stored.

    """

    # Ensure the output directory exists (output_file_path contains the full path, so extract the directory part)
    output_directory = os.path.dirname(output_file_path)
    os.makedirs(output_directory, exist_ok=True)

    # Open the output file (creates the file if it doesn't exist)
    with open(output_file_path, 'w') as output_file:
        # Iterate through each file in the JSON directory
        for filename in os.listdir(json_directory):
            if filename.endswith('.json'):
                # Construct the full path to the JSON file
                json_file_path = os.path.join(json_directory, filename)

                # Read JSON data from the file
                with open(json_file_path, 'r') as file:
                    json_data = json.load(file)

                # Convert JSON data to a string with formatting
                text_data = json.dumps(json_data, indent=4)

                # Write the formatted JSON data to the output file, followed by a newline for separation
                output_file.write(text_data + "\n\n")

    print(f"All JSON files have been combined into one text file at: {output_file_path}")

# Example usage:
combine_json_to_text('knowledgespace', 'knowledgespace/knowledge2.txt')


All JSON files have been combined into one text file at: knowledgespace/knowledge2.txt


In [133]:
def call_api(prompt, api_key):
    """Makes a POST API call with the prompt and returns the response."""
    url = 'https://api.openai.com/v1/engines/text-davinci-003/completions'  # Example URL
    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }
    payload = {
        'prompt': prompt,
        'max_tokens': 150
    }
    response = requests.post(url, json=payload, headers=headers)
    return response.json()


In [134]:
def chatgpt_chat(model:str='gpt-3.5-turbo',
                  user_input_file:str='dataspace/user_text_input.txt',
                  output_text_file:str='dataspace/gpt_output_text.txt',
                  output_json_file:str='dataspace/gpt_output_full.json',
                  temperature:int=1,
                  max_tokens:int=256,
                  top_p:int=1,
                  frequency_penalty:int=0,
                  presence_penalty:int=0,
                  api_key:str='ai_api',
                  user_message:str='Summarize:',):

    '''
    This function interfaces with OpenAI's GPT model to process a text input and obtain a generated response.
    It reads an additional input from a file, merges it with the user's direct input, and sends the combined
    content to the API. The response is then saved to both text and JSON files.

    Parameters:
        api_key (str): Your OpenAI API key.
        model (str): Identifier for the model version to use.
        user_message (str): Direct user input as a string.
        user_input_file (str): Path to a file containing additional text to send to the API.
        output_file (str): Path to save the plain text response from the API.
        output_json_file (str): Path to save the full JSON response from the API.
        temperature (float): Controls randomness in response generation. Higher is more random.
        max_tokens (int): Maximum length of the response.
        top_p (float): Controls diversity via nucleus sampling: 0.1 means only top 10% probabilities considered.
        frequency_penalty (float): Decreases likelihood of repeating words.
        presence_penalty (float): Decreases likelihood of repeating topics.

    Returns:
        str: The text response from the API.
    '''

    # Initialize variables to store responses
    ans = None
    ans_dict = {}

    combined_message = user_message  # Start with the user's direct message

    # Determine the file type and read content accordingly
    if os.path.exists(user_input_file):
        file_extension = user_input_file.split('.')[-1].lower()
        if file_extension == 'txt':
            with open(user_input_file, 'r') as file:
                file_contents = file.read().strip()
            combined_message += f"\n\n==== Text File Input ====\n\n{file_contents}"
        elif file_extension == 'pdf':
            with open(user_input_file, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                pdf_contents = ' '.join([page.extract_text() for page in pdf_reader.pages if page.extract_text()])
            combined_message += f"\n\n==== PDF File Input ====\n\n{pdf_contents}"
        elif file_extension == 'docx':
            doc = Document(user_input_file)
            docx_contents = ' '.join([para.text for para in doc.paragraphs])
            combined_message += f"\n\n==== DOCX File Input ====\n\n{docx_contents}"
        elif file_extension == 'xlsx':
            workbook = load_workbook(filename=user_input_file)
            sheet = workbook.active
            xlsx_contents = ' '.join([str(cell.value) for row in sheet for cell in row if cell.value is not None])
            combined_message += f"\n\n==== XLSX File Input ====\n\n{xlsx_contents}"
        elif file_extension in ['png', 'jpg', 'jpeg']:
            img = Image.open(user_input_file)
            image_text = pytesseract.image_to_string(img)
            combined_message += f"\n\n==== Image File Input (OCR) ====\n\n{image_text}"
        elif file_extension == 'json':
            with open(user_input_file, 'r') as file:
                json_data = json.load(file)
                json_contents = json.dumps(json_data, indent=4)
            combined_message += f"\n\n==== JSON File Input ====\n\n{json_contents}"
        elif file_extension == 'csv':
            with open(user_input_file, mode='r', newline='', encoding='utf-8') as file:
                reader = csv.reader(file)
                csv_contents = ' '.join([','.join(row) for row in reader])
            combined_message += f"\n\n==== CSV File Input ====\n\n{csv_contents}"


    try:
        # Set the API key (consider using environment variables for security)
        openai.api_key = api_key

        # Create a chat completion request with the specified parameters
        response = openai.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": combined_message}],
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
        )

        # Extract and process the response
        ans_dict = response.to_dict()
        if 'choices' in ans_dict and len(ans_dict['choices']) > 0:
            if 'message' in ans_dict['choices'][0]:
                ans = ans_dict['choices'][0]['message']['content']

        # Save the text response and the full JSON response
        if ans:
            with open(output_text_file, 'w') as f:
                f.write(ans)
        with open(output_json_file, 'w') as json_file:
            json.dump(ans_dict, json_file, indent=4)

    except Exception as e:
        print(f"An unexpected error occurred: {e}")

    return ans


In [135]:
chatgpt_chat(model='gpt-4-turbo',
                  user_input_file='knowledgebase/knowledge.txt',
                  output_text_file='knowledgebase/pipeline_recommendation.txt',
                  output_json_file='knowledgebase/pipeline_recommendation.json',
                  temperature=1,
                  max_tokens=256,
                  top_p=1,
                  frequency_penalty=0,
                  presence_penalty=0,
                  api_key= userdata.get('chatgpt'),
                  user_message='which one is about finance')


An unexpected error occurred: [Errno 2] No such file or directory: 'knowledgebase/pipeline_recommendation.txt'


"It seems like your question might be missing some context. Could you please provide more details or specify the options you are asking about regarding finance? Whether you're asking about books, topics, professions, or something else related to finance, just let me know so I can give you the most accurate answer."

In [136]:
openai.api_key = userdata.get('chatgpt')

combined_message=' summarize'


# Create a chat completion request with the specified parameters
response = openai.chat.completions.create(
    model='gpt-4-turbo-preview',
    messages=[{"role": "user", "content": combined_message}],

)

response.to_dict()

{'id': 'chatcmpl-9IwKrT39qgA2Wtv405MgLo2oc1eGR',
 'choices': [{'finish_reason': 'stop',
   'index': 0,
   'logprobs': None,
   'message': {'content': "Certainly! To provide an accurate summary, I'll need more information about the content or topic you're interested in. Please provide details about the article, document, story, or subject matter you'd like summarized.",
    'role': 'assistant'}}],
 'created': 1714301173,
 'model': 'gpt-4-0125-preview',
 'object': 'chat.completion',
 'system_fingerprint': 'fp_d65ac1064c',
 'usage': {'completion_tokens': 42, 'prompt_tokens': 8, 'total_tokens': 50}}

In [137]:
response.to_dict()['choices'][0]['message']['content']

"Certainly! To provide an accurate summary, I'll need more information about the content or topic you're interested in. Please provide details about the article, document, story, or subject matter you'd like summarized."

In [138]:
client = OpenAI(api_key=userdata.get('chatgpt') )

completion = openai.chat.completions.create(
  model='gpt-4-turbo-preview',
  messages=[
    {"role": "system", "content": "You are a data analytics pipeline assistant"},
    {"role": "user", "content": "recommend"}
  ]
)

display(completion.choices[0].message.content)

"Of course, I'd be glad to make recommendations, but I need a bit more information to give you a tailored suggestion. Are you looking for recommendations on tools for a specific part of a data analytics pipeline, such as data extraction, transformation, loading (ETL), analysis, visualization, or something else? Or are you interested in best practices, platforms, programming languages, libraries, or perhaps recommendations on learning resources? Kindly provide a bit more detail, and I'll be more than happy to assist!"

In [139]:
client.files.create(
  file=open("/content/drive/MyDrive/SLEGO/slegospace/knowledgespace/AirQualityMLTask.json", "rb"),
  purpose="fine-tune"
)


FileObject(id='file-RWEGft7Vr7H6QPlLrMJvdiLi', bytes=1403, created_at=1714301180, filename='AirQualityMLTask.json', object='file', purpose='fine-tune', status='processed', status_details=None)

In [140]:
from openai import OpenAI

client.files.list()

SyncPage[FileObject](data=[FileObject(id='file-RWEGft7Vr7H6QPlLrMJvdiLi', bytes=1403, created_at=1714301180, filename='AirQualityMLTask.json', object='file', purpose='fine-tune', status='processed', status_details=None), FileObject(id='file-yC3yQvDiGPn0QbTdd2yIkIIF', bytes=1403, created_at=1714282330, filename='AirQualityMLTask.json', object='file', purpose='fine-tune', status='processed', status_details=None), FileObject(id='file-o3bwYD2aGsCBMCEMKFrf9iGp', bytes=1403, created_at=1714282279, filename='AirQualityMLTask.json', object='file', purpose='fine-tune', status='processed', status_details=None), FileObject(id='file-cLJEPAOWcOS4vFtdEJgF0oPC', bytes=1403, created_at=1714282245, filename='AirQualityMLTask.json', object='file', purpose='fine-tune', status='processed', status_details=None), FileObject(id='file-2g5LG1pWK91xOkFaFbZs2fIv', bytes=93252, created_at=1707052315, filename='dataModelConcept.png', object='file', purpose='assistants', status='processed', status_details=None), Fi

In [141]:
client.files.retrieve('file-o3bwYD2aGsCBMCEMKFrf9iGp')

FileObject(id='file-o3bwYD2aGsCBMCEMKFrf9iGp', bytes=1403, created_at=1714282279, filename='AirQualityMLTask.json', object='file', purpose='fine-tune', status='processed', status_details=None)

In [142]:
returns= client.files.content('file-o3bwYD2aGsCBMCEMKFrf9iGp')

In [143]:
type(returns)