<a href="https://colab.research.google.com/github/alanntl/SELGO-LITE/blob/alanntl/SLEGO_LITE_APR26.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SLEGO Project: UNSW CSE PhD Research - Alan Siu Lung Ng


# Connect this Notebook to your GoogleDrive
To run Colab against a local runtime, paste this command into a terminal:

  jupyter notebook --NotebookApp.allow_origin='https://colab.research.google.com' --port=8888 --NotebookApp.port_retries=0

In [1]:
%%time
import sys

# Function to check if running in Google Colab
def is_colab_runtime():
    """Tell whether the ``google.colab`` package is loaded in this interpreter.

    Returns:
        bool: True when ``'google.colab'`` is a key of ``sys.modules``, otherwise False.
    """
    loaded_modules = sys.modules
    return 'google.colab' in loaded_modules

# Define main folder path based on runtime environment
if is_colab_runtime():
    print("This is running in Google Colab.")
    # Root of the mounted Drive. It must be defined in this branch too, because
    # the virtual-environment cell builds its path from `drive_folder`;
    # without it that cell raises NameError on Colab.
    drive_folder = '/content/drive/MyDrive'
    drive_mainfolder = f'{drive_folder}/SLEGO'
    # Import necessary Colab modules here
    from google.colab import drive
    from google.colab import files
    # Mount Google Drive
    drive.mount('/content/drive', force_remount=True)
else:
    print("This is running in a local or other remote runtime.")
    # Placeholder account: replace with the Google account whose Drive is synced locally.
    gmailaccount = 'xxxxxx@gmail.com'
    # Project folder inside the locally synced Google Drive.
    drive_mainfolder = f"/Users/an/Library/CloudStorage/GoogleDrive-{gmailaccount}/My Drive/SLEGO"
    # Root of the locally synced Google Drive (parent of the project folder).
    drive_folder= f"/Users/an/Library/CloudStorage/GoogleDrive-{gmailaccount}/My Drive/"

# This %%time magic command only works in IPython/Jupyter environments, and its placement should be at the start of a cell.
# If needed for timing in a script, use the time module or other Python profiling tools.


This is running in a local or other remote runtime.
CPU times: user 402 µs, sys: 136 µs, total: 538 µs
Wall time: 491 µs


# Clone SLEGO repo

In [2]:
%%time
import os
import sys
from IPython.display import clear_output
import subprocess

# Local folder that holds (or will receive) the repository checkout
repo_path = drive_mainfolder
# Remote URL the checkout is cloned from
repo_url = 'https://github.com/alanntl/SELGO-LITE.git'

# Function to run subprocess commands with error handling
def run_command(command, check=True, **kwargs):
    """Run an external command with ``subprocess.run`` and report success as a truthy value.

    Parameters:
    - command: Argument list (or a single string) handed to ``subprocess.run``.
    - check: Forwarded to ``subprocess.run``; when true, a non-zero exit status counts as a failure.
    - **kwargs: Extra keyword arguments forwarded unchanged to ``subprocess.run``.

    Returns:
    - True when ``subprocess.run`` completed without raising; None when the command
      exited non-zero (with ``check=True``) or could not be started at all.
    """
    # A plain string must not go through ' '.join, which would space out every character.
    shown = command if isinstance(command, str) else ' '.join(map(str, command))
    try:
        subprocess.run(command, check=check, **kwargs)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a program that is missing or not executable (e.g. git not installed).
        print(f"Error running command {shown}: {e}")
        return None
    return True

# Ensure the repository path exists
if not os.path.exists(repo_path):
    # Nothing at the target path yet: make a fresh clone.
    if run_command(['git', 'clone', repo_url, repo_path]):
        print("Repository cloned.")
else:
    # Change to the repository directory (the git commands below run in the current directory)
    os.chdir(repo_path)

    # Remote ref the local checkout is compared against.
    remote_ref = 'origin/master'

    # Fetch latest changes from the repository
    if run_command(['git', 'fetch']):
        # List the files that differ between the local HEAD and the remote ref
        result = subprocess.run(['git', 'diff', '--name-only', 'HEAD', remote_ref], capture_output=True, text=True)
        if result.returncode != 0:
            # A failed comparison must not be reported as "no changes".
            print(f"Could not compare HEAD with {remote_ref}: {result.stderr.strip()}")
        else:
            changed_files = result.stdout.splitlines()
            # Only files located inside a sub-directory count as a directory change
            directory_changes = any(os.path.dirname(f) for f in changed_files)

            if directory_changes:
                # If there are directory changes, pull the updates
                if run_command(['git', 'pull']):
                    print("Repository updated with new directory changes.")
            else:
                print("No directory changes detected; no update necessary.")


Error running command git fetch: Command '['git', 'fetch']' returned non-zero exit status 1.
CPU times: user 1.82 ms, sys: 5.67 ms, total: 7.49 ms
Wall time: 1.13 s


fatal: bad object refs/remotes/origin/alanntl (1)
error: https://github.com/alanntl/SELGO-LITE.git did not send all necessary objects



# Set up the virtual environment

In [3]:
%%time
import os
import sys

# Define the path to the virtual environment
slego_env = f"{drive_folder}/slego_env_v0_0_1"
requirements_file = f"{drive_mainfolder}/requirements.txt"

# Check if the virtual environment directory does not exist
if not os.path.exists(slego_env):
    # Install virtualenv if it's not installed
    !pip install virtualenv
    # Create the virtual environment
    !virtualenv "{slego_env}"

    # Activate the virtual environment
    !source "{slego_env}/bin/activate"

    # Check if the requirements file exists
    if os.path.exists(requirements_file):
        # Install the requirements from the file
        !pip install -r "{requirements_file}"

# Append the path to sys.path
if is_colab_runtime():
    sys.path.append(f"{slego_env}/lib/python3.10/site-packages")
else:
    !pip install -r "{requirements_file}"


CPU times: user 57.2 ms, sys: 21.2 ms, total: 78.4 ms
Wall time: 6.16 s


# Set up the app folder structure for SLEGO

In [4]:
# On Colab, (re)mount Google Drive; this asks for authorization.
if is_colab_runtime():
    drive.mount('/content/drive', force_remount=True)

# Top-level workspace folder inside the project folder
folder_path = f'{drive_mainfolder}/slegospace'

# Sub-folder suffixes, meant to be appended to folder_path
# (e.g. folder_path + functionspace).
dataspace, recordspace, functionspace, knowledgespace = (
    '/dataspace/',
    '/recordspace/',
    '/functionspace/',
    '/knowledgespace/',
)

# On Colab, open the workspace in the file browser pane.
if is_colab_runtime():
    from google.colab import files
    files.view(folder_path)

# Make the workspace the current working directory for the following cells.
os.chdir(folder_path)

# Import Libraries
Import all the related libraries

In [20]:
%%time
import panel as pn
import inspect
import ast  # For safely evaluating the input string
import re
import importlib
import json
import io
import time
import param
import json
from datetime import datetime
import itertools


# if colab or vscode or jupyter
# Choose the Panel comms backend from the textual form of the active IPython shell.
_shell_repr = str(get_ipython())
if 'google.colab' in _shell_repr:
    _comms = 'colab'
elif 'vscode' in _shell_repr:
    _comms = 'vscode'
else:
    print('Unknown runtime environment')
    _comms = 'colab'
pn.extension(comms=_comms)

# Global sizing default, then the extra widget extensions used by the app.
pn.extension(sizing_mode='stretch_both')
pn.extension('ace', 'jsoneditor')

Unknown runtime environment


CPU times: user 35.1 ms, sys: 5.9 ms, total: 41 ms
Wall time: 40 ms


In [18]:
# Ad-hoc check: does the textual form of the active IPython shell mention 'google.colab'?
'google.colab' in str(get_ipython())

False

# Select which modules to import

In [22]:
%%time
def delete_func_file(func_file_path):
    """Remove the file at ``func_file_path`` when it exists; otherwise print a notice.

    Parameters:
    - func_file_path: Path of the file to delete.
    """
    # Guard clause: nothing to delete, just tell the user.
    if not os.path.exists(func_file_path):
        print(f"No file named {func_file_path} found.")
        return
    os.remove(func_file_path)


# Step 3: Get the list of .py files in the folder.
# os.listdir returns entries in arbitrary order, so sort them to keep the option list stable.
py_files = sorted(file for file in os.listdir(folder_path+functionspace) if file.endswith('.py'))

# New Step: Check if func.py exists and delete it
func_file_path = 'func.py'
delete_func_file(func_file_path)

# Function files that are pre-selected when the widget first appears
default_func_files = ['util.py',
                      'func_data_preprocss.py',
                      'func_moving_avg_plot.py',
                      'llm.py',
                      'func_eda.py',
                      'func_uci_dataset.py',
                      'webscrape.py',
                      'func_arxiv.py',
                      'func_backtest.py',
                      'func_autogluon.py']

# Pre-select only defaults that really exist in the folder: a missing file would
# make funcfilecombo_change fail when it tries to open it.
funcfilecombo = pn.widgets.MultiChoice(name='Select Function',
                                       value=[file for file in default_func_files if file in py_files],
                                       options=py_files, height=200)

# create funcfilecombo_change function
def funcfilecombo_change(event):
    """Rebuild ``func.py`` from the selected source files and (re)load it as module ``func``.

    The contents of every file selected in ``funcfilecombo`` are concatenated, in
    selection order, into ``func_file_path``.

    Parameters:
    - event: Watcher event from the widget; unused (``None`` on the manual call).
    """
    import sys

    delete_func_file(func_file_path)

    selected_files = funcfilecombo.value
    # Step 4 (Modified): Create a new file named func.py in the current directory
    with open(func_file_path, 'w') as func_file:
        # Step 5: Iterate over each selected .py file and append its content to func.py
        for py_file in selected_files:
            print(py_file)
            file_path = os.path.join(folder_path+functionspace, py_file)
            with open(file_path, 'r') as file:
                func_file.write(file.read() + '\n')

    # func.py was just created on disk: clear the import system's cached directory
    # listings so the new file is noticed.
    importlib.invalidate_caches()
    if 'func' in sys.modules:
        # Already imported earlier: re-execute it once with the new contents.
        importlib.reload(sys.modules['func'])
    else:
        # First import executes the module; reloading right after would run it twice.
        importlib.import_module('func')


# Rebuild func.py every time the selection in the widget changes
funcfilecombo.param.watch(funcfilecombo_change, 'value')

# Show the selection widget in the cell output
display(funcfilecombo)

# Run the handler once by hand so func.py exists for the initial selection
funcfilecombo_change(None)
# List the working directory (the generated func.py should be in it)
!ls

No file named func.py found.
util.py
func_data_preprocss.py
func_moving_avg_plot.py
llm.py
func_eda.py
func_uci_dataset.py
webscrape.py
func_arxiv.py
func_backtest.py
func_autogluon.py
[34mAutogluonModels[m[m        [34mdataspace[m[m              [34mknowledgespace[m[m
[34m__pycache__[m[m            func.py                [34mrecordspace[m[m
[34mag_multimodal_tutorial[m[m [34mfunctionspace[m[m
CPU times: user 26.9 ms, sys: 16 ms, total: 42.9 ms
Wall time: 161 ms


# SLEGO APP

In [23]:
%%time
# Load the generated function module and refresh it in case it was rebuilt.
import func
importlib.reload(func)

# --- Left-hand side: function selection, parameters, pipeline controls ---
funccombo = func._create_multi_select_combobox(func)
input_text = pn.widgets.TextAreaInput(
    value='',
    placeholder='input the parameters',
    height=300,
)
compute_btn = pn.widgets.Button(name='Compute', height=50, button_type='primary')
savepipe_btn = pn.widgets.Button(name='Save Pipeline', height=35)
pipeline_text = pn.widgets.TextInput(
    value='',
    placeholder='Input Pipeline Name',
    height=35,
)
json_toggle = pn.widgets.Toggle(
    name='Input mode: text or form',
    height=35,
    button_type='warning',
)

# Pipeline currently being edited; also the initial content of the JSON editor.
pipeline_dict = {}
json_editor = pn.widgets.JSONEditor(value=pipeline_dict, height=400, mode='form')

# --- Right-hand side: progress / query box and result box ---
progress_text = pn.widgets.TextAreaInput(
    value='',
    placeholder='Input your analytics query here',
    name='Progress msg and query inputs for recommendation:',
    height=150,
)
output_text = pn.widgets.TextAreaInput(
    value='',
    placeholder='Results will be shown here',
    name='System output message:',
)

# --- Recommendation controls ---
recommendation_btn = pn.widgets.Button(
    name='Get Recommendation',
    height=35,
    button_type='success',
)
recomAPI_text = pn.widgets.TextInput(
    value='',
    placeholder='Your AI API key',
    height=35,
)

# recommendation_btn param watch
def recommendation_btn_clicked(event):
    """Ask the chat model for a pipeline recommendation when the button is pressed.

    The API key is read from the ``recomAPI_text`` widget, falling back to the
    ``OPENAI_API_KEY`` environment variable. It is deliberately not stored in the
    notebook: a key committed with the file is readable by anyone who can open it.

    Parameters:
    - event: Watcher event of the button; nothing happens unless ``event.new`` is truthy.
    """
    import os

    if not event.new:
        return

    api_key = recomAPI_text.value.strip() or os.environ.get('OPENAI_API_KEY', '')
    if not api_key:
        output_text.value = 'No API key provided: enter one in the API key box or set OPENAI_API_KEY.'
        return

    output_text.value = 'recommenda clicked'

    # NOTE(review): any key that was previously hard-coded here should be treated
    # as leaked and revoked with the provider.
    func.chatgpt_chat(model='gpt-3.5-turbo',
              user_input_file='dataspace/user_text_input.txt',
              output_text_file='dataspace/gpt_output_text.txt',
              output_json_file='dataspace/gpt_output_full.json',
              temperature=1,
              max_tokens=256,
              top_p=1,
              frequency_penalty=0,
              presence_penalty=0,
              api_key=api_key,
              user_message='[# your query# ], \n generate the pipeline dictionary for me base on my query and input:')

recommendation_btn.param.watch(recommendation_btn_clicked, 'value')


def json_toggle_clicked(event):
    """Put the JSON editor in 'text' mode when the toggle is on, 'form' mode when it is off.

    Parameters:
    - event: Watcher event of the toggle; ``event.new`` carries the new toggle state.
    """
    json_editor.mode = 'text' if event.new else 'form'

# Switch the JSON editor's mode whenever the toggle's value changes
json_toggle.param.watch(json_toggle_clicked, 'value')

# Disabled watcher: no json_editor_change handler is defined in the visible cells
# json_editor.param.watch(json_editor_change, 'value')



def get_doc_string(pipeline):
    """Collect the docstrings of the functions named in the parameter text box.

    The text of ``input_text`` is parsed as a Python literal and each of its keys
    is looked up as an attribute of the ``func`` module.

    Parameters:
    - pipeline: Unused; kept so existing callers keep working. The text is always
      read from ``input_text.value``.

    Returns:
    - str: For every key, a ``#######key#######`` header line followed by the
      docstring, or by 'No docstring found for this function' when the name does
      not exist in ``func`` or has no docstring.

    Raises:
    - ValueError / SyntaxError: when the text is not a valid Python literal.
    """
    data = ast.literal_eval(input_text.value)
    missing = object()  # sentinel: distinguishes "no such attribute" from an attribute set to None
    output = ''
    for key in data.keys():
        output += '#######' + str(key) + '#######\n'
        # Plain attribute lookup instead of eval(): the keys come from user-editable
        # text and must never be executed as code.
        target = getattr(func, str(key), missing)
        doc = None if target is missing else getattr(target, '__doc__', None)
        if isinstance(doc, str):
            output += doc + '\n'
        else:
            output += 'No docstring found for this function\n'
    return output

def input_text_change(event):
    """Parse the parameter text box into the module-level ``pipeline_dict``.

    The text is first read as strict JSON, because the box is normally filled with
    ``json.dumps`` output, which spells booleans and None as true/false/null. If
    that fails, true/false/null are mapped to Python literals (case-insensitively)
    and the text is parsed with ``ast.literal_eval``.

    On a parse error the message is written to ``output_text`` and
    ``pipeline_dict`` keeps its previous value.

    Parameters:
    - event: Watcher event of the text box; unused (``None`` on manual calls).
    """
    global pipeline_dict
    raw = input_text.value
    try:
        # Strict JSON handles true/false/null without touching text inside strings.
        pipeline_dict = json.loads(raw)
        return
    except ValueError:
        pass

    text = raw
    for pattern, literal in ((r'\bfalse\b', 'False'), (r'\btrue\b', 'True'), (r'\bnull\b', 'None')):
        text = re.sub(pattern, literal, text, flags=re.IGNORECASE)
    try:
        pipeline_dict = ast.literal_eval(text)
        #json_editor.value = pipeline_dict
    except (ValueError, SyntaxError) as e:
        # literal_eval raises SyntaxError for malformed or empty text, not only ValueError.
        output_text.value = f'Error parsing input: {e}'


def funccombo_change(event):
    """Fill the editors with the default parameters of the selected functions.

    For every name selected in ``funccombo`` the matching attribute of ``func`` is
    passed to ``func._extract_parameter``. The resulting name -> parameters mapping
    is put into the JSON editor and, as indented JSON text, into ``input_text``.
    The output box then shows the result of ``get_doc_string`` and the progress
    box names the selection.

    Parameters:
    - event: Watcher event of the selection widget; unused.
    """
    from operator import attrgetter

    output_text.value = ''
    list_funcs = funccombo.value

    # attrgetter resolves the (possibly dotted) name on the module without eval().
    list_params = [func._extract_parameter(attrgetter(funcchoice)(func))
                   for funcchoice in list_funcs]

    funcs_params = dict(zip(list_funcs, list_params))
    formatted_data = json.dumps(funcs_params, indent=5)

    json_editor.value = funcs_params

    input_text.value = formatted_data
    output_text.value = get_doc_string(input_text.value)
    progress_text.value = f'selected {funccombo.value}!'



def save_record(space, data, pipeline_name=None):
    """
    Saves the given data structure as a JSON file inside the folder `space`.

    Parameters:
    - space: Folder, relative to the current working directory, that receives the
      file. It is created when it does not exist yet.
    - data: The data structure to be saved as JSON.
    - pipeline_name: File name without extension. When None, the file is named
      after the current datetime, e.g. "record_20230405_153000.json".

    Raises:
    - TypeError / ValueError: when `data` cannot be serialized as JSON. No file is
      created or modified in that case.
    """
    if pipeline_name is None:
        # Generate a filename based on the current datetime
        filename = datetime.now().strftime("record_%Y%m%d_%H%M%S.json")
    else:
        filename = pipeline_name + '.json'

    # Serialize before opening the file: a serialization error must not leave a
    # truncated file behind or wipe a previously saved record of the same name.
    payload = json.dumps(data, indent=5)

    target_dir = os.path.join('.', space)
    os.makedirs(target_dir, exist_ok=True)
    full_path = os.path.join(target_dir, filename)

    with open(full_path, "w") as file:
        file.write(payload)


def compute_btn_clicked(event):
    """Run every function of the pipeline held in the JSON editor and show the results.

    For each ``name -> parameters`` entry, in order, the attribute ``name`` of the
    ``func`` module is called with the parameters as keyword arguments. The first
    500 whitespace-separated words of ``str(result)`` and the call duration are
    appended to ``output_text``. After all steps the pipeline is stored through
    ``save_record('recordspace', ...)``.

    If a step fails, the error is shown in ``output_text`` and ``progress_text``,
    nothing is saved, and the exception is re-raised.

    Parameters:
    - event: Click event of the button; unused.
    """
    from operator import attrgetter

    progress_text.value = 'Computing...'
    input_text_change(None)
    # NOTE(review): presumably gives the UI time to show the progress message - confirm
    time.sleep(1)

    pipeline_dict = json_editor.value
    output_text.value = ''

    for function_name, parameters in pipeline_dict.items():
        progress_text.value = f'Computing {function_name}...'
        start_time = time.time()  # Start the timer

        try:
            # Attribute lookup instead of eval(): the names come from an editable
            # widget and must never be executed as code.
            function = attrgetter(function_name)(func)
            result = function(**parameters)
        except Exception as e:
            # Surface the failure instead of leaving the progress box on "Computing ...".
            output_text.value += f"\n===================={function_name}====================\n\nError: {e}\n"
            progress_text.value = f'Failed at {function_name}: {e}'
            raise
        # Stop the timer right after the call so formatting the result is not counted.
        compute_time = time.time() - start_time

        # Keep only the first 500 words of the textual result
        first_x_words = itertools.islice(iter(str(result).split()), 500)

        output_text.value += (
            f"\n===================={function_name}====================\n\n"
            f"Function computation Time: {compute_time:.4f} seconds\n"
            f"\n"
            + " ".join(first_x_words)
        )

    save_record('recordspace', pipeline_dict)
    progress_text.value = 'Done!'

def save_pipeline(event):
    """Save the pipeline written in the parameter text box to 'knowledgespace'.

    The file is named after ``pipeline_text``; '__' is used when that box is empty.
    The text is read as strict JSON first (the box is normally filled with
    ``json.dumps`` output). Otherwise true/false/null are mapped to Python
    literals and the text is parsed with ``ast.literal_eval``. When the text
    cannot be parsed, the error is written to ``output_text`` and nothing is saved.

    Parameters:
    - event: Click event of the button; unused.
    """
    if pipeline_text.value == '':
        pipeline_name = '__'
    else:
        pipeline_name = pipeline_text.value

    raw = input_text.value
    try:
        data = json.loads(raw)
    except ValueError:
        text = raw
        for pattern, literal in ((r'\bfalse\b', 'False'), (r'\btrue\b', 'True'), (r'\bnull\b', 'None')):
            text = re.sub(pattern, literal, text, flags=re.IGNORECASE)
        try:
            data = ast.literal_eval(text)
        except (ValueError, SyntaxError) as e:
            output_text.value = f'Error parsing input: {e}'
            return

    save_record('knowledgespace', data, pipeline_name)

# Connect the widgets to their handlers.
funccombo.param.watch(funccombo_change, 'value')
input_text.param.watch(input_text_change, 'value')

compute_btn.on_click(compute_btn_clicked)
savepipe_btn.on_click(save_pipeline)


# Two-column layout: pipeline editing on the left, recommendation and messages on the right.
app = pn.Row(
    pn.Column(
        funccombo,
        compute_btn,
        pn.Row(savepipe_btn, pipeline_text, json_toggle),
        json_editor,
    ),
    pn.Column(
        pn.Row(recommendation_btn, recomAPI_text),
        progress_text,
        pn.layout.Divider(height=10, margin=10),
        output_text,
    ),
)

CPU times: user 8.46 ms, sys: 2.48 ms, total: 10.9 ms
Wall time: 10.1 ms


# Display the App

In [24]:
# Render the assembled app. API keys must never be pasted into notebook cells or
# comments; enter the key in the app's API key box at run time instead.
app