<a href="https://colab.research.google.com/github/alanntl/SELGO-LITE/blob/alanntl/SLEGO_LITE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SLEGO Project: UNSW CSE PhD Research - Alan Siu Lung Ng
https://github.com/alanntl/SELGO-LITE

# Connect this Notebook to your GoogleDrive
To run Colab against a local runtime, paste this command into a terminal:

  jupyter notebook --NotebookApp.allow_origin='https://colab.research.google.com' --port=8888 --NotebookApp.port_retries=0

For running SLEGO locally, you need to download Google Drive on your local computer to make the local path available.

If you are collaborating on data analytics with SLEGO, share your SLEGO folder with your teammates. Each person the folder is shared with needs to add a shortcut to this SLEGO folder in the main (top-level) folder of their own Google Drive.

In [9]:
%%time
import sys

# Runtime detection helper
def is_colab_runtime():
    """Return True when the ``google.colab`` module has been loaded into this interpreter."""
    colab_module_name = 'google.colab'
    return colab_module_name in sys.modules

# Define main folder path based on runtime environment.
#   * Colab: Google Drive is mounted under /content/drive.
#   * Anywhere else: the defaults below reproduce the original machine-specific
#     Google Drive location, but each one can be overridden with an environment
#     variable so other users do not have to edit the notebook:
#       SLEGO_GMAIL_ACCOUNT - account name used in the Google Drive mount path
#       SLEGO_DRIVE_FOLDER  - the Drive root folder ("My Drive")
#       SLEGO_MAIN_FOLDER   - the SLEGO folder itself
if is_colab_runtime():
    print("This is running in Google Colab.")
    drive_mainfolder = '/content/drive/MyDrive/SLEGO'
    drive_folder= '/content/drive/MyDrive/'
    # Import necessary Colab modules here (they only exist on Colab)
    from google.colab import drive
    from google.colab import files
    # Mount Google Drive
    drive.mount('/content/drive', force_remount=True)
else:
    import os

    print("This is running in a local or other remote runtime.")
    gmailaccount = os.environ.get('SLEGO_GMAIL_ACCOUNT', 'alann5157@gmail.com')
    drive_folder = os.environ.get(
        'SLEGO_DRIVE_FOLDER',
        f"/Users/an/Library/CloudStorage/GoogleDrive-{gmailaccount}/My Drive/",
    )
    # os.path.join does not double the separator when drive_folder already ends with '/'
    drive_mainfolder = os.environ.get('SLEGO_MAIN_FOLDER', os.path.join(drive_folder, 'SLEGO'))

# This %%time magic command only works in IPython/Jupyter environments, and its placement should be at the start of a cell.
# If needed for timing in a script, use the time module or other Python profiling tools.


This is running in Google Colab.
Mounted at /content/drive
CPU times: user 178 ms, sys: 28.3 ms, total: 206 ms
Wall time: 3.96 s


# Clone SLEGO repo

In [10]:
%%time
import os
import sys
from IPython.display import clear_output
import subprocess

# Local checkout location (the SLEGO folder on Google Drive) and the remote it is cloned from
repo_path = drive_mainfolder
repo_url = 'https://github.com/alanntl/SELGO-LITE.git'

# Function to run subprocess commands with error handling
def run_command(command, check=True, **kwargs):
    """Run an external command, reporting failures instead of raising.

    Parameters:
    - command: argument list (or a single string when used with ``shell=True``).
    - check: forwarded to ``subprocess.run``; when True a non-zero exit status
      counts as a failure.
    - **kwargs: forwarded unchanged to ``subprocess.run``.

    Returns:
    - True when the command ran without a reported failure, None otherwise.
    """
    try:
        subprocess.run(command, check=check, **kwargs)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable program (e.g. git not
        # installed), which would otherwise escape as an unhandled exception.
        label = command if isinstance(command, str) else ' '.join(map(str, command))
        print(f"Error running command {label}: {e}")
        return None
    return True

# Clone the repository on first use; otherwise fetch and pull when files inside
# sub-directories differ from the upstream branch.
if not os.path.exists(repo_path):
    if run_command(['git', 'clone', repo_url, repo_path]):
        print("Repository cloned.")
else:
    # Change to the repository directory (the git commands below rely on the cwd)
    os.chdir(repo_path)

    # Fetch latest changes from the repository
    if run_command(['git', 'fetch']):
        # Compare against the branch that `git pull` will actually pull from
        # (@{upstream}) rather than a hard-coded origin/master, which may not
        # exist or may not be the checked-out branch.
        result = subprocess.run(['git', 'diff', '--name-only', 'HEAD', '@{upstream}'], capture_output=True, text=True)

        if result.returncode != 0:
            # A failed diff must not be mistaken for "nothing changed".
            print(f"Could not compare with the upstream branch: {result.stderr.strip()}")
        else:
            changed_files = result.stdout.splitlines()
            # Only files that live inside a sub-directory count as a directory change
            directory_changes = any(os.path.dirname(f) for f in changed_files)

            if directory_changes:
                # If there are directory changes, pull the updates
                if run_command(['git', 'pull']):
                    print("Repository updated with new directory changes.")
            else:
                print("No directory changes detected; no update necessary.")


Error running command git fetch: Command '['git', 'fetch']' returned non-zero exit status 1.
CPU times: user 8.73 ms, sys: 1.97 ms, total: 10.7 ms
Wall time: 1.09 s


# Set up the virtual environment

In [11]:
%%time
import os
import sys

# Define the path to the virtual environment
slego_env = f"{drive_folder}/slego_env_v0_0_1"
requirements_file = f"{drive_mainfolder}/requirements.txt"

# Check if the virtual environment directory does not exist
if not os.path.exists(slego_env):
    # Install virtualenv if it's not installed
    !pip install virtualenv
    # Create the virtual environment
    !virtualenv "{slego_env}"

    # Activate the virtual environment
    !source "{slego_env}/bin/activate"

    # Check if the requirements file exists
    if os.path.exists(requirements_file):
        # Install the requirements from the file
        !pip install -r "{requirements_file}"

# Append the path to sys.path
if is_colab_runtime()==True:
    sys.path.append(f"{slego_env}/lib/python3.10/site-packages")
else:
    !pip install -r "{requirements_file}"


CPU times: user 937 µs, sys: 0 ns, total: 937 µs
Wall time: 831 µs


# Set up the app folder system for SLEGO

In [12]:
# On Colab, (re)mount Google Drive; this prompts for authorization.
if is_colab_runtime():
    drive.mount('/content/drive', force_remount=True)

# Root folder of the SLEGO workspace
folder_path = drive_mainfolder + '/slegospace'

# Sub-folder suffixes. They are appended to folder_path (or resolved against the
# working directory set below) to reach each space of the workspace.
dataspace = '/dataspace/'            # datasets and intermediate files
recordspace = '/recordspace/'        # executed pipeline records
functionspace = '/functionspace/'    # source files of the available functions
knowledgespace = '/knowledgespace/'  # saved pipelines

if is_colab_runtime():
    # Open the workspace in Colab's file browser
    from google.colab import files
    files.view(folder_path)

# Work from the workspace root so relative paths resolve inside it
os.chdir(folder_path)

Mounted at /content/drive


<IPython.core.display.Javascript object>

# Import Libraries
Import all the related libraries

In [13]:
%%time
import panel as pn
import inspect
import ast  # For safely evaluating the input string
import re
import importlib
import json
import io
import time
import param
import json
from datetime import datetime
import itertools
import pandas as pd
# Initialise Panel for notebook output, then configure it step by step.
pn.extension()
# Let components stretch in both directions by default
pn.extension(sizing_mode = 'stretch_both')
# 'jsoneditor' backs the JSONEditor widget used by the app; 'ace' is loaded as well
# (no Ace editor widget is created in this notebook; presumably kept for later use)
pn.extension('ace', 'jsoneditor')
# 'tabulator' backs the Tabulator file table
pn.extension('tabulator')


CPU times: user 86.5 ms, sys: 2.06 ms, total: 88.6 ms
Wall time: 86.6 ms


# Select which modules to import

In [14]:
%%time
def delete_func_file(func_file_path):
    """Remove the file at ``func_file_path``; print a notice when there is nothing to remove."""
    # Guard clause: nothing on disk means nothing to delete
    if not os.path.exists(func_file_path):
        print(f"No file named {func_file_path} found.")
        return

    os.remove(func_file_path)


# Step 3: Get the list of .py files in the functionspace folder.
# Sorted so the option order is stable across runs (os.listdir order is arbitrary).
py_files = sorted(file for file in os.listdir(folder_path+functionspace) if file.endswith('.py'))

# New Step: Check if func.py exists and delete it
func_file_path = 'func.py'
delete_func_file(func_file_path)

# Function files that are pre-selected when the selector is first shown
default_func_files = ['util.py',
                      'func_data_preprocss.py',
                      'func_moving_avg_plot.py',
                      'llm.py',
                      'func_viz.py',
                      'func_eda.py',
                      'func_uci_dataset.py',
                      'webscrape.py',
                      'func_arxiv.py',
                      'func_backtest.py',
                      'func_autogluon.py']

# Only pre-select files that really exist: a default missing from functionspace
# would make the later concatenation into func.py fail when it tries to open it.
funcfilecombo = pn.widgets.MultiChoice(name='Select Function',
                                       value=[name for name in default_func_files if name in py_files],
                                       options=py_files, height=200)

# create funcfilecombo_change function
def funcfilecombo_change(event):
    """Rebuild ``func.py`` from the selected function files and (re)load it.

    The currently selected files of ``funcfilecombo`` are concatenated, in
    selection order, into ``func_file_path``; the resulting ``func`` module is
    then imported, or reloaded when it was imported before.

    Parameters:
    - event: the param change event (unused; ``None`` for the initial call).
    """
    delete_func_file(func_file_path)

    selected_files = funcfilecombo.value
    # Step 4 (Modified): Create a new file named func.py in the current repository
    with open(func_file_path, 'w') as func_file:
        # Step 5: Iterate over each .py file and append its content to func.py
        for py_file in selected_files:
            print(py_file)
            file_path = os.path.join(folder_path+functionspace, py_file)
            with open(file_path, 'r') as file:
                func_file.write(file.read() + '\n')

    # func.py was just (re)created on disk: drop the import system's cached
    # directory listings so the new file is guaranteed to be found.
    importlib.invalidate_caches()
    if 'func' in sys.modules:
        importlib.reload(sys.modules['func'])
    else:
        # First load: a plain import is enough (import + reload would run it twice)
        importlib.import_module('func')


# Rebuild func.py whenever the selection in the widget changes
funcfilecombo.param.watch(funcfilecombo_change, 'value')

# Show the selector in the notebook output
display(funcfilecombo)

# Build func.py once for the initial selection (no event object is needed)
funcfilecombo_change(None)

No file named func.py found.
util.py
func_data_preprocss.py
func_moving_avg_plot.py
llm.py
func_viz.py
func_eda.py
func_uci_dataset.py
webscrape.py
func_arxiv.py
func_backtest.py
func_autogluon.py
CPU times: user 52.4 ms, sys: 3.27 ms, total: 55.6 ms
Wall time: 100 ms


# SLEGO APP

In [27]:
%%time
# Load (and refresh) the generated func module so the widgets below see the
# currently selected functions.
import func
importlib.reload(func)

# Function selector built by a helper that lives in the func module itself
funccombo = func._create_multi_select_combobox(func)

# --- Pipeline controls -------------------------------------------------------
compute_btn = pn.widgets.Button(name='Compute', height =50,  button_type='primary')
savepipe_btn = pn.widgets.Button(name='Save Pipeline', height =35)
pipeline_text= pn.widgets.TextInput(value='', placeholder='Input Pipeline Name', height=35)
json_toggle = pn.widgets.Toggle(name='Input mode: text or form', height =35, button_type='warning')

# --- Pipeline editors and message areas ---------------------------------------
# The pipeline is edited either as a form (json_editor) or as raw text (input_text).
pipeline_dict = {}
json_editor = pn.widgets.JSONEditor(value=pipeline_dict, height=400, mode='form')
input_text = pn.widgets.TextAreaInput(value='', placeholder='input the parameters', height=400)
progress_text = pn.widgets.TextAreaInput(value='',placeholder='Input your analytics query here', name='Progress msg and query inputs for recommendation:',height=150)
output_text= pn.widgets.TextAreaInput(value='',placeholder= 'Results will be shown here', name='System output message:')

# --- Recommendation controls --------------------------------------------------
recommendation_btn = pn.widgets.Button(name='Get Recommendation', height =35, button_type='success')
recomAPI_text= pn.widgets.TextInput(value='', placeholder='Your AI API key', height=35)


# --- File browser -------------------------------------------------------------
# file_text holds the sub-folder (relative to folder_path) whose files are listed.
file_text = pn.widgets.TextInput(value='/dataspace', placeholder='Input the file name')
# Options are the sub-directories of folder_path plus '/' for the workspace root.
folder_select= pn.widgets.Select(name= 'Select Folder', options=[item for item in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, item))] +['/'], value= 'dataspace', height=50)

filefolder_confirm_btn = pn.widgets.Button(name='Confirm')

file_view = pn.widgets.Button(name='View')
file_download = pn.widgets.Button(name='Download')
file_upload = pn.widgets.Button(name='Upload')
file_input = pn.widgets.FileInput( name='Upload file')
file_delete = pn.widgets.Button(name='Delete')

# Initial listing of the selected folder, shown in a filterable table
selected_folder_path = folder_path + str(file_text.value)
file_list= os.listdir(selected_folder_path)
df_file = pd.DataFrame(file_list, columns=['Filter Files :'])
file_table = pn.widgets.Tabulator(df_file, theme = 'semantic-ui', header_filters=True,layout='fit_data_table', show_index=False,margin=(0,0,30,0))


def folder_select_changed(event):
    """Copy the chosen folder into the path box and refresh the file table."""
    chosen_folder = str(folder_select.value)
    file_text.value = '/' + chosen_folder
    # Reuse the Confirm button's handler to reload the listing
    on_filefolder_confirm_btn_click(None)


def json_editor_change(event):
    """Mirror the JSON editor's value into the text editor as JSON text.

    The value is serialised with ``json.dumps`` so booleans, nulls and strings
    containing quotes become valid JSON. Converting via ``str()`` and swapping
    quote characters produced ``True``/``None`` and broke on apostrophes,
    leaving text that could not be parsed back.

    Parameters:
    - event: the param change event (unused).
    """
    input_text.value = json.dumps(json_editor.value, indent=4)


# recommendation_btn param watch
def recommendation_btn_clicked(event):
    """Ask the LLM helper for a pipeline recommendation when the button is pressed.

    The API key is taken from the ``recomAPI_text`` input, falling back to the
    ``OPENAI_API_KEY`` environment variable. Credentials must never be written
    into the notebook source.

    Parameters:
    - event: the param change event; nothing happens unless ``event.new`` is truthy.
    """
    if not event.new:
        return

    output_text.value = 'recommenda clicked'

    api_key = (recomAPI_text.value or '').strip() or os.environ.get('OPENAI_API_KEY', '')
    if not api_key:
        output_text.value = 'Please enter your AI API key (or set the OPENAI_API_KEY environment variable).'
        return

    func.chatgpt_chat(model='gpt-3.5-turbo',
              user_input_file='dataspace/user_text_input.txt',
              output_text_file='dataspace/gpt_output_text.txt',
              output_json_file='dataspace/gpt_output_full.json',
              temperature=1,
              max_tokens=256,
              top_p=1,
              frequency_penalty=0,
              presence_penalty=0,
              api_key=api_key,
              user_message='[# your query# ], \n generate the pipeline dictionary for me base on my query and input:')


def json_toggle_clicked(event):
    """Switch the editor tabs: toggle on selects tab 1, toggle off selects tab 0."""
    widget_tab.active = 1 if event.new else 0

#
def get_doc_string(pipeline):
    """Collect the docstrings of every function named in the pipeline text.

    The pipeline is read from ``input_text.value`` (JSON); each top-level key is
    looked up as an attribute path on the ``func`` module. Names are resolved
    with ``getattr`` rather than ``eval`` so the text in the editor can never be
    executed as code.

    Parameters:
    - pipeline: unused; kept so existing callers keep working.

    Returns:
    - A string with one ``#######name#######`` header per key followed by the
      function's docstring, or a fallback line when none is available.

    Raises:
    - ValueError: when ``input_text.value`` is not valid JSON.
    """
    data = json.loads(input_text.value)
    output = ''

    for key in data.keys():
        output += '#######'+str(key)+'#######\n'

        # Walk "a.b.c" style names one attribute at a time
        target = func
        try:
            for attr in str(key).split('.'):
                target = getattr(target, attr)
            doc = target.__doc__
        except AttributeError:
            doc = None

        if isinstance(doc, str):
            output += doc+'\n'
        else:
            output += 'No docstring found for this function\n'
    return output

def _parse_pipeline_text(text):
    """Parse pipeline text as JSON, then as a Python literal, then with the legacy quote swap."""
    try:
        return json.loads(text)
    except ValueError:
        pass
    try:
        # Accepts Python-style dicts such as {'f': {'flag': False}}
        return ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError):
        pass
    # Legacy behaviour: single-quoted pseudo-JSON such as {'f': {'flag': true}}
    return json.loads(text.replace("'", '"'))


def input_text_change(event):
    """Parse the text editor's content and sync it to the JSON editor.

    Valid JSON is used as is. Rewriting ``false`` to ``False`` before
    ``json.loads`` made every pipeline containing a JSON ``false`` unparsable.
    On success the text is re-formatted with a 4-space indent; on failure an
    error line is appended to ``output_text`` and nothing else changes.

    Parameters:
    - event: the param change event (unused).
    """
    try:
        pipeline_dict = _parse_pipeline_text(input_text.value)
        # Serialise before touching any widget so a non-JSON value changes nothing
        formatted = json.dumps(pipeline_dict, indent=4)
    except (ValueError, TypeError) as e:
        output_text.value += f'\n Error parsing input: {e}'
        return

    json_editor.value = pipeline_dict
    input_text.value = formatted
    output_text.value += '\n Input changed!'


def funccombo_change(event):
    """Build a pipeline template from the functions selected in ``funccombo``.

    For every selected name the function is looked up on the ``func`` module
    (by attribute access, not ``eval``) and its parameters are extracted with
    ``func._extract_parameter``. The resulting ``{name: parameters}`` mapping is
    pushed to both editors, the docstrings are shown in ``output_text`` and the
    selection is echoed in ``progress_text``.

    Parameters:
    - event: the param change event (unused).
    """
    output_text.value = ''
    list_funcs = funccombo.value
    list_params = []

    for funcchoice in list_funcs:
        # Resolve "a.b" style names one attribute at a time
        function = func
        for attr in str(funcchoice).split('.'):
            function = getattr(function, attr)
        list_params.append(func._extract_parameter(function))

    funcs_params = dict(zip(list_funcs, list_params))
    formatted_data = json.dumps(funcs_params, indent=5)

    json_editor.value = funcs_params

    input_text.value = str(formatted_data)
    output_text.value = get_doc_string(input_text.value)
    progress_text.value = f'selected {funccombo.value}!'



def save_record(space, data, pipeline_name=None):
    """
    Saves the given data structure as a JSON file inside a workspace folder.

    Parameters:
    - space: Name of the folder (relative to the current working directory)
      to save into, e.g. 'recordspace'. It is created when it does not exist.
    - data: The data structure to be saved as JSON.
    - pipeline_name: Optional file name without extension. When omitted the
      file is named after the current datetime, e.g. "record_20230405_153000.json".
    """
    if pipeline_name is None:
        filename = datetime.now().strftime("record_%Y%m%d_%H%M%S.json")
    else:
        filename = pipeline_name+'.json'

    # Make sure the target folder exists so a finished computation is never
    # lost to a FileNotFoundError at save time.
    target_dir = os.path.join('.', space)
    os.makedirs(target_dir, exist_ok=True)
    full_path = os.path.join(target_dir, filename)

    # Serialize and save the data structure to a file
    with open(full_path, "w") as file:
        json.dump(data, file, indent=5)


def compute_btn_clicked(event):
    """Run every function of the pipeline in order and report the results.

    The pipeline is read from ``json_editor.value`` as ``{function_name:
    parameters}``. Each function is looked up on the ``func`` module by
    attribute access (not ``eval``, so a crafted name cannot run arbitrary
    code) and called with its parameters as keyword arguments. The first 500
    whitespace-separated words of each result, or the error message, are
    appended to ``output_text``. The executed pipeline is finally stored in
    'recordspace'.

    Parameters:
    - event: the button click event (unused).
    """
    progress_text.value = 'Computing...'

    # Flip to the other tab and back before reading the editor value
    # (presumably to make the JSON editor commit pending edits; TODO confirm).
    if widget_tab.active==0:
        widget_tab.active=1
        widget_tab.active=0
    else:
        widget_tab.active=0
        widget_tab.active=1
    time.sleep(1)

    pipeline_dict = json_editor.value
    output_text.value = ''

    for function_name, parameters in pipeline_dict.items():
        progress_text.value = f'Computing {function_name}...'

        try:
            start_time = time.time()  # Start the timer

            # Resolve "a.b" style names one attribute at a time
            function = func
            for attr in str(function_name).split('.'):
                function = getattr(function, attr)
            result = function(**parameters)

            # Keep only the first 500 words of the textual result
            first_x_words = itertools.islice(iter(str(result).split()), 500)
            compute_time = time.time() - start_time

            output_text.value += f"\n===================={function_name}====================\n\n"
            output_text.value += f"Function computation Time: {compute_time:.4f} seconds\n"
            output_text.value += f"\n"
            output_text.value += " ".join(first_x_words)

        except Exception as e:
            # Handle errors by appending them to the output text
            output_text.value += f"\n===================={function_name}====================\n\n"
            output_text.value += f"Error occurred: {str(e)}\n"

    save_record('recordspace', pipeline_dict)
    progress_text.value = 'Done!'

def save_pipeline(event):
    """Save the pipeline currently shown in the text editor to 'knowledgespace'.

    The text is parsed as JSON first, because the editor content is written
    with ``json.dumps`` and may contain ``true``/``false``/``null``. Text that
    is not valid JSON is parsed the legacy way, as a Python literal with
    ``false`` rewritten to ``False``.

    Parameters:
    - event: the button click event (unused).

    The file is named after ``pipeline_text.value``, or '__' when that is empty.
    """
    pipeline_name = pipeline_text.value if pipeline_text.value != '' else '__'

    text = input_text.value
    try:
        data = json.loads(text)
    except ValueError:
        legacy_text = re.sub(r'\bfalse\b', 'False', text, flags=re.IGNORECASE)
        data = ast.literal_eval(legacy_text)

    save_record('knowledgespace', data, pipeline_name)

def on_file_buttons_click(event):
    """Handle the View / Download / Upload / Delete buttons of the file panel.

    'View' appends the content of every selected file to ``output_text``; the
    other buttons only show a hint. Without a selection a reminder is shown.
    """
    output_text.value = ''
    selected_rows = file_table.selected_dataframe.values.tolist()

    # Nothing selected: remind the user and stop
    if not selected_rows:
        output_text.value = 'Please select a file to view, download, upload or delete!'
        return

    action = event.obj.name

    if action == 'View':
        output_text.value = ''
        for row in selected_rows:
            output_text.value += f"\n\n===================={str(row)}====================\n\n"
            target = folder_path + str(file_text.value) + '/' + row[0]
            with open(target, 'r') as handle:
                output_text.value += handle.read()
        return

    # The remaining buttons only point the user at Google Drive
    hints = {
        'Download': 'The file is already saved to your google drive folder!',
        'Upload': 'Please put it into the google drive folder!',
        'Delete': 'Please delete it from your google drive folder!',
    }
    if action in hints:
        output_text.value = hints[action]

def on_filefolder_confirm_btn_click(event):
    """Reload the file table with the content of the folder named in ``file_text``."""
    directory = folder_path + str(file_text.value)
    listing = pd.DataFrame(os.listdir(directory), columns=['Filter Files :'])
    file_table.value = listing

# Editor tabs: index 0 is the JSON form editor, index 1 is the raw text editor
widget_tab = pn.Tabs(('json input',json_editor),('text input',input_text))

# --- Layout building blocks ----------------------------------------------------
widget_input =pn.Column(pn.layout.Divider(height=10,margin=(5)),widget_tab)
widget_btns = pn.Row(savepipe_btn,pipeline_text,json_toggle )
widget_updownload = pn.Column(pn.Row(file_view, file_download, ),file_input,pn.Row(file_upload, file_delete), height=150)
# Left-hand file browser column
widget_files = pn.Column(folder_select,pn.Row( file_text,filefolder_confirm_btn, height=55), file_table, widget_updownload, width=250, margin=(0,20,0,0))
widget_funcsel= pn.Column(funccombo, compute_btn,widget_btns )
widget_recom = pn.Row(recommendation_btn,recomAPI_text)

# --- Event wiring --------------------------------------------------------------
# Value watchers: fire whenever the widget's `value` parameter changes
funccombo.param.watch(funccombo_change, 'value')
input_text.param.watch(input_text_change, 'value')
json_toggle.param.watch(json_toggle_clicked, 'value')
json_editor.param.watch(json_editor_change, 'value')
recommendation_btn.param.watch(recommendation_btn_clicked, 'value')
# Click handlers
compute_btn.on_click(compute_btn_clicked)
savepipe_btn.on_click(save_pipeline)
filefolder_confirm_btn.on_click(on_filefolder_confirm_btn_click)
# All four file buttons share one handler, which branches on the button's name
file_view.on_click(on_file_buttons_click)
file_download.on_click(on_file_buttons_click)
file_upload.on_click(on_file_buttons_click)
file_delete.on_click(on_file_buttons_click)
folder_select.param.watch(folder_select_changed, 'value')


# Whole application: file browser | function selection + editors | recommendation + messages
app = pn.Row(widget_files, pn.Column(widget_funcsel,widget_input), pn.Column(widget_recom, progress_text, pn.layout.Divider(height=10,margin=(10)),output_text))

CPU times: user 34.3 ms, sys: 2.36 ms, total: 36.7 ms
Wall time: 43.4 ms


# Display the App

In [28]:
if is_colab_runtime():
    # On Colab the app is rendered inline in the cell output
    display(app)
else:
    # Elsewhere, wrap the app in a Material template (empty sidebar) and show it
    template = pn.template.MaterialTemplate(
        title='SLEGO - Software Lego: A Collaborative and Modular Architecture for Data Analytics',
        sidebar=[],
    )
    # The template's main area behaves like a list
    template.main.append(app)
    template.show()