In [None]:
# Colab only: mount Google Drive at /content/drive so files stored there are reachable.
from google.colab import drive
drive.mount("/content/drive")
# Place this tutorial.ipynb in your Google Drive under the folder below
# (create the folders first if they do not exist yet):
#   MyDrive/SideProjects/LLM/SgLang/
# The %cd below makes that folder the working directory for the following cells.
%cd '/content/drive/MyDrive/SideProjects/LLM/SgLang/'

In [None]:
#import all necassary packages:
! pip install --upgrade pip
! pip install "sglang[all]"
! pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
! pip install triton

In [None]:
# Log in to Hugging Face: this is used to connect with the selected language model.
# NOTE(review): presumably required because the model used below is gated — confirm.
!huggingface-cli login

In [None]:
# Launch the SGLang server in the background so that the following cells can
# run while it is serving. Any model can be chosen, but starting with an 8B
# model is recommended.
# NOTE: wait one or two minutes for the server to come up before running the next cell!
import subprocess
import sys

# sys.executable makes the server run under the same interpreter (and therefore
# the same installed packages) as this kernel; a bare 'python' is resolved
# through PATH and may point at a different installation.
# The Popen handle is kept so the server can be inspected or stopped later,
# e.g. server_process.poll() or server_process.terminate().
server_process = subprocess.Popen([
    sys.executable, '-m', 'sglang.launch_server',
    '--model-path', 'meta-llama/Meta-Llama-3-8B-Instruct',
    '--port', '30000',
])


In [None]:
# Check whether the server process is running.
# NOTE(review): the grep command itself will likely appear in the listing too;
# look for a line that contains `-m sglang.launch_server`.
!ps -aux | grep sglang.launch_server

In [None]:
!pip install pyngrok
!ngrok authtoken '' # Get your authtoken from https://dashboard.ngrok.com/get-started/your-authtoken

In [None]:
from flask import Flask, render_template, request, jsonify
from sglang import function, system, user, assistant, gen, set_default_backend, RuntimeEndpoint
from pyngrok import ngrok #run it through ngrok!

# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)

# Grammar-correction prompt program, run against the SGLang backend
@function
def grammar_corrector(s, text_to_correct, conversation_state=None, user_request=None):
    """Append the chat prompt to ``s`` and generate the ``corrected_text`` variable.

    Args:
        s: prompt state the system/user/assistant messages are appended to.
        text_to_correct: text to correct; only used on the first turn.
        conversation_state: the last corrected text, or None on the first turn.
        user_request: follow-up request; only used when ``conversation_state``
            is not None.
    """
    if conversation_state is not None:
        # Follow-up turn: remind the model of the last correction and pass the new request.
        system_prompt = f"Continuing conversation. The last corrected text was: '{conversation_state}'."
        user_prompt = f"User's follow-up request: {user_request}"
    else:
        # First turn: set up the assistant's role and hand over the raw text.
        system_prompt = "You are a grammar correction assistant. Correct the following text to proper English. You just need to generate the final output! No need to show the errors"
        user_prompt = f"Original text: {text_to_correct}"

    s += system(system_prompt)
    s += user(user_prompt)
    # The generated answer is stored under the "corrected_text" key (at most 256 tokens).
    s += assistant(gen("corrected_text", max_tokens=256))

# Send all SGLang programs to the server on localhost:30000 (the port passed when launching it).
set_default_backend(RuntimeEndpoint("http://localhost:30000"))

# Last corrected text, carried over between /chat requests. It is a single
# module-level global, so it is shared by every client of this Flask process.
conversation_state = None

@app.route('/')
def index():
    """Serve the chat page."""
    # index.html is looked up in the templates folder
    page = render_template('index.html')
    return page

@app.route('/chat', methods=['POST'])
def chat():
    """Handle one chat turn: correct the posted message and return the result.

    Expects a JSON body of the form ``{"message": "<text>"}``.

    Returns:
        ``{"response": <corrected text>}`` on success, or
        ``{"error": <description>}`` with HTTP status 400 when the body is not
        a JSON object or has no non-empty string ``message`` field.

    Note:
        ``conversation_state`` is a module-level global shared by all clients.
        After the first successful call it holds the last corrected text, so
        later messages are sent to ``grammar_corrector`` as follow-up requests.
    """
    global conversation_state

    # silent=True yields None (instead of raising) for a missing or malformed
    # JSON body, so bad requests get a clean 400 rather than an unhandled error.
    payload = request.get_json(silent=True)
    user_input = payload.get('message') if isinstance(payload, dict) else None
    if not isinstance(user_input, str) or not user_input.strip():
        return jsonify({'error': "Request body must be JSON with a non-empty string 'message' field."}), 400

    # Run the grammar corrector; the same input serves as the text to correct
    # (first turn) and as the follow-up request (later turns).
    state = grammar_corrector.run(
        text_to_correct=user_input,
        conversation_state=conversation_state,
        user_request=user_input
    )

    # Extract the corrected text and remember it for the next turn
    corrected_text = state["corrected_text"]
    conversation_state = corrected_text

    return jsonify({'response': corrected_text})



if __name__ == '__main__':
    # Open an ngrok tunnel to the Flask app (port 5000 is the port app.run() listens on by default)
    public_url = ngrok.connect(5000)
    print(f" * ngrok tunnel available at: {public_url}")

    try:
        # Run the Flask app; this call blocks until the server is stopped
        app.run()
    finally:
        # The notebook kernel outlives the server, so shut ngrok down explicitly;
        # otherwise the tunnel stays open and re-running this cell piles up tunnels.
        ngrok.kill()
