<a href="https://colab.research.google.com/github/theaidran/make_me_summary/blob/main/make_me_summary_ui_4bit_textgen_gdrive.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Make me summary text generation for Google Colab

This notebook uses [https://github.com/oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui)

▶⏩Run all the cells and a URL will appear at the bottom in around 5 minutes.🤞🐱‍👤

Then, choose the .txt file to be summarized and click the "Make summary" button.
The summary will be saved automatically to your browser's downloads directory.


## Parameters

* **save_logs_to_google_drive**: saves your chat logs, characters, and softprompts to Google Drive automatically, so that they will persist across sessions.
* **text_streaming**: streams the text output in real time instead of waiting for the full response to be completed.
* **load_in_8bit**: loads the model with 8-bit precision, reducing the GPU memory usage by half. This allows you to use the full 2048 prompt length without running out of memory, at a small accuracy and speed cost.
* **chat_language**: if different than English, activates automatic translation using Google Translate, allowing you to communicate with the bot in a different language.

## Updates

* Check the [README](https://github.com/theaidran/AI/blob/main/README.md) on GitHub for updates.

## Credits

Based on the [original notebook by 81300](https://colab.research.google.com/github/81300/AI-Notebooks/blob/main/Colab-TextGen-GPU.ipynb).

Forked from [Philio](https://github.com/pcrii/Philo-Colab-Collection/blob/main/4bit_TextGen_Gdrive.ipynb).

Forked from [eucdee](https://github.com/eucdee/AI/blob/main/4bit_TextGen_Gdrive.ipynb).




In [None]:
#@title 1. Keep this tab alive to prevent Colab from disconnecting you { display-mode: "form" }

#@markdown Press play on the music player that will appear below:
# The cell body is HTML: an audio player with controls whose source is a remote file named silence.m4a.
%%html
<audio src="https://oobabooga.github.io/silence.m4a" controls>

In [1]:
#@title 2. Install the web UI
#remember gradio is currently held back
save_logs_to_google_drive = False #@param {type:"boolean"}
save_everything_to_google_drive = False #@param {type:"boolean"}
#@markdown remember these models are large and free Gdrive is only 15Ggb <br>
install_gptq = True #@param {type:"boolean"}
#@markdown Install GPTQ-for-LLaMa for 4bit quantized models requiring --wbits 4
from IPython.display import clear_output
# Logs-only persistence: mount Google Drive and remember its MyDrive root in base_folder.
if save_logs_to_google_drive:
  import os
  import shutil
  from google.colab import drive
  drive.mount('/content/drive')
  base_folder = '/content/drive/MyDrive'

# Full persistence: the repository, its models folder and the GPTQ checkout all live under MyDrive.
if save_everything_to_google_drive:
    import os
    import shutil
    from google.colab import drive
    drive.mount('/content/drive')
    base_folder = '/content/drive/MyDrive'
    repo_dir = '/content/drive/MyDrive/text-generation-webui'
    model_dir = '/content/drive/MyDrive/text-generation-webui/models'
    gptq_dir = '/content/drive/MyDrive/text-generation-webui/repositories/GPTQ-for-LLaMa'
    if os.path.exists(repo_dir):
        # An earlier checkout exists on Drive: update it in place.
        # NOTE(review): this leaves the working directory inside repo_dir, while the clone
        # branch below leaves it in the parent folder; the later steps of this cell use
        # paths relative to the parent ("text-generation-webui/...") - confirm.
        %cd {repo_dir}
        !git pull
    else:
        %cd /content/drive/MyDrive/
        !git clone https://github.com/theaidran/text-generation-webui

else:
    # No full persistence: clone into /content and keep models inside that checkout.
    model_dir = '/content/text-generation-webui/models'
    repo_dir = '/content/text-generation-webui'
    %cd /content
    !git clone https://github.com/theaidran/text-generation-webui



# When logs are persisted, the logs/softprompts/characters folders live in
# {base_folder}/oobabooga-data and the checkout gets symlinks pointing at them.
# All "text-generation-webui/..." paths here are relative to the current working directory.
if save_logs_to_google_drive:
  if not os.path.exists(f"{base_folder}/oobabooga-data"):
    os.mkdir(f"{base_folder}/oobabooga-data")
  if not os.path.exists(f"{base_folder}/oobabooga-data/logs"):
    os.mkdir(f"{base_folder}/oobabooga-data/logs")
  if not os.path.exists(f"{base_folder}/oobabooga-data/softprompts"):
    os.mkdir(f"{base_folder}/oobabooga-data/softprompts")
  if not os.path.exists(f"{base_folder}/oobabooga-data/characters"):
    # First run: seed the Drive copy with the characters folder from the checkout.
    shutil.move("text-generation-webui/characters", f"{base_folder}/oobabooga-data/characters")
  else:
    # A Drive copy already exists: drop the checkout's folder so the symlink below can replace it.
    !rm -r "text-generation-webui/characters"

  !rm -r "text-generation-webui/softprompts"
  !ln -s "$base_folder/oobabooga-data/logs" "text-generation-webui/logs"
  !ln -s "$base_folder/oobabooga-data/softprompts" "text-generation-webui/softprompts"
  !ln -s "$base_folder/oobabooga-data/characters" "text-generation-webui/characters"

else:
  !mkdir text-generation-webui/logs

# Shortcuts to the checkout's logs, characters and models folders in the current directory.
!ln -s text-generation-webui/logs .
!ln -s text-generation-webui/characters .
!ln -s text-generation-webui/models .
%rm -r sample_data
%cd text-generation-webui
# Fetch the settings template that the Launch cell reads and rewrites as settings-colab.json.
!wget https://raw.githubusercontent.com/pcrii/Philo-Colab-Collection/main/settings-colab-template.json -O settings-colab-template.json

# Install requirements
# Web UI requirements plus those of the google_translate and silero_tts extensions.
!pip install -r requirements.txt
!pip install -r extensions/google_translate/requirements.txt
!pip install -r extensions/silero_tts/requirements.txt
print(f"\033[1;32;1m\n --> If you see a warning about \"pydevd_plugins\", just ignore it and move on to Step 3. There is no need to restart the runtime.\n\033[0;37;0m")

# Optional GPTQ-for-LLaMa install (cuda branch), built with setup_cuda.py.
if install_gptq:
    if save_everything_to_google_drive:
        if os.path.exists(gptq_dir):
            # A GPTQ checkout already exists on Drive: update and rebuild it.
            %cd {gptq_dir}
            !git pull
            !pip install ninja
            !pip install -r requirements.txt
            !python setup_cuda.py install

        else:
            !mkdir /content/drive/MyDrive/text-generation-webui/repositories
            %cd /content/drive/MyDrive/text-generation-webui/repositories
            !git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa -b cuda
            # NOTE(review): the link target below is relative to the repositories folder,
            # where no text-generation-webui/repositories directory is created in this branch - confirm.
            !ln -s GPTQ-for-LLaMa text-generation-webui/repositories/GPTQ-for-LLaMa
            %cd GPTQ-for-LLaMa
            !pip install ninja
            !pip install -r requirements.txt
            !pip install --upgrade transformers==4.30.0
            !python setup_cuda.py install
    else:
        # Non-persistent install under /content.
        %mkdir /content/text-generation-webui/repositories/
        %cd /content/text-generation-webui/repositories/
        !git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa -b cuda
        !mkdir -p text-generation-webui/repositories
        !ln -s GPTQ-for-LLaMa text-generation-webui/repositories/GPTQ-for-LLaMa
        %cd GPTQ-for-LLaMa
        !pip install ninja
        !pip install -r requirements.txt
        !pip install --upgrade transformers==4.30.0
        !python setup_cuda.py install
clear_output()
print("Finished")

# Google Drive is mounted exactly when one of the two persistence options is on.
drive_NOT_mounted = not (save_logs_to_google_drive or save_everything_to_google_drive)

# os is otherwise only imported inside the Drive branches earlier in this cell.
if drive_NOT_mounted:
  import os

# Show what currently sits in the models folder.
print("Available Models")
print(os.listdir(model_dir))

Finished
Available Models
['config.yaml', 'place-your-models-here.txt']


In [2]:
#@title 3. Download Model
#@markdown you can insert any huggingface model in Organization/model format
model_download = "TheBloke/Vicuna-13B-1-3-SuperHOT-8K-GPTQ" #@param [ "TheBloke/vicuna-13b-v1.3-GPTQ", "TheBloke/vicuna-33B-preview-GPTQ", "TheBloke/Vicuna-33B-1.3SuperHOT-8K-GPTQ", "TheBloke/vicuna-13B-1.1-GPTQ-4bit-128g --branch actorder", "TheBloke/Vicuna-13B-1-3-SuperHOT-8K-GPTQ", "4bit/vicuna-13B-1.1-GPTQ-4bit-128g", "Aitrepreneur/wizardLM-7B-GPTQ-4bit-128g", "TheBloke/wizardLM-7B-GPTQ", "gozfarb/oasst-llama13b-4bit-128g", "catalpa/codecapybara-4bit-128g-gptq", "mzedp/dolly-v2-12b-GPTQ-4bit-128g", "autobots/pythia-12b-gptqv2-4bit", "TheBloke/Vicuna-13B-CoT-GPTQ", "TheBloke/gpt4-alpaca-lora-13B-GPTQ-4bit-128g"] {allow-input: true}
#@markdown remember these models are large and free Gdrive is only 15Ggb <br>

# Run the web UI's download script from the repository root; model_download is passed
# verbatim, so an entry may carry extra arguments such as "--branch actorder".
%cd {repo_dir}
!python download-model.py {model_download}
# The listing printed at the end of this cell shows the folder names inside the models
# directory; copy the name of the model you want into the next cell.
# The placeholder file is removed first so it does not appear in that listing.
!rm {model_dir}/place-your-models-here.txt
#clear_output()
# Google Drive is mounted exactly when one of the two persistence options is on.
drive_NOT_mounted = not (save_logs_to_google_drive or save_everything_to_google_drive)

# os is otherwise only imported inside the Drive branches of the install cell.
if drive_NOT_mounted:
  import os

# Show what currently sits in the models folder.
print("Available Models")
print(os.listdir(model_dir))

/content/text-generation-webui
Downloading the model to models/TheBloke_Vicuna-13B-1-3-SuperHOT-8K-GPTQ
100% 11.7k/11.7k [00:00<00:00, 62.6MiB/s]
100% 866/866 [00:00<00:00, 6.01MiB/s]
100% 132/132 [00:00<00:00, 1.22MiB/s]
100% 2.59k/2.59k [00:00<00:00, 20.3MiB/s]
100% 39.5k/39.5k [00:00<00:00, 232kiB/s]
100% 135/135 [00:00<00:00, 828kiB/s]
100% 435/435 [00:00<00:00, 3.34MiB/s]
100% 1.84M/1.84M [00:00<00:00, 2.14MiB/s]
100% 500k/500k [00:00<00:00, 729kiB/s]
100% 727/727 [00:00<00:00, 5.58MiB/s]
100% 7.45G/7.45G [05:29<00:00, 22.6MiB/s]
Available Models
['config.yaml', 'TheBloke_Vicuna-13B-1-3-SuperHOT-8K-GPTQ']


In [9]:
#@title 4. Launch
import json

#Close server if is running
# Kill any process whose command line matches server.py, then free the two API ports.
!pkill -f -e -c server.py
!fuser -k 5000/tcp  # close api port
!fuser -k 5005/tcp  # close stream port

#@markdown if you dont know what to enter the previous cell should have printed available inputs <br> paste it here
model_load = "TheBloke_Vicuna-13B-1-3-SuperHOT-8K-GPTQ" #@param {type:"string"}
# Parameters
#auto_devices = False #@param {type:"boolean"}
load_4bit_models = True #@param {type:"boolean"}

groupsize_128 = False #@param {type:"boolean"}
load_in_8bit = False #@param {type:"boolean"}
chat = True #@param {type:"boolean"}

text_streaming = True #@param {type:"boolean"}
activate_silero_text_to_speech = False #@param {type:"boolean"}
activate_sending_pictures = False #@param {type:"boolean"}
activate_character_bias = False #@param {type:"boolean"}
chat_language = "English" # @param ['Afrikaans', 'Albanian', 'Amharic', 'Arabic', 'Armenian', 'Azerbaijani', 'Basque', 'Belarusian', 'Bengali', 'Bosnian', 'Bulgarian', 'Catalan', 'Cebuano', 'Chinese (Simplified)', 'Chinese (Traditional)', 'Corsican', 'Croatian', 'Czech', 'Danish', 'Dutch', 'English', 'Esperanto', 'Estonian', 'Finnish', 'French', 'Frisian', 'Galician', 'Georgian', 'German', 'Greek', 'Gujarati', 'Haitian Creole', 'Hausa', 'Hawaiian', 'Hebrew', 'Hindi', 'Hmong', 'Hungarian', 'Icelandic', 'Igbo', 'Indonesian', 'Irish', 'Italian', 'Japanese', 'Javanese', 'Kannada', 'Kazakh', 'Khmer', 'Korean', 'Kurdish', 'Kyrgyz', 'Lao', 'Latin', 'Latvian', 'Lithuanian', 'Luxembourgish', 'Macedonian', 'Malagasy', 'Malay', 'Malayalam', 'Maltese', 'Maori', 'Marathi', 'Mongolian', 'Myanmar (Burmese)', 'Nepali', 'Norwegian', 'Nyanja (Chichewa)', 'Pashto', 'Persian', 'Polish', 'Portuguese (Portugal, Brazil)', 'Punjabi', 'Romanian', 'Russian', 'Samoan', 'Scots Gaelic', 'Serbian', 'Sesotho', 'Shona', 'Sindhi', 'Sinhala (Sinhalese)', 'Slovak', 'Slovenian', 'Somali', 'Spanish', 'Sundanese', 'Swahili', 'Swedish', 'Tagalog (Filipino)', 'Tajik', 'Tamil', 'Telugu', 'Thai', 'Turkish', 'Ukrainian', 'Urdu', 'Uzbek', 'Vietnamese', 'Welsh', 'Xhosa', 'Yiddish', 'Yoruba', 'Zulu']

# Any language other than English switches on the google_translate extension further down.
activate_google_translate = (chat_language != "English")

# Maps each display name offered by chat_language to the code written into the settings file.
language_codes = {'Afrikaans': 'af', 'Albanian': 'sq', 'Amharic': 'am', 'Arabic': 'ar', 'Armenian': 'hy', 'Azerbaijani': 'az', 'Basque': 'eu', 'Belarusian': 'be', 'Bengali': 'bn', 'Bosnian': 'bs', 'Bulgarian': 'bg', 'Catalan': 'ca', 'Cebuano': 'ceb', 'Chinese (Simplified)': 'zh-CN', 'Chinese (Traditional)': 'zh-TW', 'Corsican': 'co', 'Croatian': 'hr', 'Czech': 'cs', 'Danish': 'da', 'Dutch': 'nl', 'English': 'en', 'Esperanto': 'eo', 'Estonian': 'et', 'Finnish': 'fi', 'French': 'fr', 'Frisian': 'fy', 'Galician': 'gl', 'Georgian': 'ka', 'German': 'de', 'Greek': 'el', 'Gujarati': 'gu', 'Haitian Creole': 'ht', 'Hausa': 'ha', 'Hawaiian': 'haw', 'Hebrew': 'iw', 'Hindi': 'hi', 'Hmong': 'hmn', 'Hungarian': 'hu', 'Icelandic': 'is', 'Igbo': 'ig', 'Indonesian': 'id', 'Irish': 'ga', 'Italian': 'it', 'Japanese': 'ja', 'Javanese': 'jw', 'Kannada': 'kn', 'Kazakh': 'kk', 'Khmer': 'km', 'Korean': 'ko', 'Kurdish': 'ku', 'Kyrgyz': 'ky', 'Lao': 'lo', 'Latin': 'la', 'Latvian': 'lv', 'Lithuanian': 'lt', 'Luxembourgish': 'lb', 'Macedonian': 'mk', 'Malagasy': 'mg', 'Malay': 'ms', 'Malayalam': 'ml', 'Maltese': 'mt', 'Maori': 'mi', 'Marathi': 'mr', 'Mongolian': 'mn', 'Myanmar (Burmese)': 'my', 'Nepali': 'ne', 'Norwegian': 'no', 'Nyanja (Chichewa)': 'ny', 'Pashto': 'ps', 'Persian': 'fa', 'Polish': 'pl', 'Portuguese (Portugal, Brazil)': 'pt', 'Punjabi': 'pa', 'Romanian': 'ro', 'Russian': 'ru', 'Samoan': 'sm', 'Scots Gaelic': 'gd', 'Serbian': 'sr', 'Sesotho': 'st', 'Shona': 'sn', 'Sindhi': 'sd', 'Sinhala (Sinhalese)': 'si', 'Slovak': 'sk', 'Slovenian': 'sl', 'Somali': 'so', 'Spanish': 'es', 'Sundanese': 'su', 'Swahili': 'sw', 'Swedish': 'sv', 'Tagalog (Filipino)': 'tl', 'Tajik': 'tg', 'Tamil': 'ta', 'Telugu': 'te', 'Thai': 'th', 'Turkish': 'tr', 'Ukrainian': 'uk', 'Urdu': 'ur', 'Uzbek': 'uz', 'Vietnamese': 'vi', 'Welsh': 'cy', 'Xhosa': 'xh', 'Yiddish': 'yi', 'Yoruba': 'yo', 'Zulu': 'zu'}

%cd {repo_dir}
# Applying the selected language and setting the prompt size to 2048
# if 8bit mode is selected
j = json.loads(open('settings-colab-template.json', 'r').read())
j["google_translate-language string"] = language_codes[chat_language]
if load_in_8bit:
  j["chat_prompt_size"] = 2048
with open('settings-colab.json', 'w') as f:
  f.write(json.dumps(j, indent=4))

# Command-line flags for server.py, collected as a set of ready-to-join strings.
# (The auto_devices / --auto-devices option is disabled in this notebook.)
params = set()
params.update(
    flag
    for enabled, flag in (
        (chat, '--chat'),
        (load_in_8bit, '--load-in-8bit'),
        (load_4bit_models, '--wbits 4'),
        (groupsize_128, '--groupsize 128'),
    )
    if enabled
)

# Extensions selected in the form; 'gallery' is always appended last.
active_extensions = [
    extension
    for enabled, extension in (
        (activate_sending_pictures, 'send_pictures'),
        (activate_character_bias, 'character_bias'),
        (activate_google_translate, 'google_translate'),
        (activate_silero_text_to_speech, 'silero_tts'),
    )
    if enabled
]
active_extensions.append('gallery')

if active_extensions:
  params.add(f'--extensions {" ".join(active_extensions)}')

# Streaming is switched off when translation or text-to-speech is active,
# or when the user disabled it.
if activate_google_translate or activate_silero_text_to_speech or not text_streaming:
  params.add('--no-stream')
if activate_character_bias:
  params.add('--verbose')

# Starting the web UI with tmux
# The command runs server.py in a detached tmux session with the API enabled, the
# exllama loader, the chosen model, the generated settings file and the flags in params.
cmd = f"tmux new -d python server.py --api  --loader exllama --max_seq_len 2048 --compress_pos_emb 1 --model {model_load}  --model_type LLaMa --settings settings-colab.json {' '.join(params)} "#>/content/logs.txt    #2>&1
print(cmd)
#for guanaco --quant-type  nf4  fp4 gptq-for-llama
#for falcon model --autogptq --trust-remote-code --groupsize 64
!$cmd
# Recreate the FIFO /tmp/tmuxpipe and have tmux pipe the output of target 0 into it ...
!rm -f /tmp/tmuxpipe && mkfifo /tmp/tmuxpipe && tmux pipe-pane -t 0 -o 'cat >> /tmp/tmuxpipe'
# ... then drain the FIFO into /content/log.txt in the background; later cells tail that file.
!cat /tmp/tmuxpipe > /content/log.txt 2>&1 &


tmux: server killed (pid 5834)
python3 killed (pid 5835)
2
/content/text-generation-webui
tmux new -d python server.py --api  --loader exllama --max_seq_len 2048 --compress_pos_emb 1 --model TheBloke_Vicuna-13B-1-3-SuperHOT-8K-GPTQ  --model_type LLaMa --settings settings-colab.json --extensions gallery --chat --wbits 4 


In [10]:
#@title 5. Logs - server is starting
# update and wait until server is fully started

import psutil, time
from time import sleep
import IPython
from IPython.display import clear_output
clear_output

#check if proxy port is open
while((5000 in [i.laddr.port for i in psutil.net_connections()]) != True):
  sleep(5)
  !tail -n 1  /content/log.txt

!tail -n 10  /content/log.txt

2023-07-12 15:06:18 INFO:[32mLoading TheBloke_Vicuna-13B-1-3-SuperHOT-8K-GPTQ...[0m
To create a public link, set `share=True` in `launch()`.
2023-07-12 15:06:18 INFO:[32mLoading settings from settings-colab.json...[0m
2023-07-12 15:06:18 INFO:[32mLoading TheBloke_Vicuna-13B-1-3-SuperHOT-8K-GPTQ...[0m
2023-07-12 15:06:26 INFO:[32mLoaded the model in 7.24 seconds.
[0m
2023-07-12 15:06:26 INFO:[32mLoading the extension "gallery"...[0m
Starting streaming server at ws://127.0.0.1:5005/api/v1/stream
Starting API at http://127.0.0.1:5000/api
Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


In [11]:
#@title 6. Simple UI - Make me summary

import IPython
from IPython.display import clear_output

try:
  import flask, flask_socketio
except ImportError:
  !pip install Flask flask-socketio eventlet gunicorn langchain==0.0.225
  clear_output()

import requests
import asyncio
import json
import sys
import os
import threading
import secrets
import flask
from flask import Flask, request, jsonify, render_template_string, session
import flask_socketio
from flask_socketio import SocketIO

iport = 5001 # interface port
from google.colab.output import eval_js
# Print the address Colab returns from google.colab.kernel.proxyPort for the UI port.
print("External link:",end=" ")
print(eval_js(f"google.colab.kernel.proxyPort({iport})"))
from google.colab import output
# Also embed that port as an iframe in this cell's output.
output.serve_kernel_port_as_iframe(iport)

# Flask application with Socket.IO attached; the secret key is a fresh random
# hex token on every run of this cell (the index view stores slider values in `session`).
app = Flask(__name__)
app.logger.info("Starting...")
socketio = SocketIO(app)
app.secret_key = secrets.token_hex(16)

def log_line():
    """Return the first line printed by ``tail -n 1 /content/log.txt``.

    Returns None when the command prints nothing on stdout.
    """
    with os.popen('tail -n 1 /content/log.txt') as tail_output:
        return next(iter(tail_output), None)

# websockets is needed by the streaming client (run); a missing package is only reported here.
try:
    import websockets
except ImportError:
    print("Websockets package not found. Make sure it's installed.")

!fuser -k {iport}/tcp  # close UI port


#api1
# Streaming endpoint of the text-generation server.
# For local streaming, the websockets are hosted without ssl - ws://
HOST_stream = 'localhost:5005'
URI_stream = f'ws://{HOST_stream}/api/v1/stream'

# For reverse-proxied streaming, the remote will likely host with ssl - wss://
# URI = 'wss://your-uri-here.trycloudflare.com/api/v1/stream'

#api2 for one block response
# Blocking endpoint: the whole reply comes back in a single HTTP response.
HOST = 'localhost:5000'
URI = f'http://{HOST}/api/v1/generate'


def generate(prompt, temperature, top_p, typical_p, repetition_penalty, top_k):
    """Request one complete (non-streamed) completion from the blocking API.

    Args:
        prompt: Full prompt text sent to the model.
        temperature, top_p, typical_p, repetition_penalty, top_k: Sampling
            settings forwarded unchanged in the request body.

    Returns:
        The generated text taken from ``results[0]['text']`` of the JSON reply.

    Raises:
        RuntimeError: If the server answers with a status other than 200.
            Previously this case fell through to ``return result`` with
            ``result`` never assigned and surfaced as an UnboundLocalError.
    """
    # Named `payload` so it does not shadow Flask's `request` imported by this cell.
    payload = {
        'prompt': prompt,
        'max_new_tokens': 900,
        'do_sample': True,
        'temperature': temperature,
        'top_p': top_p,
        'typical_p': typical_p,
        'repetition_penalty': repetition_penalty,
        'top_k': top_k,
        'min_length': 0,
        'no_repeat_ngram_size': 0,
        'num_beams': 1,
        'penalty_alpha': 0,
        'length_penalty': 1,
        'early_stopping': False,
        'seed': -1,
        'add_bos_token': True,
        'truncation_length': 2048,
        'ban_eos_token': False,
        'skip_special_tokens': True,
        'stopping_strings': []
      # 'custom_stopping_strings': "You:" ##for example
    }

    response = requests.post(URI, json=payload)

    if response.status_code != 200:
        raise RuntimeError(
            f"Text generation request to {URI} failed with HTTP status {response.status_code}"
        )

    return response.json()['results'][0]['text']

async def run(context, temperature, top_p, typical_p, repetition_penalty, top_k):
    """Stream a completion for ``context`` over the websocket API.

    Async generator: sends one JSON request to ``URI_stream`` and yields the
    ``text`` of every ``text_stream`` event until a ``stream_end`` event arrives.
    Events of any other kind are ignored.
    """
    payload = {
        'prompt': context,
        'max_new_tokens': 900,
        'do_sample': True,
        'temperature': temperature,
        'top_p': top_p,
        'typical_p': typical_p,
        'repetition_penalty': repetition_penalty,
        'top_k': top_k,
        'min_length': 0,
        'no_repeat_ngram_size': 0,
        'num_beams': 1,
        'penalty_alpha': 0,
        'length_penalty': 1,
        'early_stopping': False,
        'seed': -1,
        'add_bos_token': True,
        'truncation_length': 2048,
        'ban_eos_token': False,
        'skip_special_tokens': True,
        'stopping_strings': []
    }

    async with websockets.connect(URI_stream, ping_interval=None) as websocket:
        await websocket.send(json.dumps(payload))

        # The prompt itself is not echoed back; only generated text is yielded.
        while True:
            message = json.loads(await websocket.recv())
            event = message['event']
            if event == 'text_stream':
                yield message['text']
            elif event == 'stream_end':
                return

# Text received so far from the streaming API; appended to chunk by chunk.
response_apistream = ""

async def print_response_stream(prompt, temperature, top_p, typical_p, repetition_penalty, top_k):
    """Consume the stream produced by ``run`` and append every chunk to ``response_apistream``."""
    global response_apistream
    async for piece in run(prompt, temperature, top_p, typical_p, repetition_penalty, top_k):
        response_apistream += piece


def stop_stream():
    """POST an empty JSON body to the API's ``stop-stream`` endpoint.

    The reply is not inspected.
    """
    requests.post(f'http://{HOST}/api/v1/stop-stream', json={})

# Flags describing whether a streaming reply / a summary job is in progress.
is_stream_running = False
is_summary_running = False

def api_stream(temperature, top_p, typical_p, repetition_penalty, top_k):
    """Thread target: stream a reply for the current ``question_text`` to completion.

    Runs the async consumer in a fresh event loop via ``asyncio.run``.
    """
    global question_text, is_stream_running
    stream_job = print_response_stream(
        question_text, temperature, top_p, typical_p, repetition_penalty, top_k
    )
    asyncio.run(stream_job)


def start_api_stream(temperature, top_p, typical_p, repetition_penalty, top_k):
    """Start ``api_stream`` on a background thread unless a stream is already running."""
    global is_stream_running
    if is_stream_running:
        return
    sampling_args = (temperature, top_p, typical_p, repetition_penalty, top_k)
    worker = threading.Thread(target=api_stream, args=sampling_args)
    worker.start()
    is_stream_running = True

from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader

def trim_string(input_string):
    """Cut the text out of a stringified ``page_content="..." metadata=...`` value.

    The argument is converted with ``str``. When it contains ``" metadata=``,
    the characters from index 14 (the length of ``page_content="``) up to the
    first occurrence of that marker are returned; otherwise the whole string
    is returned unchanged.
    """
    text = str(input_string)
    head, marker, _ = text.partition('" metadata=')
    if not marker:
        # Marker absent: nothing to trim.
        return text
    return head[14:]

def split_text_with_dot(text):
    """Split ``text`` into chunks of at most 4000 characters.

    The text is first split on "." and every resulting piece is then split
    again on " ", which covers pieces that could not be broken at a dot.
    Returns the flat list of final chunks in order.
    """
    # Author's sizing notes:
    #  - chunk_size is in characters, not tokens (1 token = 4 chars)
    #  - good accuracy is at 1000 tokens context; a wider window is less accurate
    #  - standard window size for vicuna is 2048 tokens. With superhot and 24G VRAM it can be:
    #      llama-13b  max_seq_len = 8192, compress_pos_emb = 4  6079 tokens
    #      llama-30b  max_seq_len = 3584, compress_pos_emb = 2  3100 tokens
    #  - the "'max_new_tokens': 900" request parameter matters too, as it defines the
    #    output length for each chunk; a more detailed answer requires a higher value
    sentence_splitter = CharacterTextSplitter(chunk_size=4000, chunk_overlap=0, separator=".")
    word_splitter = CharacterTextSplitter(chunk_size=4000, chunk_overlap=0, separator=" ")
    return [
        piece
        for sentence_chunk in sentence_splitter.split_text(text)
        for piece in word_splitter.split_text(str(sentence_chunk))
    ]

def wrap_lines(text, width):
    """Re-flow ``text`` into lines, starting a new "* " bullet after each separator.

    Words are separated on whitespace. A word ending in "*" is treated as a
    separator: the asterisk is removed, the current line is finished and the
    next line starts with "* ". Otherwise a word is appended while
    ``len(line) + len(word) <= width`` (``line`` includes its trailing space),
    else the line is finished and the word opens a new one.

    Empty lines are never emitted. Previously a text that started with a
    bullet ("* foo ...") or with a word longer than ``width`` produced a
    spurious blank first line.

    Args:
        text: Text to wrap.
        width: Length limit used by the fit test above.

    Returns:
        The wrapped lines joined with newlines ("" for empty input).
    """
    lines = []

    def emit(candidate):
        # Keep only lines that still have content after stripping.
        stripped = candidate.strip()
        if stripped:
            lines.append(stripped)

    current = ''
    for word in text.split():
        if word.endswith("*"):  # Separator
            current += word[:-1] + ' '  # Drop the asterisk at the end of the word
            emit(current)
            current = '* '
        elif len(current) + len(word) <= width:
            current += word + ' '
        else:
            emit(current)
            current = word + ' '

    emit(current)
    return '\n'.join(lines)

def start_make_summary(texts, summary_rounds, temperature, top_p, typical_p, repetition_penalty, top_k):
    """Start ``make_summary`` on a background thread.

    Nothing is started while a stream is running or while a summary job is
    already in progress.

    ``is_summary_running`` is raised *before* the thread is started. The worker
    clears the flag when it finishes, so setting it afterwards (as before)
    could overwrite the worker's reset and leave the flag stuck at True when
    the job completes very quickly.
    """
    global is_summary_running
    if is_stream_running or is_summary_running:
        return

    worker = threading.Thread(
        target=make_summary,
        args=(texts, summary_rounds, temperature, top_p, typical_p, repetition_penalty, top_k),
    )
    is_summary_running = True
    try:
        worker.start()
    except RuntimeError:
        # The thread could not be started, so no worker will ever clear the flag.
        is_summary_running = False
        raise

def make_summary(texts, summary_rounds, temperature, top_p, typical_p, repetition_penalty, top_k):
    """Summarise ``texts`` chunk by chunk, for ``summary_rounds`` rounds.

    In each round every chunk is wrapped in the summary prompt and sent to
    ``generate``; the replies are concatenated and re-split on newlines into
    chunks of at most 4000 characters, which feed the next round. The final
    chunks are joined, wrapped with ``wrap_lines(..., 200)`` and stored in the
    global ``answer_text``. Progress is published through the globals
    ``log_rounds`` and ``log_chunks``.

    Changes compared with the previous version:
      * ``is_summary_running`` is cleared in a ``finally`` block, so a failing
        request no longer leaves the job marked as running forever;
      * on failure ``answer_text`` receives an error message before the
        exception is re-raised, instead of keeping a stale progress text;
      * the inner loop no longer reuses the round counter ``i`` and no longer
        overwrites the elements of the caller's list.
    """
    global answer_text, is_summary_running, log_rounds, log_chunks
    prompt = "This is a conversation with your Assistant. The Assistant is very helpful and is eager to chat with you and answer your questions. You: Make a detail summary of the text in bullet points: "
    text_splitter = CharacterTextSplitter(chunk_size=4000, chunk_overlap=0, separator="\n")
    try:
        for round_index in range(summary_rounds):
            log_rounds = f"  Round {round_index + 1} of {summary_rounds}"
            chunk_total = len(texts)
            summaries = []
            for chunk_index, chunk in enumerate(texts):
                log_chunks = f"Processing {chunk_index + 1} of {chunk_total} chunks"
                chunk_prompt = prompt + trim_string(chunk) + ". Assistant:"
                summaries.append(
                    generate(chunk_prompt, temperature, top_p, typical_p, repetition_penalty, top_k)
                )
            # The summaries of this round become the input of the next one.
            texts = text_splitter.split_text(''.join(summaries))
        answer_text = wrap_lines(''.join(texts), 200)
    except Exception as exc:
        answer_text = f"Summary failed: {exc}"
        raise
    finally:
        is_summary_running = False


#example prompt
question_text ="This is a conversation with your Assistant. The Assistant is very helpful and is eager to chat with you and answer your questions. You: Tell me about yourself. Assistant:"
# Text shown in the answer box, the start time posted by the browser, and the name used for the saved summary.
answer_text = ""
start_time= ""
file_name=""
# Progress strings written by make_summary and read by the index view. They are not
# initialised anywhere in the visible part of the file, so they get a value here to
# ensure index can read them before the worker thread has published its first update.
log_rounds = ""
log_chunks = ""

@app.route('/', methods=['GET', 'POST'])
def index():

    if request.method == 'POST':
        button_clicked = 'button_status' in request.form
        stream_enabled = 'stream_enable' in request.form
        summary_enabled = 'summary_enable' in request.form
        summary_rounds = int(request.form.get('summary_rounds', session.get('summary_rounds', '1')))
        temperature = float(request.form.get('temperature', session.get('temperature', '0.7')))
        top_p = float(request.form.get('top_p', session.get('top_p', '0.1')))
        typical_p = float(request.form.get('typical_p', session.get('typical_p', '1')))
        repetition_penalty = float(request.form.get('repetition_penalty', session.get('repetition_penalty', '1.00')))
        top_k = int(request.form.get('top_k', session.get('top_k', '40')))

        global question_text, answer_text, response_apistream, is_stream_running, is_summary_running, start_time, file_name ,log_rounds, log_chunks, model_load
        question_text = request.form.get('prompt', '')
        start_time = request.form.get('startTime','')


        # finish summary
        if not button_clicked and not is_summary_running and summary_enabled: # summary has been finished
           summary_enabled = False  # end of summary process
        # start summary thread
        if  not is_summary_running and summary_enabled: # if summary_enabled is true, browswer triggers post request every 5sec, to check if summary is finished by "is_summary_running == False"
            text = split_text_with_dot(question_text)
            start_make_summary(text, summary_rounds ,temperature, top_p, typical_p, repetition_penalty, top_k)
            modified_form = request.form.copy()
            modified_form['sendButton'] = 'Send Text'
            button_clicked = False #summary procces triggered and started
            file_name = request.form.get('fileInput','') + " with " + model_load # add used model name

        # proceed summary
        if  is_summary_running and summary_enabled:
            answer_text = "Generating summary..."+ log_rounds + " " + log_chunks

        # start & finish blocking response
        if button_clicked and not stream_enabled and not summary_enabled:
            answer_text = generate(question_text, temperature, top_p, typical_p, repetition_penalty, top_k)
            modified_form = request.form.copy()
            modified_form['sendButton'] = 'Send Text'
            button_clicked = False  # end final blocking response

        # finish stream
        if response_apistream == answer_text and response_apistream != "":
            button_clicked = False  # stream end
            is_stream_running = False
            response_apistream = ""
        # start stream thread
        if button_clicked and stream_enabled and not is_stream_running :
            response_apistream = ""
            start_api_stream(temperature, top_p, typical_p, repetition_penalty, top_k)  # Start stream
        # manual stop stream
        if button_clicked and request.form.get('sendButton', '') == "   Stop    ":
           stop_stream() # Stop stream
           modified_form = request.form.copy()
           modified_form['sendButton'] = 'Send Text'
           response_apistream = ""
           request.form = modified_form
           button_clicked = False  # Stream end
           is_stream_running = False
        # proceed stream
        if button_clicked and stream_enabled and not summary_enabled:
            if response_apistream == "":
                answer_text = "Generating..."
            else:
                answer_text = response_apistream  # Copy stream chunk

        log_text = log_line()

        session['temperature'] = temperature
        session['top_p'] = top_p
        session['typical_p'] = typical_p
        session['repetition_penalty'] = repetition_penalty
        session['top_k'] = top_k

    else:
        #question_text = request.form.get('prompt', '')
        answer_text = ''
        log_text = ''
        stream_enabled = True
        button_clicked = False
        summary_enabled = False
        summary_rounds = int(session.get('summary_rounds', '1'))
        temperature = float(session.get('temperature', '0.7'))
        top_p = float(session.get('top_p', '0.1'))
        typical_p = float(session.get('typical_p', '1'))
        repetition_penalty = float(session.get('repetition_penalty', '1.00'))
        top_k = int(session.get('top_k', '40'))

    return render_template_string('''
<!DOCTYPE html>
<html>
<head>
    <title>Make Me Summary Colab UI</title>
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/FileSaver.js/2.0.0/FileSaver.min.js"></script>
    <script>

        function clickSummary() {
        $("#summary_enable").prop("checked", true);
        $("#button_status").prop("checked", true); // trigger summary process by (button_status and summary_enable) at the same time
        startTimeSum = new Date();
        $("#startTime").val(startTimeSum); // get date for summary timer
        document.forms[0].submit();  // Send POST
        }

        function loadFile() {
           var fileInput = document.getElementById('fileInput');
           var questionTextarea = document.getElementById('question');
           var file = fileInput.files[0];
           var reader = new FileReader();
           reader.onload = function(e) {
             var contents = e.target.result;
             questionTextarea.value = contents;
          };
          reader.readAsText(file);
        }

        function saveSummary() {
          var summary = document.getElementById('answer').value;
          var blob = new Blob([summary], { type: 'text/plain' });
          var temperatureSlider = document.getElementById("temperature");
          var top_pSlider = document.getElementById("top_p");
          var typical_pSlider = document.getElementById("typical_p");
          var repetition_penaltySlider = document.getElementById("repetition_penalty");
          var top_k_Slider = document.getElementById("top_k");
          var summary_roundsSlider = document.getElementById("summary_rounds");

          var fileNameArea = document.getElementById('fileName');
          var fileName = "Summary of " + fileNameArea.value + " (rounds=" + summary_roundsSlider.value + " temp=" + temperatureSlider.value + " top_p=" + top_pSlider.value + " typ_p=" + typical_pSlider.value + " rep_pen=" + repetition_penalty.value + " top_k=" + top_k.value + ").txt"  ;

          if (window.saveAs) {
            window.saveAs(blob, fileName);
            }  else {
              var url = URL.createObjectURL(blob);
              var link = document.createElement('a');
              link.href = url;
              link.download = fileName;
              link.click();
              URL.revokeObjectURL(url);
            }
        }

        function updateSliderValue(slider) {
            var sliderId = slider.id;
            var valueElement = document.getElementById(sliderId + 'Value');
            valueElement.innerText = slider.value;
        }

        function simulateButtonClick() {         //work around to get data from colab web url, due o lack of suport of data requests like json etc.
                document.forms[0].submit();  // Send POST in loop
        }

        function startTimer(period) {
            setInterval(simulateButtonClick, period); // 2.5 seconds
                 // document.forms[0].submit();  // Send POST first time after enabling stream
        }
//start doc ready function
      // Page initialisation, run once the DOM is ready:
      //   1. restore the button label and hide the spinner,
      //   2. show summary labels and a running elapsed-time display,
      //   3. install the submit handler that shows progress for manual requests,
      //   4. start cyclic re-posting while a summary or a stream is in progress.
      $(document).ready(function () {

        // The submit button is rendered with the label "   Stop    "; relabel it
        // unless the hidden button_status flag says a request is in progress.
        if (!$("#button_status").is(":checked")) {
          $("#sendButton").val("Send Text");
        }

        $("#loader").hide();

        // Start time carried in the hidden startTime field. Fall back to the
        // current time when the field is empty or cannot be parsed, so the
        // elapsed-time display never shows NaN.
        var startTimeS = new Date(document.getElementById("startTime").value);
        if (isNaN(startTimeS.getTime())) {
          startTimeS = new Date();
        }

        // Labels: when a file name is present the page is showing a summary run.
        var fileNameField = document.getElementById("fileName");
        var answerLabel = document.querySelector('label[for="answer"]');
        var questionLabel = document.querySelector('label[for="question"]');
        var fileValue = fileNameField.value;
        if (fileValue.trim() !== "") {
          questionLabel.textContent = "Question: Make me summary";
          answerLabel.textContent = "Answer: " + " Summary of " + fileValue;
        }

        // Once per second: while the summary flag is set, show the running
        // time in the answer box; as soon as the flag is clear and a file name
        // is present, stamp the total time on the label and download the result
        // exactly once.
        var timerId = setInterval(function () {
          // Clamp at zero so a start time slightly in the future (clock skew)
          // cannot produce negative minutes or seconds.
          var elapsedTime = Math.max(0, Math.round((new Date() - startTimeS) / 1000));
          var seconds = elapsedTime % 60;
          var minutes = Math.floor(elapsedTime / 60);

          if ($("#summary_enable").is(":checked")) {
            $("#answer").val("Generating summary...  " + minutes + "m " + seconds + "s");
          } else if (fileNameField.value.trim() !== "") {
            answerLabel.textContent = "Answer: " + " Summary of " + fileValue + " Generated in " + minutes + " minutes " + seconds + " seconds";
            clearInterval(timerId);
            saveSummary();
          }
        }, 1000);

        // Submit handler for the form ("Send Text" button): mark the request
        // as started, show the spinner and count the seconds while waiting.
        // NOTE(review): the scripted form.submit() used for polling does not
        // dispatch a submit event, so this handler should only run for
        // user-initiated submits - confirm.
        $("form").submit(function (event) {
          $("#button_status").prop("checked", true); // remember the click

          $("#loader").show();
          // Kept local: an undeclared "startTime" would become a global that
          // collides with the element whose id is startTime.
          var submitStart = new Date();
          $("#answer").val("Generating...  0s");
          setInterval(function () {
            var elapsedTime = Math.round((new Date() - submitStart) / 1000);
            $("#answer").val("Generating...  " + elapsedTime + "s");
          }, 1000);
        });

        // Cyclic re-posting of the form to fetch fresh state from the server.
        if ($("#summary_enable").is(":checked")) {
          startTimer(5000); // poll summary status every 5 seconds
        }
        if ($("#stream_enable").is(":checked") && $("#button_status").is(":checked")) {
          startTimer(2500); // poll streamed output every 2.5 seconds
        }
      });
//end doc ready function
    </script>
    <style>
        /* Spinner shown while a request is pending: a small circle whose top and
           right borders are blue and whose other borders are light grey, rotated
           continuously by the "spin" animation. */
        #loader {
            border: 5px solid #f3f3f3;
            border-top: 5px solid #3498db;
            border-right: 5px solid #3498db;
            border-bottom: 5px solid #f3f3f3;
            border-left: 5px solid #f3f3f3;
            border-radius: 50%;
            width: 10px;
            height: 10px;
            animation: spin 1.5s linear infinite;
        }

        /* One full clockwise turn per animation cycle. */
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }

        /* Checkboxes used only as state flags: moved off-screen rather than
           removed, so they stay part of the form. */
        .hidden-checkbox {
           position: absolute;
           left: -9999px;
        }
      /* Textareas that carry the start time and file name in the form
         without being displayed. */
      #startTime {
         display: none;
      }
      #fileName {
         display: none;
      }
      /* Bordered, rounded box grouping the file chooser and summary controls. */
      .button-container {
        border: 1px solid #aaa;
        padding: 8px;
        border-radius: 5px;
        display: inline-block;
}

</style>
</head>
<body>
    <form method="POST">
        <label for="question">Question:</label><br>
        <textarea id="question" name="prompt" cols="160" rows="3">{{ question_text }}</textarea><br>
        <label for="answer">Answer:</label><br>
        <textarea id="answer" name="answer" cols="160" rows="9">{{ answer_text }}</textarea>
        <textarea id="startTime" name="startTime" cols="15" rows="1">{{ start_time }}</textarea>
        <textarea id="fileName" name="fileName" cols="15" rows="1">{{ file_name }}</textarea><br>

        <!-- Rendered with the label "Stop"; the page script relabels it to "Send Text" when the button_status flag is not set. -->
        <input type="submit" id="sendButton" name="sendButton" value="   Stop    ">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
        <div class="button-container">
        <input type="file" id="fileInput" name="fileInput" onchange="loadFile()" accept=".txt" placeholder="Choose a file">
        <button type="button" id="summaryButton" onclick="clickSummary()">Make summary</button> &nbsp&nbsp&nbsp
        <label for="summary_rounds">Summary rounds:</label>
        <input type="range" id="summary_rounds" name="summary_rounds" min="1" max="5" step="1" value="{{ summary_rounds }}" oninput="updateSliderValue(this)">
        <span id="summary_roundsValue">{{ summary_rounds }}</span>&nbsp&nbsp&nbsp
        </div>
        <br><br>
        <input type="checkbox" id="stream_enable" name="stream_enable" {% if stream_enabled %}checked{% endif %}>
        <label for="stream_enable">Enable streaming</label>&nbsp&nbsp
        <input type="checkbox" id="summary_enable" name="summary_enable" class="hidden-checkbox" {% if summary_enabled %}checked{% endif %}>
        <br><br>&nbsp&nbsp
        <label for="temperature">Temperature:</label>
        <input type="range" id="temperature" name="temperature" min="0" max="2" step="0.01" value="{{ temperature }}" oninput="updateSliderValue(this)">
        <span id="temperatureValue">{{ temperature }}</span>&nbsp&nbsp
        <label for="top_p">Top P:</label>
        <input type="range" id="top_p" name="top_p" min="0" max="1" step="0.01" value="{{ top_p }}" oninput="updateSliderValue(this)">
        <span id="top_pValue">{{ top_p }}</span>&nbsp&nbsp
        <label for="typical_p">Typical P:</label>
        <input type="range" id="typical_p" name="typical_p" min="0" max="1" step="0.01" value="{{ typical_p }}" oninput="updateSliderValue(this)">
        <span id="typical_pValue">{{ typical_p }}</span>&nbsp&nbsp
        <label for="repetition_penalty">Repetition Penalty:</label>
        <input type="range" id="repetition_penalty" name="repetition_penalty" min="0" max="1.5" step="0.01" value="{{ repetition_penalty }}" oninput="updateSliderValue(this)">
        <span id="repetition_penaltyValue">{{ repetition_penalty }}</span>&nbsp&nbsp
        <label for="top_k">Top K:</label>
        <input type="range" id="top_k" name="top_k" min="1" max="200" step="1" value="{{ top_k }}" oninput="updateSliderValue(this)">
        <span id="top_kValue">{{ top_k }}</span>&nbsp&nbsp
        <input type="checkbox" id="button_status" name="button_status" class="hidden-checkbox" {% if button_clicked %}checked{% endif %}> <br>
        <p>{{ log_text }}</p>
        <div id="loader"></div>
    </form>
</body>
</html>
    ''', question_text=question_text, answer_text=answer_text, log_text=log_text, stream_enabled=stream_enabled, summary_enabled=summary_enabled, button_clicked=button_clicked, summary_rounds=summary_rounds, temperature=temperature, top_p=top_p, typical_p=typical_p, repetition_penalty=repetition_penalty, top_k=top_k, start_time=start_time, file_name=file_name)

# Start the web server for `app` on port `iport` directly from this cell.
# NOTE(review): socketio.run() presumably blocks until the server stops - confirm.
# The commented-out lines below are a disabled variant that would wrap the call
# in a function and run it on a background thread instead.
#def socketio_thread():
socketio.run(app, port=iport)

#t2 = threading.Thread(target=socketio_thread, args=())
#t2.start()




External link: https://jidetpx7c39-496ff2e9c6d22116-5001-colab.googleusercontent.com/


<IPython.core.display.Javascript object>