In [None]:
#CELL 1
#@title Keep this widget playing to prevent Colab from disconnecting you { display-mode: "form" }
#@markdown Press play on the audio player that will appear below:
%%html
<!-- Audio player for the keep-alive trick described in the cell title; the end tag of <audio> is mandatory. -->
<audio src="https://oobabooga.github.io/silence.m4a" controls></audio>

In [None]:
import requests
import tarfile
import os
import re

#@title # **Koboldcpp 1.43 Colab (Improved Edition)**

#@markdown ---
#@markdown # Download Options

# URL of the prebuilt koboldcpp release archive (tar.gz). It is downloaded and
# unpacked under /content further down unless Force_Update_Build is enabled.
url = "https://github.com/kalomaze/koboldcpp/releases/download/Colab/koboldcpp.tar.gz"

# `Model` starts as a preset name (a key of `model_links`); later in this cell
# it is overwritten with the resolved download URL.
Model = "MythoMax-L2-13B-GGUF" #@param ["MythoMax-L2-13B-GGUF", "ReMM-SLERP-L2-13B-GGUF", "Stheno-L2-13B-GGUF"]
# Substituted into the `Q{}` placeholder of the preset URL templates below.
Quant_Method = "4_K_M" #@param ["3_K_L", "4_K_S", "4_K_M", "5_K_S", "5_K_M"]

#@markdown #### OPTIONAL: Manual Model Link
# When enabled and Manual_Link is non-empty, the link replaces the preset above
# (with '/blob/' rewritten to '/resolve/'); otherwise the preset is used.
Use_Manual_Model = False #@param {type:"boolean"}
Manual_Link = "" #@param {type:"string"}

#@markdown ---
#@markdown # Launch Options


# Passed to koboldcpp as `--gpulayers`.
Layers = 43 #@param [43]{allow-input: true}
# Passed to koboldcpp as `--context`.
Context = 4096 #@param [4096]{allow-input: true}

#@markdown #### OPTIONAL: Build Latest Kobold (takes ~7 minutes)
# When true, the prebuilt archive is skipped and koboldcpp is cloned from
# LostRuins/koboldcpp and built with `make LLAMA_CUBLAS=1`.
Force_Update_Build = False #@param {type:"boolean"}

# Preset name -> download URL template; `{}` is filled with Quant_Method
# via str.format when the preset is resolved.
model_links = {
    "MythoMax-L2-13B-GGUF": "https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q{}.gguf",
    "ReMM-SLERP-L2-13B-GGUF": "https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q{}.gguf",
    "Stheno-L2-13B-GGUF": "https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q{}.gguf"
}

if Use_Manual_Model:
    if Manual_Link.strip() != "":
        print(f"\nManual Model detected; will use {Manual_Link} instead of {Model}\n")
        Model = Manual_Link
        Model = Model.replace('/blob/', '/resolve/')
    else:
        print(f"\nWarning: Manual Model enabled, but no link was found. Falling back to {Model}\n")
        if Model in model_links:
            Model = model_links[Model].format(Quant_Method)
else:
    if Model in model_links:
        Model = model_links[Model].format(Quant_Method)

# Check if Model doesn't end in the specified formats
if not re.search(r'(\.gguf|\.ggml|\.bin|\.safetensors)$', Model):
    print("--------------------------\n5 SECOND WARNING: Manual link provided doesn't end with a supported format.\nAre you sure you provided a direct link?\n--------------------------\n")
    !sleep 5

# Check if the Model starts with https://huggingface.co/ but doesn't follow the specified format
if Model.startswith('https://huggingface.co/') and not re.search(r'^https://huggingface\.co/.+/.+/.+/.+/[^/]+\.[^/]+$', Model):
    print("--------------------------\n10 SECOND WARNING: The HuggingFace link provided is of the entire model repository.\nPlease find the direct link to the quant you want to use.\n--------------------------\n")
    !sleep 10

# Ensure the directory exists
if not os.path.exists('/content/koboldcpp/'):
    os.makedirs('/content/koboldcpp/')

# Checking if you already downloaded Kobold
if not os.path.exists("/content/koboldcpp.tar.gz"):
    if Force_Update_Build == False:
        response = requests.get(url, stream=True)
        filename = url.split("/")[-1]
        with open(filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=1024):
                file.write(chunk)

        with tarfile.open(filename, 'r:gz') as tar:
            for member in tar.getmembers():
                if member.name.startswith('koboldcpp'):
                    try:
                        tar.extract(member, path='/content')
                    except Exception as e:
                        print(f"Error extracting '{member.name}': {str(e)}")

        print("Kobold extraction to /content/ completed!")
    else:
        print("Skipping prebuilt kobold, will build manually...")
        !git clone https://github.com/LostRuins/koboldcpp
        %cd /content/koboldcpp
        !make LLAMA_CUBLAS=1

# Change to the koboldcpp directory; every relative path used from here on
# (cloudflared binary, nohup.out, model.gguf, koboldcpp.py) is resolved against it.
%cd /content/koboldcpp

# Hosting the cloudflared server
# Fetch the latest cloudflared Linux binary (-c: continue/reuse an existing
# download, -q: quiet) and make it executable.
!wget -c -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64
!chmod +x cloudflared-linux-amd64
# Reset nohup.out so the `cat` below does not show output from a previous run.
!echo > nohup.out
# Start a tunnel to http://localhost:5001 in the background.
# NOTE(review): presumably 5001 is the port koboldcpp listens on — confirm.
!nohup ./cloudflared-linux-amd64 tunnel --url http://localhost:5001 &
# Give cloudflared time to start, then print what was written to nohup.out
# (presumably this includes the public tunnel URL — verify).
!sleep 10
!cat nohup.out

# Download the file if it doesn't exist already
if not os.path.exists('/content/koboldcpp/model.gguf'):
    !wget $Model -O model.gguf

if os.path.exists('/content/koboldcpp/model.gguf'):
    !rm koboldcpp.py
    !wget https://github.com/kalomaze/koboldcpp/raw/colab-api-url/koboldcpp.py
    !python koboldcpp.py model.gguf --stream --usecublas 0 normal mmq --context $Context --ropeconfig 1.0 10000 --gpulayers $Layers --hordeconfig concedo
else:
    print("Failed to download the GGUF model. Please retry.")

# Quick How-To Guide

---
## Step 1. Keeping Google Colab Running
---

Google Colab tends to time out after a period of inactivity. To keep your session from being disconnected abruptly, start the audio player widget in the first cell and leave it playing.

### Starting the Widget for Audio Player:

> <img src="https://cdn.discordapp.com/attachments/945486970883285045/1150363694191104112/image.png" width="50%"/>

### How the Widget Looks When Playing:

> <img src="https://cdn.discordapp.com/attachments/945486970883285045/1150363653997076540/image.png" width="50%"/>

Follow the visual cues in the images to start the widget and ensure that the notebook remains active.

---
## Step 2. Decide your Model
---

Pick a model and a quantization method from the dropdowns, then run the cell the same way you started the audio widget in Step 1.

### Select your Model and Quantization:

> <img src="https://cdn.discordapp.com/attachments/945486970883285045/1150370141557764106/image.png" width="40%"/>

Alternatively, you can specify a model manually.

### Manual Model Option:

> <img src="https://media.discordapp.net/attachments/945486970883285045/1150370631242764370/image.png" width="75%"/>

5_K_M 13b models should work with 4k (maybe 3k?) context on Colab, since the T4 GPU has ~16GB of VRAM. You can now start the cell, and after 1-3 minutes, it should end with your API link that you can connect to in [SillyTavern](https://docs.sillytavern.app/installation/windows/):

> <img src="https://cdn.discordapp.com/attachments/945486970883285045/1150438287882862674/image.png" width="80%"/>

---
# And there you have it!
### MythoMax (or any 7b / 13b Llama 2 model) in under 2 minutes.
#### (depending on whether or not huggingface downloads are experiencing high traffic)

---

# Credits
### - Made with ~~spite~~ love by kalomaze ❤️ <sub>(also here's the part where I shill my [Patreon](https://www.patreon.com/kalomaze) if you care!)</sub>
### - Koboldcpp is not my software, this is just to make it easy to use on Colab. You can find the original GitHub repository for it here: https://github.com/LostRuins/koboldcpp