In [15]:
import os
import requests
import pandas as pd
import json


### **Recommended Model Sizes for Colab Pro:**
| Model Type | Parameters (Billions) | VRAM Required (GB) | RAM Required (GB) | Notes |
|------------|----------------------|--------------------|-------------------|-------|
| **Tiny LLM** | 3B - 7B | 5-10 GB | 8-12 GB | Fastest on Colab GPUs (T4, P100) |
| **Mid-size LLM** | 13B - 20B | 12-16 GB | 16+ GB | Needs A100 (Colab Pro+) |
| **Large LLM** | 30B+ | 24-40 GB | 32+ GB | Only runs efficiently on A100 40GB |

---

### **Colab GPU Tiers and LLM Feasibility:**
| Colab Plan | GPU (Varies) | VRAM | Suitable Max Model Size |
|------------|-------------|------|--------------------------|
| **Colab Free** | T4 / P100 | 12-16 GB | 7B (Possibly 13B with quantization) |
| **Colab Pro** | T4 / V100 | 16-24 GB | 13B-20B (8-bit quantized) |
| **Colab Pro+** | A100 40GB | 40 GB | 30B+ (best for large models) |




# Set Up Ollama

## Install LLM models

In [16]:
!curl https://ollama.ai/install.sh | sh

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 13281    0 13281    0     0  58390      0 --:--:-- --:--:-- --:--:-- 58506
>>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


In [17]:
!nohup bash -c "OLLAMA_HOST=0.0.0.0:7000 OLLAMA_ORIGIN=* ollama serve" &
!sleep 5 && tail /content/nohup.out

nohup: appending output to 'nohup.out'
llama_init_from_model:  CUDA_Host  output buffer size =     0.82 MiB
llama_init_from_model:      CUDA0 compute buffer size =   563.00 MiB
llama_init_from_model:  CUDA_Host compute buffer size =    21.01 MiB
llama_init_from_model: graph nodes  = 1225
llama_init_from_model: graph splits = 2
time=2025-04-17T22:21:10.418Z level=INFO source=server.go:619 msg="llama runner started in 0.75 seconds"
[GIN] 2025/04/17 - 22:21:14 | 200 |  5.641585285s | 156.208.237.171 | POST     "/api/chat"
[GIN] 2025/04/17 - 22:23:32 | 200 | 13.749960707s | 156.208.237.171 | POST     "/api/chat"
[GIN] 2025/04/17 - 22:24:50 | 200 |  3.195823834s | 156.208.237.171 | POST     "/api/chat"
Error: listen tcp 0.0.0.0:7000: bind: address already in use


In [18]:
%env OLLAMA_HOST=0.0.0.0:7000
%env OLLAMA_ORIGIN=*


env: OLLAMA_HOST=0.0.0.0:7000
env: OLLAMA_ORIGIN=*


In [19]:
!echo $OLLAMA_HOST
!echo $OLLAMA_ORIGIN


0.0.0.0:7000
nohup.out sample_data


In [20]:
# Optional: pull the 13B Llama 2 model. Left disabled here; uncomment the
# line below to download it (presumably skipped because of its size -- confirm).
# !ollama pull llama2:13b

In [21]:
!ollama pull phi

[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling 04778965089b... 100% ▕▏ 1.6 GB                         [K
pulling 7908abcab772... 100% ▕▏ 1.0 KB                         [K
pulling 774a15e6f1e5... 100% ▕▏   77 B                         [K
pulling 3188becd6bae... 100% ▕▏  132 B                         [K
pulling 0b8127ddf5ee... 100% ▕▏   42 B                         [K
pulling 4ce4b16d33a3... 100% ▕▏  555 B                         [K
verifying sha256 digest [K
writing manifest [K
success [K[?25h[?2026l


In [22]:
!ollama pull mistral

[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠴ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠦ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling ff82381e2bea... 100% ▕▏ 4.1 GB                         [K
pulling 43070e2d4e53... 100% ▕▏  11 KB                         [K
pulling 491dfa501e59... 100% ▕▏  801 B                         [K
pulling ed11eda7790d... 100% ▕▏   30 B                         [K
pulling 42347cd80dc8... 100% ▕▏  485 B                         [K
verifying sha256 digest [K
writing manifest [K
success [K[?25h[?2026l


In [23]:
!ollama pull llama3

[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling 6a0746a1ec1a... 100% ▕▏ 4.7 GB                         [K
pulling 4fa551d4f938... 100% ▕▏  12 KB                         [K
pulling 8ab4849b038c... 100% ▕▏  254 B                         [K
pulling 577073ffcc6c... 100% ▕▏  110 B                         [K
pulling 3f8eb4da87fa... 100% ▕▏  485 B                         [K
verifying sha256 digest [K
writing manifest [K
success [K[?25h[?2026l


## Ngrok

In [24]:
# List the models the Ollama server currently has, to check that the pulls
# above completed.
!ollama list

NAME                 ID              SIZE      MODIFIED               
llama3:latest        365c0bd3c000    4.7 GB    Less than a second ago    
mistral:latest       f974a74358d6    4.1 GB    Less than a second ago    
phi:latest           e2fd6321a5fe    1.6 GB    1 second ago              
all-minilm:l12-v2    4f5da3bd944d    67 MB     About an hour ago         
llama2:13b           d475bf4c50bc    7.4 GB    About an hour ago         


In [25]:
!pip install pyngrok



In [26]:
from google.colab import userdata
from pyngrok import ngrok, conf

# The ngrok auth token is read from the Colab secret named 'colab-ngrok',
# so it never appears in the notebook source.
ngrok_auth = userdata.get('colab-ngrok')
if not ngrok_auth:
    raise ValueError(
        "Colab secret 'colab-ngrok' is empty; store your ngrok auth token in it."
    )

conf.get_default().auth_token = ngrok_auth

# Must match the port the Ollama server listens on (OLLAMA_HOST=0.0.0.0:7000).
port = "7000"

# Re-running this cell should not stack a second tunnel on top of the first:
# reuse a tunnel that already forwards to `port`, preferring its https URL.
# NOTE(review): the tunnel exposes the unauthenticated Ollama API publicly.
existing_urls = sorted(
    (
        tunnel.public_url
        for tunnel in ngrok.get_tunnels()
        if str(tunnel.config.get("addr", "")).endswith(f":{port}")
    ),
    key=lambda url: not url.startswith("https://"),
)
public_url = existing_urls[0] if existing_urls else ngrok.connect(port).public_url
print(public_url)

https://8216-34-90-52-240.ngrok-free.app


In [27]:
# List the models once more after opening the ngrok tunnel
# (presumably a check that the server still responds -- confirm).
!ollama list

NAME                 ID              SIZE      MODIFIED          
llama3:latest        365c0bd3c000    4.7 GB    3 seconds ago        
mistral:latest       f974a74358d6    4.1 GB    3 seconds ago        
phi:latest           e2fd6321a5fe    1.6 GB    4 seconds ago        
all-minilm:l12-v2    4f5da3bd944d    67 MB     About an hour ago    
llama2:13b           d475bf4c50bc    7.4 GB    About an hour ago    
