In [1]:
# Colab cell (bash)
!pip install -q scikit-learn joblib flask gunicorn

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/85.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m81.9/85.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.0/85.0 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Colab cell (python)
import os
import joblib
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

In [3]:
# Choose categories to keep training light and focused
categories = ["rec.sport.baseball", "sci.space", "comp.graphics", "talk.politics.misc"]

data = fetch_20newsgroups(subset="all", categories=categories, remove=('headers','footers','quotes'))
X, y = data.data, data.target

# quick train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# define pipeline
model = make_pipeline(TfidfVectorizer(max_features=20000, ngram_range=(1,2)), MultinomialNB())

# train
model.fit(X_train, y_train)

# evaluate
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=data.target_names))

Accuracy: 0.8887399463806971
                    precision    recall  f1-score   support

     comp.graphics       0.93      0.93      0.93       195
rec.sport.baseball       0.83      0.93      0.88       199
         sci.space       0.88      0.89      0.88       197
talk.politics.misc       0.95      0.78      0.86       155

          accuracy                           0.89       746
         macro avg       0.90      0.88      0.89       746
      weighted avg       0.89      0.89      0.89       746



In [4]:
examples = [
    "The satellite launch was successful and NASA confirmed orbit.",
    "The pitcher threw a complete game shutout last night in the baseball series.",
    "3D rendering of graphics using OpenGL shaders.",
    "The latest political debate covered foreign policy and taxation."
]

preds = model.predict(examples)
for txt, p in zip(examples, preds):
    print(f"> {data.target_names[p]}  \n  {txt}\n")

> sci.space  
  The satellite launch was successful and NASA confirmed orbit.

> rec.sport.baseball  
  The pitcher threw a complete game shutout last night in the baseball series.

> comp.graphics  
  3D rendering of graphics using OpenGL shaders.

> sci.space  
  The latest political debate covered foreign policy and taxation.



In [7]:
# Save locally in Colab VM
os.makedirs("models", exist_ok=True)
joblib.dump((model, data.target_names), "models/text_nb_pipeline.joblib")
print("Saved to models/text_nb_pipeline.joblib")

Saved to models/text_nb_pipeline.joblib


In [8]:
from flask import Flask, request, jsonify
import joblib

# Load the model
model, labels = joblib.load("models/text_nb_pipeline.joblib")

# Flask app
app = Flask(__name__)

@app.route("/predict", methods=["POST"])
def predict():
    text = request.json.get("text", "")
    pred = model.predict([text])[0]
    return jsonify({"label_index": int(pred), "label": labels[pred]})

@app.route("/health")
def health():
    return "ok"

# Run server

In [9]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.3.0-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.3.0


In [11]:
# install node + npm (may take ~30s)
!apt-get update -y
!apt-get install -y nodejs npm

# install localtunnel globally
!npm install -g localtunnel

Hit:1 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:2 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:3 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:4 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:5 https://cli.github.com/packages stable InRelease [3,917 B]
Get:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:7 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Get:9 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:10 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease [24.3 kB]
Hit:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:12 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [2,006 kB]
Get:13 https://cli.github.com/packages stable/main am

In [12]:
!gunicorn --bind 0.0.0.0:5000 app:app --workers 2 --timeout 120 &

[2025-09-09 17:04:02 +0000] [7667] [INFO] Starting gunicorn 23.0.0
[2025-09-09 17:04:02 +0000] [7667] [INFO] Listening at: http://0.0.0.0:5000 (7667)
[2025-09-09 17:04:02 +0000] [7667] [INFO] Using worker: sync
[2025-09-09 17:04:02 +0000] [7668] [INFO] Booting worker with pid: 7668
[2025-09-09 17:04:02 +0000] [7668] [ERROR] Exception in worker process
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/gunicorn/arbiter.py", line 608, in spawn_worker
    worker.init_process()
  File "/usr/local/lib/python3.12/dist-packages/gunicorn/workers/base.py", line 135, in init_process
    self.load_wsgi()
  File "/usr/local/lib/python3.12/dist-packages/gunicorn/workers/base.py", line 147, in load_wsgi
    self.wsgi = self.app.wsgi()
                ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/gunicorn/app/base.py", line 66, in wsgi
    self.callable = self.load()
                    ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/gunic

In [13]:
!lt --port 10

your url is: https://red-nails-hug.loca.lt
^C


In [15]:
!apt-get update -y
!apt-get install -y docker.io
!service docker start
!docker --version

0% [Working]            Hit:1 https://cli.github.com/packages stable InRelease
0% [Connecting to archive.ubuntu.com (185.125.190.39)] [Waiting for headers] [C                                                                               Hit:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
0% [Waiting for headers] [Waiting for headers] [Waiting for headers] [Waiting f                                                                               Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
0% [Waiting for headers] [Waiting for headers] [Waiting for headers] [Connected                                                                               Hit:4 http://security.ubuntu.com/ubuntu jammy-security InRelease
0% [Waiting for headers] [Waiting for headers] [Connected to ppa.launchpadconte                                                                               Hit:5 http://archive.ubuntu.com/ubuntu jam

In [16]:
import os, joblib
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# Train quick model
cats = ["rec.sport.baseball", "sci.space"]
data = fetch_20newsgroups(subset="all", categories=cats, remove=('headers','footers','quotes'))
model = make_pipeline(TfidfVectorizer(max_features=5000), MultinomialNB())
model.fit(data.data, data.target)

# Save model in Colab VM
os.makedirs("models", exist_ok=True)
joblib.dump((model, data.target_names), "models/text_nb_pipeline.joblib")
print("✅ Model saved in Colab VM")

✅ Model saved in Colab VM


In [20]:
%%bash
cat > app.py <<'EOF'
import joblib, os
from flask import Flask, request, jsonify

model, labels = joblib.load("models/text_nb_pipeline.joblib")

app = Flask(__name__)

@app.route("/health", methods=["GET"])
def health():
    return "ok", 200

@app.route("/predict", methods=["POST"])
def predict():
    payload = request.json or {}
    text = payload.get("text", "")
    if not text:
        return jsonify({"error": "no text provided"}), 400
    pred = model.predict([text])[0]
    return jsonify({"label_index": int(pred), "label": labels[pred]})
EOF


In [21]:
%%bash
cat > requirements.txt <<'EOF'
flask
gunicorn
scikit-learn
joblib
EOF

In [22]:
%%bash
cat > Dockerfile <<'EOF'
FROM python:3.11-slim

WORKDIR /app

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY app.py .
COPY models models

EXPOSE 5000
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "app:app", "--workers", "2", "--timeout", "120"]
EOF

In [24]:
# Colab cell (bash)
!which docker || echo "docker not found"
!docker --version || echo "docker CLI present but daemon may not be running"

/usr/bin/docker
Docker version 27.5.1, build 27.5.1-0ubuntu3~22.04.2


In [25]:
!docker info

Client:
 Version:    27.5.1
 Context:    default
 Debug Mode: false

Server:
ERROR: Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?
errors pretty printing info


In [26]:
# Single Colab cell: try to start dockerd and show status/logs
import time, os, sys, subprocess, pathlib

LOG="/tmp/dockerd.log"

# start dockerd in background (nohub) if it's not already running
print("1) Checking for existing docker daemon...")
proc = subprocess.run(["pgrep","-f","dockerd"], capture_output=True, text=True)
if proc.returncode == 0:
    print("dockerd already running (pid):", proc.stdout.strip())
else:
    print("Starting dockerd (this may take a few seconds)...")
    # start dockerd; redirect output to LOG
    os.system("nohup /usr/bin/dockerd --host=unix:///var/run/docker.sock --host=tcp://127.0.0.1:2375 > {} 2>&1 &".format(LOG))
    time.sleep(7)

# show last lines of log
print("\n2) Last 200 lines of dockerd log (if any):\n" + "-"*60)
if pathlib.Path(LOG).exists():
    print(subprocess.run(["tail","-n","200", LOG], capture_output=True, text=True).stdout)
else:
    print("No dockerd log found at", LOG)

# try docker info
print("\n3) docker info (this will show daemon status):\n" + "-"*60)
proc = subprocess.run(["docker","info"], capture_output=True, text=True)
if proc.returncode == 0:
    print(proc.stdout)
else:
    print("docker info failed. stderr:\n")
    print(proc.stderr)
    print("\nIf docker still isn't running, Colab may block running the Docker daemon on this VM.")
    print("Two recommended alternatives:")
    print(" A) Use GitHub Actions to build & push the Docker image for you (I can provide workflow).")
    print(" B) Build/push locally (if you have Docker on your machine).")

1) Checking for existing docker daemon...
Starting dockerd (this may take a few seconds)...

2) Last 200 lines of dockerd log (if any):
------------------------------------------------------------
time="2025-09-09T17:27:48.310455567Z" level=info msg="Starting up"
time="2025-09-09T17:27:49.312248094Z" level=info msg="containerd not running, starting managed containerd"
time="2025-09-09T17:27:49.316486101Z" level=info msg="started new containerd process" address=/var/run/docker/containerd/containerd.sock module=libcontainerd pid=15188
time="2025-09-09T17:27:49.449727769Z" level=info msg="starting containerd" revision= version=1.7.27
time="2025-09-09T17:27:49.562389261Z" level=info msg="loading plugin \"io.containerd.snapshotter.v1.aufs\"..." type=io.containerd.snapshotter.v1
time="2025-09-09T17:27:49.595061883Z" level=info msg="skip loading plugin \"io.containerd.snapshotter.v1.aufs\"..." error="aufs is not supported (modprobe aufs failed: exit status 1 \"modprobe: FATAL: Module aufs not