# Disaster Tweets — AI Systems Engineering (LLM)


A clear, GitHub‑ready notebook: **config → data → tokenize → train → evaluate → inference → submission → service**.


## 1) Quickstart


```bash
pip install -e .
ai-tweets train --config configs/default.yaml --train-csv data/train.csv
ai-tweets serve --host 0.0.0.0 --port 8000
```


## 2) Config & Reproducibility


In [None]:
from pathlib import Path
import json, random, numpy as np
# Single source of truth for the run: model, optimisation settings and the
# locations where the trained model and its metrics are written.
CFG = dict(
    seed=42,
    model_name="distilbert-base-uncased",
    max_length=256,
    epochs=3,
    batch_size=16,
    learning_rate=5e-5,
    weight_decay=0.01,
    warmup_ratio=0.1,
    output_dir="artifacts/model",
    metrics_path="artifacts/metrics.json",
)
def set_seed(s=42):
    """Seed the Python, NumPy and (when installed) PyTorch random generators.

    Args:
        s: Integer seed applied to every generator.

    PyTorch is treated as optional: when it cannot be imported only ``random``
    and NumPy are seeded. Errors raised by the seeding calls themselves are
    propagated instead of being silently discarded.
    """
    random.seed(s)
    np.random.seed(s)
    try:
        import torch
    except ImportError:
        # torch is not installed; there is nothing else to seed.
        return
    torch.manual_seed(s)
    torch.cuda.manual_seed_all(s)
# Create the model directory and the folder that will hold the metrics file
# (no error if they already exist).
for _needed_dir in (Path(CFG["output_dir"]), Path(CFG["metrics_path"]).parent):
    _needed_dir.mkdir(parents=True, exist_ok=True)
CFG  # last expression of the cell: echo the active configuration


## 3) Data Loading


In [None]:
import pandas as pd
from pathlib import Path
# Candidate locations for the train / test CSVs, probed in this order.
CANDS_T = [Path(root) / 'train.csv' for root in ('data', '.', '..')]
CANDS_E = [Path(root) / 'test.csv' for root in ('data', '.', '..')]


def first(xs):
    """Return the first path in ``xs`` that exists on disk, or ``None`` if none does."""
    for candidate in xs:
        if candidate.exists():
            return candidate
    return None
# Resolve the dataset files; the test split is optional, the train split is not.
TRAIN_PATH, TEST_PATH = first(CANDS_T), first(CANDS_E)
if TRAIN_PATH is None:
    # Explicit raise instead of `assert`, which is skipped when Python runs with -O.
    raise FileNotFoundError('Place train.csv under data/')

df = pd.read_csv(TRAIN_PATH)

# The tweet body may be stored under either 'text' or 'tweet'; 'text' wins if both exist.
text_col = next((c for c in ('text', 'tweet') if c in df.columns), None)
if text_col is None or 'target' not in df.columns:
    raise ValueError(
        f"{TRAIN_PATH} must contain a 'text' or 'tweet' column and a 'target' column; "
        f"found columns: {list(df.columns)}"
    )

# The tokenizer is later fed this column directly, so force every value to str.
df[text_col] = df[text_col].astype(str)
len(df), text_col


## 4) Tokenization & Dataset Prep


In [None]:
from datasets import Dataset
from transformers import AutoTokenizer, DataCollatorWithPadding
# Shuffle (reproducibly) before splitting so the 10% hold-out does not depend on
# the row order of the CSV; taking the raw tail would evaluate on whatever
# happens to be stored last in the file.
df_shuf = df.sample(frac=1.0, random_state=CFG['seed']).reset_index(drop=True)
n_eval = max(1, int(len(df_shuf) * 0.1))
df_tr = df_shuf.iloc[:-n_eval].reset_index(drop=True)
df_ev = df_shuf.iloc[-n_eval:].reset_index(drop=True)


def _to_dataset(frame):
    """Build a Dataset holding the text column and the target exposed as 'label'.

    The rename matters: Trainer discards dataset columns that the model's
    forward() does not accept and only recognises 'label' / 'label_ids' /
    'labels' as targets. A column called 'target' would be dropped, leaving the
    model without labels and therefore without a loss to optimise.
    """
    return Dataset.from_pandas(frame[[text_col, 'target']].rename(columns={'target': 'label'}))


train_ds, eval_ds = _to_dataset(df_tr), _to_dataset(df_ev)
tok = AutoTokenizer.from_pretrained(CFG['model_name'])


def tok_fn(b):
    """Tokenize a batch of rows, truncating each text to CFG['max_length'] tokens."""
    return tok(b[text_col], truncation=True, max_length=CFG['max_length'])


train_ds = train_ds.map(tok_fn, batched=True)
eval_ds = eval_ds.map(tok_fn, batched=True)
# No padding in tok_fn: the collator pads each batch when it is assembled.
collator = DataCollatorWithPadding(tokenizer=tok)
len(train_ds), len(eval_ds)


## 5) Training (HF Trainer)


In [None]:
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, f1_score
set_seed(CFG['seed'])
args = TrainingArguments(
    output_dir=str(Path(CFG['output_dir'])),
    per_device_train_batch_size=CFG['batch_size'],
    per_device_eval_batch_size=CFG['batch_size'],
    num_train_epochs=CFG['epochs'],
    learning_rate=CFG['learning_rate'],
    weight_decay=CFG['weight_decay'],
    warmup_ratio=CFG['warmup_ratio'],
    evaluation_strategy='epoch',
    save_strategy='epoch',
    logging_strategy='steps',
    logging_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    greater_is_better=True,
    # Trainer seeds itself from its own `seed` argument; forward the configured
    # value so that changing CFG['seed'] actually changes the run.
    seed=CFG['seed'],
    report_to=[],
)
model = AutoModelForSequenceClassification.from_pretrained(CFG['model_name'], num_labels=2)


def metrics_fn(ep):
    """Compute accuracy and F1 from an evaluation result.

    Args:
        ep: Indexable evaluation output; ``ep[0]`` holds the per-class scores
            and ``ep[1]`` the reference labels.

    Returns:
        Dict with 'accuracy' and 'f1'.
    """
    y_hat = ep[0].argmax(axis=-1)  # predicted class = highest-scoring column
    y_ref = ep[1]
    return {'accuracy': accuracy_score(y_ref, y_hat), 'f1': f1_score(y_ref, y_hat)}


trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_ds,
    eval_dataset=eval_ds,
    tokenizer=tok,
    data_collator=collator,
    compute_metrics=metrics_fn,
)
trainer.train()
metrics = trainer.evaluate()

# Persist everything the later cells need: model weights, tokenizer files and
# the final evaluation metrics as JSON.
trainer.save_model(CFG['output_dir'])
tok.save_pretrained(CFG['output_dir'])
Path(CFG['metrics_path']).write_text(json.dumps(metrics, indent=2))
metrics


## 6) Evaluation (compact)


In [None]:
from sklearn.metrics import classification_report

# Run the trained model over the hold-out split and turn the per-class scores
# into hard predictions by taking the highest-scoring class.
preds = trainer.predict(eval_ds)
y_true = preds.label_ids
y_pred = preds.predictions.argmax(axis=-1)

print('Accuracy:', accuracy_score(y_true, y_pred))
print('F1      :', f1_score(y_true, y_pred))
print('\nClassification report:\n', classification_report(y_true, y_pred, digits=4))


## 7) Inference Demo


In [None]:
from transformers import pipeline
# Load model and tokenizer back from the directory they were saved to, then
# classify two hand-written example tweets.
_saved_dir = CFG['output_dir']
clf = pipeline('text-classification', model=_saved_dir, tokenizer=_saved_dir)
_demo_tweets = [
    'There is a huge fire downtown and people are evacuating!',
    'Beautiful weather today, going to the park.',
]
clf(_demo_tweets)


## 8) (Optional) Submission


In [None]:
from pathlib import Path, PurePath
if TEST_PATH is None:
    print('No test.csv detected — skipping submission')
else:
    df_te = pd.read_csv(TEST_PATH)
    if text_col not in df_te.columns:
        # Explicit raise instead of `assert`, which is skipped when Python runs with -O.
        raise ValueError(f"{TEST_PATH} has no {text_col!r} column; found: {list(df_te.columns)}")

    # truncation=True mirrors the service endpoints, so an over-long text is cut
    # instead of failing the whole batch.
    ys = clf(df_te[text_col].astype(str).tolist(), truncation=True)
    # Pipeline labels look like 'LABEL_0' / 'LABEL_1'; keep only the class index.
    labels = [int(p['label'].replace('LABEL_', '')) for p in ys]

    sub = pd.DataFrame({'target': labels})
    if 'id' in df_te.columns:
        sub.insert(0, 'id', df_te['id'].values)

    outp = Path('artifacts') / 'submission.csv'
    outp.parent.mkdir(parents=True, exist_ok=True)
    sub.to_csv(outp, index=False)
    print('Saved ->', outp)


## 9) FastAPI Service (HTTP + WS + /metrics)


In [None]:
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Response
from pydantic import BaseModel
from prometheus_client import Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST
import time, json
from typing import Optional, List
# Prometheus instruments shared by the HTTP and WebSocket handlers; each one is
# labelled by the endpoint that produced the sample.
REQUEST_COUNT = Counter(name='inference_requests_total', documentation='', labelnames=['endpoint'])
REQUEST_ERRORS = Counter(name='inference_errors_total', documentation='', labelnames=['endpoint'])
LATENCY = Histogram(name='inference_latency_seconds', documentation='', labelnames=['endpoint'])
class PredictIn(BaseModel):
    # Request body for POST /predict. Both fields are optional at the schema
    # level; the handler uses `texts` when it is non-empty and otherwise falls
    # back to the single `text`.
    text: Optional[str] = None
    texts: Optional[List[str]] = None
def make_app(model_dir: str):
    """Build the FastAPI application that serves a saved text-classification model.

    Args:
        model_dir: Directory passed to the transformers pipeline as both the
            model and the tokenizer location.

    Returns:
        A FastAPI app exposing GET /health, POST /predict, GET /metrics and a
        WebSocket endpoint at /ws. Requests, errors and latency of /predict and
        /ws are recorded in the module-level Prometheus instruments.
    """
    from fastapi import HTTPException
    from transformers import pipeline as hf_pipe

    app = FastAPI(title='LLM Text Classification Service', version='1.0.0')
    lp = hf_pipe('text-classification', model=model_dir, tokenizer=model_dir)

    def _normalize(o):
        # Shared by both endpoints: drop the 'LABEL_' prefix and make the score
        # a plain float so it is JSON-serialisable.
        return {'label': o.get('label', 'LABEL_1').replace('LABEL_', ''), 'score': float(o.get('score', 0.0))}

    @app.get('/health')
    def health():
        return {'status': 'ok'}

    @app.post('/predict')
    def predict(inp: PredictIn):
        REQUEST_COUNT.labels(endpoint='/predict').inc()
        t = time.perf_counter()
        try:
            batch = inp.texts if inp.texts else ([inp.text] if inp.text else None)
            if not batch:
                # Missing input is the client's mistake: answer with a 4xx status
                # instead of letting a bare ValueError surface as HTTP 500.
                raise HTTPException(status_code=422, detail="Provide 'text' or 'texts'")
            outs = lp(batch, truncation=True)
            return {'predictions': [_normalize(o) for o in outs]}
        except Exception:
            REQUEST_ERRORS.labels(endpoint='/predict').inc()
            raise
        finally:
            LATENCY.labels(endpoint='/predict').observe(time.perf_counter() - t)

    @app.get('/metrics')
    def metrics():
        return Response(content=generate_latest(), media_type=CONTENT_TYPE_LATEST)

    @app.websocket('/ws')
    async def ws(sock: WebSocket):
        # The parameter is named `sock` so it no longer shadows the handler's own name.
        await sock.accept()
        try:
            while True:
                msg = await sock.receive_text()
                REQUEST_COUNT.labels(endpoint='/ws').inc()
                t = time.perf_counter()
                try:
                    payload = json.loads(msg)
                    text = payload.get('text')
                    if not text:
                        await sock.send_text(json.dumps({'error': 'send JSON with \'text\''}))
                        continue
                    out = lp([text], truncation=True)[0]
                    await sock.send_text(json.dumps(_normalize(out)))
                except WebSocketDisconnect:
                    # A disconnect is not an inference failure: let the outer
                    # handler end the session rather than counting an error and
                    # replying on a closed socket.
                    raise
                except Exception as e:
                    # Report the problem to the client and keep the session open.
                    REQUEST_ERRORS.labels(endpoint='/ws').inc()
                    await sock.send_text(json.dumps({'error': str(e)}))
                finally:
                    LATENCY.labels(endpoint='/ws').observe(time.perf_counter() - t)
        except WebSocketDisconnect:
            pass

    return app
# Module-level application object built from the saved model directory, plus
# a hint on how to launch it.
app = make_app(CFG['output_dir'])
print('Run: uvicorn notebook:app --reload --port 8000')
