In [5]:
# AI PM Portfolio Project: Delivery Request Classifier
# This project demonstrates end-to-end AI product development for Last Mile Delivery

# ============================================================================
# STEP 1: SETUP AND IMPORTS
# ============================================================================
print("Installing required libraries...")
# Run this cell first in Google Colab
# !pip install transformers datasets huggingface_hub accelerate -q

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset, DatasetDict
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import json

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

# ============================================================================
# STEP 2: CREATE SYNTHETIC TRAINING DATA
# ============================================================================
# In a real AI PM project, you'd gather this from user research and existing data
# This simulates customer delivery requests

delivery_requests = [
    # Urgent residential
    "Need same-day delivery to my home, very urgent!",
    "Emergency delivery needed ASAP to residence",
    "Can you rush this to my house today?",
    "Urgent! Need this at home within 2 hours",

    # Standard residential
    "Please deliver to my home address next week",
    "Standard delivery to residential address is fine",
    "Can you drop this off at my house sometime this week?",
    "Regular delivery to home, no rush",

    # Urgent commercial
    "Business needs this delivered urgently today",
    "Rush delivery to our office required",
    "Need immediate delivery to company warehouse",
    "Urgent office delivery needed within hours",

    # Standard commercial
    "Standard delivery to our business location",
    "Regular office delivery is acceptable",
    "Please deliver to warehouse next week",
    "Business address, standard shipping fine"
]

labels = [0, 0, 0, 0,  # urgent residential (0)
          1, 1, 1, 1,  # standard residential (1)
          2, 2, 2, 2,  # urgent commercial (2)
          3, 3, 3, 3]  # standard commercial (3)

# Create DataFrame
df = pd.DataFrame({'text': delivery_requests, 'label': labels})

# Split into train/test
train_size = int(0.8 * len(df))
train_df = df[:train_size]
test_df = df[train_size:]

print(f"\nDataset created: {len(train_df)} training samples, {len(test_df)} test samples")
print("\nLabel mapping:")
print("0: Urgent Residential")
print("1: Standard Residential")
print("2: Urgent Commercial")
print("3: Standard Commercial")

# ============================================================================
# STEP 3: PREPARE DATA FOR HUGGING FACE
# ============================================================================

# Convert to Hugging Face Dataset format
train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

dataset = DatasetDict({
    'train': train_dataset,
    'test': test_dataset
})

print("\nDataset structure:")
print(dataset)

# ============================================================================
# STEP 4: LOAD PRE-TRAINED MODEL FROM HUGGING FACE
# ============================================================================

model_name = "distilbert-base-uncased"  # Lightweight, fast model
print(f"\nLoading model: {model_name}")

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=4  # 4 categories
)

# Tokenize function
def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True, max_length=128)

# Apply tokenization
tokenized_datasets = dataset.map(tokenize_function, batched=True)

print("Data tokenization complete!")

# ============================================================================
# STEP 5: DEFINE TRAINING CONFIGURATION
# ============================================================================
import os
os.environ["WANDB_DISABLED"] = "true"

# Metrics function for evaluation
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average='weighted'
    )
    acc = accuracy_score(labels, predictions)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

# Training arguments optimized for Colab free tier
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    warmup_steps=50,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test'],
    compute_metrics=compute_metrics,
)

# ============================================================================
# STEP 6: TRAIN THE MODEL
# ============================================================================

print("\n" + "="*50)
print("TRAINING STARTED")
print("="*50)

train_result = trainer.train()

print("\n" + "="*50)
print("TRAINING COMPLETE")
print("="*50)

# ============================================================================
# STEP 7: EVALUATE AND TEST
# ============================================================================

# Evaluate on test set
eval_results = trainer.evaluate()

print("\nModel Performance:")
print(f"Accuracy: {eval_results['eval_accuracy']:.2%}")
print(f"F1 Score: {eval_results['eval_f1']:.2%}")
print(f"Precision: {eval_results['eval_precision']:.2%}")
print(f"Recall: {eval_results['eval_recall']:.2%}")

# ============================================================================
# STEP 8: INFERENCE - TEST WITH NEW EXAMPLES
# ============================================================================

def predict_delivery_type(text):
    """Predict delivery type for new request"""

    # 1. Tokenize and move to the model's device (GPU/CPU)
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128).to(model.device)

    # 2. Perform inference
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
        predicted_class = torch.argmax(predictions, dim=-1).item()
        confidence = predictions[0][predicted_class].item()

    # 3. Map prediction to label
    label_map = {
        0: "Urgent Residential",
        1: "Standard Residential",
        2: "Urgent Commercial",
        3: "Standard Commercial"
    }

    return label_map[predicted_class], confidence

# Test with new examples
test_requests = [
    "Need emergency delivery to my apartment ASAP!",
    "Can you deliver to our office building next Thursday?",
    "Standard home delivery is fine",
]

print("\n" + "="*50)
print("TESTING NEW PREDICTIONS")
print("="*50)

for req in test_requests:
    prediction, confidence = predict_delivery_type(req)
    print(f"\nRequest: '{req}'")
    print(f"Prediction: {prediction}")
    print(f"Confidence: {confidence:.2%}")

# ============================================================================
# STEP 9: SAVE MODEL FOR GITHUB PORTFOLIO
# ============================================================================

# Save model locally (you'll upload to GitHub)
model.save_pretrained('./delivery_classifier_model')
tokenizer.save_pretrained('./delivery_classifier_model')

print("\nâœ… Model saved to './delivery_classifier_model'")
print("\n" + "="*50)
print("PROJECT COMPLETE!")
print("="*50)

# ============================================================================
# KEY METRICS FOR YOUR RESUME (Document these!)
# ============================================================================

print("\nðŸ“Š METRICS TO INCLUDE ON YOUR AI PM RESUME:")
print(f"â€¢ Built and deployed NLP classifier with {eval_results['eval_accuracy']:.1%} accuracy")
print(f"â€¢ Processed and categorized delivery requests using transformer models")
print(f"â€¢ Implemented end-to-end ML pipeline from data prep to inference")
print(f"â€¢ Utilized Hugging Face transformers and DistilBERT architecture")
print(f"â€¢ Achieved {eval_results['eval_f1']:.1%} F1 score on classification task")

# ============================================================================
# NEXT STEPS FOR YOUR PORTFOLIO
# ============================================================================

print("\nðŸš€ NEXT STEPS:")
print("1. Expand dataset with 100+ examples (use ChatGPT to generate more)")
print("2. Add model versioning and experiment tracking (MLflow/Weights & Biases)")
print("3. Create a Streamlit or Gradio demo interface")
print("4. Document product requirements and success metrics")
print("5. Write a Medium article about your learnings")
print("6. Push to GitHub with clear README")

Installing required libraries...
PyTorch version: 2.9.0+cu126
CUDA available: True

Dataset created: 12 training samples, 4 test samples

Label mapping:
0: Urgent Residential
1: Standard Residential
2: Urgent Commercial
3: Standard Commercial

Dataset structure:
DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 12
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 4
    })
})

Loading model: distilbert-base-uncased


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Data tokenization complete!

TRAINING STARTED


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.424287,0.0,0.0,0.0,0.0
2,No log,1.443921,0.0,0.0,0.0,0.0
3,No log,1.482802,0.0,0.0,0.0,0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



TRAINING COMPLETE



Model Performance:
Accuracy: 0.00%
F1 Score: 0.00%
Precision: 0.00%
Recall: 0.00%

TESTING NEW PREDICTIONS

Request: 'Need emergency delivery to my apartment ASAP!'
Prediction: Urgent Residential
Confidence: 28.53%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Request: 'Can you deliver to our office building next Thursday?'
Prediction: Urgent Residential
Confidence: 28.20%

Request: 'Standard home delivery is fine'
Prediction: Urgent Residential
Confidence: 27.85%

âœ… Model saved to './delivery_classifier_model'

PROJECT COMPLETE!

ðŸ“Š METRICS TO INCLUDE ON YOUR AI PM RESUME:
â€¢ Built and deployed NLP classifier with 0.0% accuracy
â€¢ Processed and categorized delivery requests using transformer models
â€¢ Implemented end-to-end ML pipeline from data prep to inference
â€¢ Utilized Hugging Face transformers and DistilBERT architecture
â€¢ Achieved 0.0% F1 score on classification task

ðŸš€ NEXT STEPS:
1. Expand dataset with 100+ examples (use ChatGPT to generate more)
2. Add model versioning and experiment tracking (MLflow/Weights & Biases)
3. Create a Streamlit or Gradio demo interface
4. Document product requirements and success metrics
5. Write a Medium article about your learnings
6. Push to GitHub with clear README
