## 1. Setup & Load Model

In [1]:
import pandas as pd
import numpy as np
import joblib
from pathlib import Path
import json
import warnings
warnings.filterwarnings('ignore')

# Paths
MODELS_DIR = Path('../models')
PROCESSED_DATA_DIR = Path('../data/processed')
SRC_DIR = Path('../src')
SRC_DIR.mkdir(exist_ok=True)

print("‚úÖ Setup complete!")


‚úÖ Setup complete!


In [2]:
# Load model and configuration
model = joblib.load(MODELS_DIR / 'tuned_model.joblib')
model_metadata = joblib.load(MODELS_DIR / 'tuned_model_metadata.joblib')
feature_config = joblib.load(PROCESSED_DATA_DIR / 'feature_config.joblib')

ALL_FEATURES = feature_config['all_features']
TARGET = feature_config['target']
MODEL_NAME = model_metadata['model_name']

print(f"ü§ñ Model: {MODEL_NAME}")
print(f"üìä Features: {len(ALL_FEATURES)}")
print(f"üéØ Target: {TARGET}")


ü§ñ Model: random_forest
üìä Features: 18
üéØ Target: fare_amount


## 2. Create Production-Ready Model Package

In [3]:
# Create comprehensive model package for deployment
model_package = {
    'model': model,
    'features': ALL_FEATURES,
    'target': TARGET,
    'model_name': MODEL_NAME,
    'model_type': type(model).__name__,
    'version': '1.0.0',
    'created_at': pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
}

# Save production model package
production_model_path = MODELS_DIR / 'production_model.joblib'
joblib.dump(model_package, production_model_path)

print(f"‚úÖ Production model saved: {production_model_path}")
print(f"   Version: {model_package['version']}")
print(f"   Created: {model_package['created_at']}")


‚úÖ Production model saved: ../models/production_model.joblib
   Version: 1.0.0
   Created: 2025-12-13 10:01:23


In [4]:
# Save feature configuration as JSON (for API validation)
feature_info = {
    'features': ALL_FEATURES,
    'target': TARGET,
    'model_name': MODEL_NAME
}

with open(MODELS_DIR / 'feature_config.json', 'w') as f:
    json.dump(feature_info, f, indent=2)

print("‚úÖ Feature config saved as JSON")


‚úÖ Feature config saved as JSON


## 3. Create Inference Class

In [6]:
class NYCTaxiFarePredictor:
    """
    Production-ready predictor for NYC Taxi Fare.
    """
    
    def __init__(self, model_path: str):
        """Load model from path."""
        package = joblib.load(model_path)
        self.model = package['model']
        self.features = package['features']
        self.target = package['target']
        self.model_name = package['model_name']
        self.version = package['version']
        print(f"‚úÖ Loaded {self.model_name} v{self.version}")
    
    def predict(self, data: dict) -> float:
        """
        Make single prediction from dictionary input.
        
        Args:
            data: Dictionary with feature values
            
        Returns:
            Predicted fare amount
        """
        # Convert to DataFrame
        df = pd.DataFrame([data])
        
        # Ensure all features present
        for feature in self.features:
            if feature not in df.columns:
                raise ValueError(f"Missing feature: {feature}")
        
        # Make prediction
        prediction = self.model.predict(df[self.features])[0]
        return float(prediction)
    
    def predict_batch(self, data: pd.DataFrame) -> np.ndarray:
        """
        Make batch predictions from DataFrame.
        
        Args:
            data: DataFrame with feature columns
            
        Returns:
            Array of predicted fares
        """
        return self.model.predict(data[self.features])
    
    def get_info(self) -> dict:
        """Return model information."""
        return {
            'model_name': self.model_name,
            'version': self.version,
            'features': self.features,
            'target': self.target
        }

print("‚úÖ Predictor class defined")


‚úÖ Predictor class defined


## 4. Test Inference Locally

In [7]:
# Initialize predictor
predictor = NYCTaxiFarePredictor(str(production_model_path))


‚úÖ Loaded random_forest v1.0.0


In [8]:
# Load test data for sample inputs
test_df = pd.read_parquet(PROCESSED_DATA_DIR / 'test.parquet')
print(f"üìä Test data loaded: {test_df.shape}")
print(f"\nüìã Features available:")
print(ALL_FEATURES)


üìä Test data loaded: (2393196, 19)

üìã Features available:
['trip_distance', 'passenger_count', 'trip_duration_minutes', 'avg_speed_mph', 'pickup_hour', 'pickup_dayofweek', 'pickup_month', 'hour_sin', 'hour_cos', 'dow_sin', 'dow_cos', 'PULocationID', 'DOLocationID', 'VendorID', 'is_weekend', 'is_rush_hour', 'same_location', 'has_tolls']


In [9]:
# Test single prediction
sample_input = test_df[ALL_FEATURES].iloc[0].to_dict()
actual_fare = test_df[TARGET].iloc[0]

print("üìù Sample Input:")
for key, value in sample_input.items():
    print(f"   {key}: {value}")

# Make prediction
predicted_fare = predictor.predict(sample_input)

print(f"\nüéØ Prediction Results:")
print(f"   Predicted Fare: ${predicted_fare:.2f}")
print(f"   Actual Fare:    ${actual_fare:.2f}")
print(f"   Difference:     ${abs(predicted_fare - actual_fare):.2f}")


üìù Sample Input:
   trip_distance: 0.7
   passenger_count: 1.0
   trip_duration_minutes: 6.7
   avg_speed_mph: 6.26865671641791
   pickup_hour: 10.0
   pickup_dayofweek: 4.0
   pickup_month: 3.0
   hour_sin: 0.49999999999999994
   hour_cos: -0.8660254037844387
   dow_sin: -0.433883739117558
   dow_cos: -0.9009688679024191
   PULocationID: 170.0
   DOLocationID: 162.0
   VendorID: 1.0
   is_weekend: 0.0
   is_rush_hour: 0.0
   same_location: 0.0
   has_tolls: 0.0

üéØ Prediction Results:
   Predicted Fare: $7.56
   Actual Fare:    $7.90
   Difference:     $0.34


In [10]:
# Test batch prediction
batch_size = 100
batch_data = test_df.head(batch_size)

predictions = predictor.predict_batch(batch_data)
actuals = batch_data[TARGET].values

# Calculate metrics
from sklearn.metrics import mean_absolute_error, r2_score

mae = mean_absolute_error(actuals, predictions)
r2 = r2_score(actuals, predictions)

print(f"üìä Batch Prediction Test (n={batch_size}):")
print(f"   MAE: ${mae:.4f}")
print(f"   R¬≤:  {r2:.4f}")
print("\n‚úÖ Model inference working correctly!")


üìä Batch Prediction Test (n=100):
   MAE: $1.1051
   R¬≤:  0.9800

‚úÖ Model inference working correctly!


## 5. Generate FastAPI Script

In [11]:
# Generate FastAPI script
fastapi_code = '''"""FastAPI endpoint for NYC Taxi Fare Prediction."""

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import pandas as pd
import joblib
from pathlib import Path
from typing import List, Optional

# Initialize FastAPI app
app = FastAPI(
    title="NYC Taxi Fare Prediction API",
    description="Predict NYC taxi fares using ML model",
    version="1.0.0"
)

# Load model on startup
MODEL_PATH = Path(__file__).parent.parent / "models" / "production_model.joblib"
model_package = None

@app.on_event("startup")
def load_model():
    global model_package
    model_package = joblib.load(MODEL_PATH)
    print(f"‚úÖ Model loaded: {model_package['model_name']} v{model_package['version']}")

# Request/Response models
class PredictionRequest(BaseModel):
    """Input features for prediction."""
    trip_distance: float
    pickup_hour: int
    pickup_dayofweek: int
    passenger_count: int = 1
    # Add other features as needed based on your model
    
    class Config:
        schema_extra = {
            "example": {
                "trip_distance": 2.5,
                "pickup_hour": 14,
                "pickup_dayofweek": 2,
                "passenger_count": 2
            }
        }

class PredictionResponse(BaseModel):
    """Prediction result."""
    predicted_fare: float
    model_name: str
    model_version: str

class HealthResponse(BaseModel):
    """Health check response."""
    status: str
    model_loaded: bool
    model_name: Optional[str]

# Endpoints
@app.get("/")
def root():
    return {"message": "NYC Taxi Fare Prediction API", "docs": "/docs"}

@app.get("/health", response_model=HealthResponse)
def health_check():
    """Check API health and model status."""
    return HealthResponse(
        status="healthy",
        model_loaded=model_package is not None,
        model_name=model_package['model_name'] if model_package else None
    )

@app.post("/predict", response_model=PredictionResponse)
def predict(request: PredictionRequest):
    """Make fare prediction."""
    if model_package is None:
        raise HTTPException(status_code=503, detail="Model not loaded")
    
    try:
        # Convert request to DataFrame
        data = pd.DataFrame([request.dict()])
        
        # Get features and model
        features = model_package['features']
        model = model_package['model']
        
        # Add missing features with default values
        for feature in features:
            if feature not in data.columns:
                data[feature] = 0
        
        # Make prediction
        prediction = model.predict(data[features])[0]
        
        return PredictionResponse(
            predicted_fare=round(float(prediction), 2),
            model_name=model_package['model_name'],
            model_version=model_package['version']
        )
        
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/model/info")
def model_info():
    """Get model information."""
    if model_package is None:
        raise HTTPException(status_code=503, detail="Model not loaded")
    
    return {
        "model_name": model_package['model_name'],
        "model_type": model_package['model_type'],
        "version": model_package['version'],
        "features": model_package['features'],
        "created_at": model_package['created_at']
    }

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
'''

# Save FastAPI script
api_path = SRC_DIR / 'api.py'
with open(api_path, 'w') as f:
    f.write(fastapi_code)

print(f"‚úÖ FastAPI script saved: {api_path}")
print("\nüìù To run the API:")
print("   cd mlops/src")
print("   uvicorn api:app --reload")
print("\n   Then visit: http://localhost:8000/docs")


‚úÖ FastAPI script saved: ../src/api.py

üìù To run the API:
   cd mlops/src
   uvicorn api:app --reload

   Then visit: http://localhost:8000/docs


## 6. Generate Streamlit App

In [12]:
# Generate Streamlit app
streamlit_code = '''"""Streamlit App for NYC Taxi Fare Prediction."""

import streamlit as st
import pandas as pd
import numpy as np
import joblib
from pathlib import Path

# Page config
st.set_page_config(
    page_title="NYC Taxi Fare Predictor",
    page_icon="üöï",
    layout="wide"
)

# Load model
@st.cache_resource
def load_model():
    model_path = Path(__file__).parent.parent / "models" / "production_model.joblib"
    return joblib.load(model_path)

# Main app
def main():
    st.title("üöï NYC Taxi Fare Predictor")
    st.markdown("Predict taxi fare using Machine Learning")
    
    # Load model
    try:
        model_package = load_model()
        model = model_package['model']
        features = model_package['features']
        
        st.sidebar.success(f"‚úÖ Model: {model_package['model_name']}")
        st.sidebar.info(f"Version: {model_package['version']}")
    except Exception as e:
        st.error(f"‚ùå Error loading model: {e}")
        return
    
    # Sidebar inputs
    st.sidebar.header("üéõÔ∏è Trip Details")
    
    trip_distance = st.sidebar.slider(
        "Trip Distance (miles)",
        min_value=0.1,
        max_value=30.0,
        value=2.5,
        step=0.1
    )
    
    pickup_hour = st.sidebar.slider(
        "Pickup Hour",
        min_value=0,
        max_value=23,
        value=14
    )
    
    pickup_dayofweek = st.sidebar.selectbox(
        "Day of Week",
        options=[0, 1, 2, 3, 4, 5, 6],
        format_func=lambda x: ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"][x],
        index=2
    )
    
    passenger_count = st.sidebar.slider(
        "Passenger Count",
        min_value=1,
        max_value=6,
        value=1
    )
    
    # Create input DataFrame
    input_data = {
        'trip_distance': trip_distance,
        'pickup_hour': pickup_hour,
        'pickup_dayofweek': pickup_dayofweek,
        'passenger_count': passenger_count
    }
    
    # Add missing features with default values
    df = pd.DataFrame([input_data])
    for feature in features:
        if feature not in df.columns:
            df[feature] = 0
    
    # Main content
    col1, col2 = st.columns(2)
    
    with col1:
        st.subheader("üìù Trip Information")
        st.write(f"**Distance:** {trip_distance} miles")
        st.write(f"**Time:** {pickup_hour}:00")
        st.write(f"**Day:** {['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][pickup_dayofweek]}")
        st.write(f"**Passengers:** {passenger_count}")
    
    with col2:
        st.subheader("üí∞ Fare Prediction")
        
        if st.button("üîÆ Predict Fare", type="primary"):
            with st.spinner("Calculating..."):
                prediction = model.predict(df[features])[0]
                
                st.metric(
                    label="Predicted Fare",
                    value=f"${prediction:.2f}"
                )
                
                # Confidence message
                if prediction < 10:
                    st.info("üí° Short trip - typical for nearby destinations")
                elif prediction < 30:
                    st.info("üí° Medium trip - common for cross-borough travel")
                else:
                    st.info("üí° Long trip - possibly to/from airports")
    
    # Footer
    st.markdown("---")
    st.markdown("**üìä Model Info**")
    with st.expander("View Model Details"):
        st.json({
            "model_name": model_package['model_name'],
            "model_type": model_package['model_type'],
            "version": model_package['version'],
            "num_features": len(features),
            "created_at": model_package['created_at']
        })

if __name__ == "__main__":
    main()
'''

# Save Streamlit app
app_path = SRC_DIR / 'app.py'
with open(app_path, 'w') as f:
    f.write(streamlit_code)

print(f"‚úÖ Streamlit app saved: {app_path}")
print("\nüìù To run the app:")
print("   cd mlops/src")
print("   streamlit run app.py")


‚úÖ Streamlit app saved: ../src/app.py

üìù To run the app:
   cd mlops/src
   streamlit run app.py


## 7. Generate Requirements File

In [13]:
# Generate requirements.txt
requirements = """# Core ML
pandas>=1.5.0
numpy>=1.21.0
scikit-learn>=1.0.0
joblib>=1.1.0

# API & Web
fastapi>=0.100.0
uvicorn>=0.22.0
streamlit>=1.25.0
pydantic>=2.0.0

# Monitoring
evidently>=0.7.0

# Data
pyarrow>=12.0.0

# Visualization
matplotlib>=3.5.0
seaborn>=0.12.0
"""

req_path = Path('../requirements.txt')
with open(req_path, 'w') as f:
    f.write(requirements)

print(f"‚úÖ Requirements saved: {req_path}")


‚úÖ Requirements saved: ../requirements.txt


## 8. Test API Locally (Optional)

In [14]:
# Simulate API request locally
print("üìù Simulating API Request...")
print("="*50)

# Sample request
api_request = {
    "trip_distance": 2.5,
    "pickup_hour": 14,
    "pickup_dayofweek": 2,
    "passenger_count": 2
}

print("Request Body:")
print(json.dumps(api_request, indent=2))

# Create DataFrame from request
df_request = pd.DataFrame([api_request])
for feature in ALL_FEATURES:
    if feature not in df_request.columns:
        df_request[feature] = 0

# Make prediction
prediction = model.predict(df_request[ALL_FEATURES])[0]

# Response
api_response = {
    "predicted_fare": round(float(prediction), 2),
    "model_name": MODEL_NAME,
    "model_version": "1.0.0"
}

print("\nResponse Body:")
print(json.dumps(api_response, indent=2))


üìù Simulating API Request...
Request Body:
{
  "trip_distance": 2.5,
  "pickup_hour": 14,
  "pickup_dayofweek": 2,
  "passenger_count": 2
}

Response Body:
{
  "predicted_fare": 21.16,
  "model_name": "random_forest",
  "model_version": "1.0.0"
}


## 9. Deployment Summary

In [15]:
print("="*70)
print("              üì¶ DEPLOYMENT SUMMARY")
print("="*70)

print(f"""
MODEL INFORMATION:
   Model: {MODEL_NAME}
   Version: 1.0.0
   Features: {len(ALL_FEATURES)}
   Target: {TARGET}

GENERATED FILES:
   üìÅ models/
      - production_model.joblib (model package)
      - feature_config.json
   
   üìÅ src/
      - api.py (FastAPI endpoint)
      - app.py (Streamlit UI)
   
   üìÑ requirements.txt

HOW TO RUN:

   1. FastAPI (REST API):
      cd mlops/src
      pip install fastapi uvicorn
      uvicorn api:app --reload
      ‚Üí Open: http://localhost:8000/docs

   2. Streamlit (Web UI):
      cd mlops/src
      pip install streamlit
      streamlit run app.py
      ‚Üí Open: http://localhost:8501

API ENDPOINTS:
   GET  /           ‚Üí Welcome message
   GET  /health     ‚Üí Health check
   POST /predict    ‚Üí Make prediction
   GET  /model/info ‚Üí Model information
""")

print("="*70)
print("‚úÖ Deployment Preparation Complete!")
print("="*70)


              üì¶ DEPLOYMENT SUMMARY

MODEL INFORMATION:
   Model: random_forest
   Version: 1.0.0
   Features: 18
   Target: fare_amount

GENERATED FILES:
   üìÅ models/
      - production_model.joblib (model package)
      - feature_config.json
   
   üìÅ src/
      - api.py (FastAPI endpoint)
      - app.py (Streamlit UI)
   
   üìÑ requirements.txt

HOW TO RUN:

   1. FastAPI (REST API):
      cd mlops/src
      pip install fastapi uvicorn
      uvicorn api:app --reload
      ‚Üí Open: http://localhost:8000/docs

   2. Streamlit (Web UI):
      cd mlops/src
      pip install streamlit
      streamlit run app.py
      ‚Üí Open: http://localhost:8501

API ENDPOINTS:
   GET  /           ‚Üí Welcome message
   GET  /health     ‚Üí Health check
   POST /predict    ‚Üí Make prediction
   GET  /model/info ‚Üí Model information

‚úÖ Deployment Preparation Complete!


---

## üîó Untuk Dosen: Deployment Architecture

### Production Architecture

```
‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê     ‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê     ‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
‚îÇ   Streamlit UI  ‚îÇ‚îÄ‚îÄ‚îÄ‚îÄ‚ñ∂‚îÇ   FastAPI       ‚îÇ‚îÄ‚îÄ‚îÄ‚îÄ‚ñ∂‚îÇ   ML Model      ‚îÇ
‚îÇ   (Frontend)    ‚îÇ     ‚îÇ   (Backend)     ‚îÇ     ‚îÇ   (Inference)   ‚îÇ
‚îî‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îò     ‚îî‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îò     ‚îî‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îò
         ‚îÇ                      ‚îÇ                       ‚îÇ
         ‚ñº                      ‚ñº                       ‚ñº
    User Input            Request/Response        Prediction
```

### Deployment Options

| Platform | Streamlit | FastAPI | Cost |
|----------|-----------|---------|------|
| Local | ‚úÖ | ‚úÖ | Free |
| Streamlit Cloud | ‚úÖ | ‚ùå | Free |
| Heroku | ‚úÖ | ‚úÖ | Free tier |
| AWS/GCP | ‚úÖ | ‚úÖ | Pay-as-you-go |

### Next Steps
1. Containerize with Docker
2. Set up CI/CD pipeline
3. Add authentication
4. Set up monitoring dashboard

In [17]:

# Load test data
test_df = pd.read_parquet('data/processed/test.parquet')
train_df = pd.read_parquet('data/processed/train.parquet')

print('üìä TRAIN DATA HEAD:')
print('='*80)
print(train_df.head().to_string())
print(f'\nShape: {train_df.shape}')
print(f'Columns: {list(train_df.columns)}')

print('\n\nüìä TEST DATA HEAD:')
print('='*80)
print(test_df.head().to_string())
print(f'\nShape: {test_df.shape}')



FileNotFoundError: [Errno 2] No such file or directory: 'data/processed/test.parquet'