# Assignment 11 – Applied Component

# Step 1 – Train Your Model

In [1]:
import os

import pandas as pd

# Location of the monthly car-sales CSV. The default is the original
# machine-specific path; set the CAR_SALES_CSV environment variable to run the
# notebook on another machine without editing this cell.
DATA_PATH = os.environ.get(
    "CAR_SALES_CSV",
    r"C:\Users\sarah\Desktop\BC\Fall 2025\MESA8414 Applied AI and Machine Learning\Assignment\Week 11 assignment\monthly-car-sales.csv",
)

# Read the raw data with pandas' default settings (no encoding detection is performed)
df = pd.read_csv(DATA_PATH)

# Preview the first rows to confirm the file loaded as expected
display(df.head())


Unnamed: 0,Month,Sales
0,1960-01,6550
1,1960-02,8728
2,1960-03,12026
3,1960-04,14395
4,1960-05,14587


In [2]:
# Report how many null entries each column contains
null_counts = df.isnull().sum()
print("Missing values per column:\n", null_counts)

# Replace any missing numeric entries with the mean of their column
numeric_means = df.mean(numeric_only=True)
df = df.fillna(numeric_means)

# Summary statistics of the numeric columns after imputation
display(df.describe())



Missing values per column:
 Month    0
Sales    0
dtype: int64


Unnamed: 0,Sales
count,108.0
mean,14595.111111
std,4525.213913
min,5568.0
25%,11391.25
50%,14076.0
75%,17595.75
max,26099.0


In [3]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Number of previous months used as predictors for the current month's sales
N_LAGS = 6
LAG_COLUMNS = [f'Sales_lag_{lag}' for lag in range(1, N_LAGS + 1)]

# --- 1. Create lag features for the past N_LAGS months ---
# Work on a copy so `df` itself is never modified. Adding the columns to `df`
# and dropping rows in place would make this cell non-idempotent: every re-run
# would remove another N_LAGS rows from the data.
sales_lagged = df.copy()
for lag, column in enumerate(LAG_COLUMNS, start=1):
    # Sales_lag_k holds the Sales value from k rows earlier
    sales_lagged[column] = sales_lagged['Sales'].shift(lag)

# Drop the rows that contain NaN (the first N_LAGS rows have no complete lag history)
sales_lagged = sales_lagged.dropna().reset_index(drop=True)

# --- 2. Define features (X) and target (y) ---
X = sales_lagged[LAG_COLUMNS]
y = sales_lagged['Sales']

# --- 3. Split into train/test sets (no shuffle for time series) ---
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# --- 4. Train XGBoost Regressor ---
model = XGBRegressor(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=4,
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=42
)
model.fit(X_train, y_train)

# --- 5. Predict and evaluate ---
y_pred = model.predict(X_test)
# RMSE is the square root of the mean squared error, in the same units as Sales
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE: {rmse:.2f}")

# --- 6. Compare actual vs predicted ---
# X_test keeps the row labels of sales_lagged, so they can be used to look up Month
results = pd.DataFrame({
    'Month': sales_lagged.loc[X_test.index, 'Month'],
    'Actual_Sales': y_test,
    'Predicted_Sales': y_pred
})
print(results.head())


RMSE: 2897.63
      Month  Actual_Sales  Predicted_Sales
81  1967-04         19692     21437.453125
82  1967-05         24081     21224.123047
83  1967-06         22114     18793.589844
84  1967-07         14220     17344.720703
85  1967-08         13434     12679.627930


# Step 2: Save the Model in JSON Format

In [4]:
import os

# Create the output folder before saving: no earlier cell creates 'model/',
# so on a fresh run the save below could otherwise fail on a missing directory.
os.makedirs("model", exist_ok=True)

# Save the trained XGBoost model (its underlying booster) to a JSON file
model.get_booster().save_model("model/model.json")

print("Model saved successfully as 'model/model.json'")


Model saved successfully as 'model/model.json'


# Step 3: Build the FastAPI Prediction Service

In [5]:
pip install fastapi uvicorn xgboost

Note: you may need to restart the kernel to use updated packages.


In [6]:
# Install nest_asyncio into the environment the kernel is running in.
# NOTE(review): no visible cell imports nest_asyncio — presumably it was meant
# for running the uvicorn server from inside the notebook; confirm or remove.
%pip install nest_asyncio

Note: you may need to restart the kernel to use updated packages.


In [18]:
from fastapi import FastAPI
from pydantic import BaseModel
from xgboost import XGBRegressor
import numpy as np

# Create the FastAPI application object
API_TITLE = "Sales Forecast API"
app = FastAPI(title=API_TITLE)

# Restore the trained XGBoost regressor from its JSON file
MODEL_FILE = "model/model.json"
model = XGBRegressor()
model.load_model(MODEL_FILE)

# Define input schema
# Request body for POST /predict: a JSON object with a single "sales_lags" key.
class SalesInput(BaseModel):
    # Lag values that are passed to the model as one row of features.
    # NOTE(review): the model was trained on Sales_lag_1 ... Sales_lag_6, so
    # this presumably needs six values ordered most recent month first — confirm.
    sales_lags: list[float]

# Root endpoint: answers GET / with a fixed welcome message
@app.get("/")
def read_root():
    greeting = {"message": "Welcome to the Sales Forecast API!"}
    return greeting

# Prediction endpoint
@app.post("/predict")
def predict_sales(data: SalesInput):
    """Predict next month's sales from the six most recent monthly sales values.

    `data.sales_lags` must contain exactly six numbers ordered most recent
    first (last month, two months ago, ...), matching the Sales_lag_1 ...
    Sales_lag_6 feature columns used for training.

    Returns a JSON object with the key "predicted_sales_next_month".
    Responds with HTTP 422 when the number of values is not six.
    """
    # Imported locally so this endpoint does not depend on edits to the import lines
    from fastapi import HTTPException

    expected_lags = 6  # number of lag features used at training time
    if len(data.sales_lags) != expected_lags:
        # Reject bad input explicitly instead of passing a wrong-width row to
        # the model, which was trained on six features.
        raise HTTPException(
            status_code=422,
            detail=f"sales_lags must contain exactly {expected_lags} values, got {len(data.sales_lags)}",
        )

    # Convert list to numpy array with shape (1, 6): one row, six features
    X_input = np.array(data.sales_lags).reshape(1, -1)

    # Make prediction (predict returns an array; take its single element)
    prediction = model.predict(X_input)[0]

    # Cast to a built-in float before placing the value in the response
    return {"predicted_sales_next_month": float(prediction)}


TypeError: write() argument must be str, not FastAPI

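Once the server is running (for example with `uvicorn app:app --reload`), the interactive API documentation can be opened at http://127.0.0.1:8000/docs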

# Step 4: Save the API Code to app.py

In [19]:
import os

# Make sure a 'model' folder exists (app.py loads model/model.json at start-up)
os.makedirs("model", exist_ok=True)

# Source code of the standalone API module. It is written to app.py below so
# it can be served outside the notebook. Compared with a bare predict call, the
# /predict endpoint validates the number of lag values and answers with HTTP 422
# instead of passing a wrong-width row to the model.
app_code = """from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from xgboost import XGBRegressor
import numpy as np

# Number of lag features the model was trained on (Sales_lag_1 ... Sales_lag_6)
N_LAGS = 6

# Initialize FastAPI app
app = FastAPI(title="Sales Forecast API")

# Load the trained XGBoost model
model = XGBRegressor()
model.load_model("model/model.json")

# Define input schema
class SalesInput(BaseModel):
    # Most recent month first: [last month, two months ago, ...]
    sales_lags: list[float]

# Root endpoint
@app.get("/")
def read_root():
    return {"message": "Welcome to the Sales Forecast API!"}

# Prediction endpoint
@app.post("/predict")
def predict_sales(data: SalesInput):
    # Reject requests that do not supply exactly N_LAGS values
    if len(data.sales_lags) != N_LAGS:
        raise HTTPException(
            status_code=422,
            detail=f"sales_lags must contain exactly {N_LAGS} values, got {len(data.sales_lags)}",
        )

    # Convert list to numpy array with correct shape (1, 6)
    X_input = np.array(data.sales_lags).reshape(1, -1)

    # Make prediction
    prediction = model.predict(X_input)[0]

    return {"predicted_sales_next_month": float(prediction)}
"""

# Save to file (relative path: app.py lands in the current working directory)
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_code)

print("Saved app.py successfully!")


Saved app.py successfully!


# Step 5: Deploy with Render

In [20]:
import os

# Show the directory the notebook is running from; relative paths such as
# "app.py" and "model/model.json" are resolved against it.
working_dir = os.getcwd()
working_dir


'C:\\Users\\sarah\\anaconda_projects\\Applied AI and ML'

In [21]:
# List the entries of the current working directory
os.listdir(os.curdir)


['.ipynb_checkpoints',
 'amazon-alexa.csv',
 'app.py',
 'Assignment10_Student.ipynb',
 'Assignment3 (1).ipynb',
 'Assignment4_Applied.ipynb',
 'Assignment6_Applied_AmazonReviews (1).ipynb',
 'Assignment6_Sarah Her.ipynb',
 'Assignment7_Applied_Forecasting_Student_Final.ipynb',
 'Assignment8_Applied_CreditRisk_Student.ipynb',
 'Assignment9_BlackjackLearner_Student.ipynb',
 'Copy_of_Week8_Behind_the_Scenes_Student.ipynb',
 'frontend',
 'MESA8414_Her_Assignment 3.ipynb',
 'MESA8414_Her_Assignment 4.ipynb',
 'model',
 'Sarah Her_Assignment8_Applied_CreditRisk_Student.ipynb',
 'Sarah_Assignment10.ipynb',
 'Sarah_Assignment11.ipynb',
 'Sarah_Assignment9_BlackjackLearner.ipynb',
 'Sarah_Her_Assignment7.ipynb',
 'spy_price_forecast.csv',
 'timeseries_9.25.25.ipynb',
 'timeseries_proj_sarah.ipynb',
 'timeseries_proj_sarah_9.18.25.ipynb',
 'Untitled.ipynb',
 'untitled.txt']

In [None]:
!conda install -y git


In [None]:
# Confirm that git is installed and reachable on PATH before using it below
!git --version

In [22]:
# Turn the current working directory into a git repository
!git init
# Set the commit author name and email for this repository only (no --global flag)
!git config user.name "sarah-her"
!git config user.email "hersa@bc.edu"


'git' is not recognized as an internal or external command,
operable program or batch file.
'git' is not recognized as an internal or external command,
operable program or batch file.
'git' is not recognized as an internal or external command,
operable program or batch file.
