<a href="https://colab.research.google.com/github/klaxman23/August_pratice/blob/main/Module_13_Case_study_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ================================
# 1. IMPORT LIBRARIES
# ================================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.statespace.sarimax import SARIMAX

from sklearn.metrics import mean_absolute_error, mean_squared_error

# Select matplotlib's "seaborn-v0_8" style sheet for the figures drawn in this notebook.
plt.style.use("seaborn-v0_8")


In [None]:
# ================================
# 2. LOAD & CLEAN DATA
# ================================
# Read the raw CSV and trim stray whitespace from the header names.
raw = pd.read_csv("SeaPlaneTravel.csv")
raw.columns = raw.columns.str.strip()

# The second column holds the passenger counts; it is addressed by position
# so the rename works whatever the header is called in the file.
passenger_col = raw.columns[1]

# Build the analysis frame in one pass: rename, parse the Month column to
# datetimes, index by it, and put the rows in chronological order.
df = (
    raw.rename(columns={passenger_col: "Passenger"})
       .assign(Month=lambda frame: pd.to_datetime(frame["Month"]))
       .set_index("Month")
       .sort_index()
)

print(df.head())


In [None]:
# ================================
# 3. VISUALIZE DATA
# ================================
# Line chart of the passenger series against its Month index, drawn through
# the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(10,5))
ax.plot(df["Passenger"])
ax.set_title("Monthly SeaPlane Passenger Traffic")
ax.set_xlabel("Year")
ax.set_ylabel("Passengers")
plt.show()


In [None]:
# ================================
# 4. DECOMPOSITION
# ================================
# Additive seasonal decomposition of the passenger series using a
# 12-observation seasonal period.
decompose_options = {"model": "additive", "period": 12}
decomposition = seasonal_decompose(df["Passenger"], **decompose_options)

# Draw the decomposition panels and render them.
decomposition.plot()
plt.show()


In [None]:
# ================================
# 5. TRAIN-TEST SPLIT
# ================================
# Hold out the final 24 rows for testing; everything before them is training data.
split_at = len(df) - 24
train = df.iloc[:split_at]
test = df.iloc[split_at:]

for label, part in (("Train size:", train), ("Test size :", test)):
    print(label, len(part))


In [None]:
# ================================
# 6. SARIMA MODEL
# ================================
# SARIMA(1,1,1)x(1,1,1,12) specification, with the stationarity and
# invertibility constraints on the parameters switched off.
sarima_spec = dict(
    order=(1,1,1),
    seasonal_order=(1,1,1,12),
    enforce_stationarity=False,
    enforce_invertibility=False,
)

# Fit on the training window only; the held-out rows are used for evaluation.
model = SARIMAX(train["Passenger"], **sarima_spec)
sarima_model = model.fit()

print(sarima_model.summary())


In [None]:
# ================================
# 7. SAFE PREDICTION (NO KEYERROR)
# ================================
# Request the out-of-sample steps by integer position (the first step after
# the training rows through the last test row) rather than by date label.
first_step = len(train)
last_step = first_step + len(test) - 1
predictions = sarima_model.predict(start=first_step, end=last_step)

# Re-label the predictions with the test dates so both series share an index.
predictions.index = test.index

# Overlay training history, held-out actuals and model predictions.
fig, ax = plt.subplots(figsize=(10,5))
ax.plot(train.index, train["Passenger"], label="Train")
ax.plot(test.index, test["Passenger"], label="Actual")
ax.plot(predictions.index, predictions, label="Predicted", color="red")
ax.legend()
ax.set_title("Actual vs Predicted")
plt.show()


In [None]:
# ================================
# 8. EVALUATION
# ================================
# Score the predictions against the held-out passenger counts.
actual = test["Passenger"]

mae = mean_absolute_error(actual, predictions)

# RMSE is the square root of the mean squared error.
mse = mean_squared_error(actual, predictions)
rmse = np.sqrt(mse)

print("MAE :", mae)
print("RMSE:", rmse)


In [None]:
# ================================
# 9. FUTURE FORECAST (12 MONTHS)
# ================================
# The model from section 6 was fitted on the training window only, so its
# out-of-sample horizon starts at the first *test* month. Forecasting 12
# steps from it and labelling them with dates after the end of the data
# would shift every value by the length of the test window. To forecast
# beyond the last observation, refit the same SARIMA specification on the
# full series first.
final_model = SARIMAX(
    df["Passenger"],
    order=(1,1,1),
    seasonal_order=(1,1,1,12),
    enforce_stationarity=False,
    enforce_invertibility=False
).fit(disp=False)  # disp=False: skip the optimizer's iteration log

# 12 steps past the last observed month.
future = final_model.get_forecast(steps=12)

# Month-start dates for the 12 months following the last observation.
future_index = pd.date_range(
    start=df.index[-1] + pd.DateOffset(months=1),
    periods=12,
    freq="MS"
)

# Attach the dates explicitly so the result does not depend on whether the
# fitted model carried a date index with a frequency.
forecast_df = pd.DataFrame(
    future.predicted_mean.values,
    index=future_index,
    columns=["Forecast"]
)

forecast_df


In [None]:
# ================================
# 10. FORECAST PLOT
# ================================
# Show the full observed history with the 12-month forecast appended in green.
fig, ax = plt.subplots(figsize=(10,5))
ax.plot(df.index, df["Passenger"], label="Historical")
ax.plot(forecast_df.index, forecast_df["Forecast"], label="Forecast", color="green")
ax.legend()
ax.set_title("12-Month Passenger Forecast")
plt.show()
