<a href="https://colab.research.google.com/github/saksham-42/Time-series/blob/main/Copy_of_TSA_Assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pmdarima prophet openpyxl --quiet

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
from prophet import Prophet
import warnings
warnings.filterwarnings("ignore")

from google.colab import files
# Ask the user to upload the dataset through the Colab file picker; the
# workbook is then read by name below.
uploaded = files.upload()

df = pd.read_excel("AirQualityUCI.xlsx")
# -200 is converted to NaN so it is interpolated instead of being modelled as
# a real reading (presumably the dataset's missing-value marker -- confirm
# against the dataset description).
df = df.replace(-200, np.nan)

print("Columns in dataset:", df.columns)
df.columns = df.columns.str.strip()

# Combine the separate Date and Time columns into one timestamp per row.
df['Datetime'] = pd.to_datetime(
    df['Date'].astype(str) + ' ' + df['Time'].astype(str),
    errors='coerce',
)
# errors='coerce' turns unparseable timestamps into NaT. Those rows must be
# removed before the column becomes the index, because time-based
# interpolation cannot work with missing index values.
df = (
    df.dropna(subset=['Datetime'])
      .set_index('Datetime')
      .drop(columns=['Date', 'Time'])
)
print("Columns in dataset after processing:", df.columns)

# Fill gaps proportionally to elapsed time, then drop whatever could not be
# filled (e.g. leading NaNs that have no earlier value to interpolate from).
df = df.interpolate(method='time').dropna()
print(df.columns)

def forecast_series(series, steps=48, model_type='ARIMA'):
    """Backtest a model on a hold-out tail, then forecast beyond the series.

    The first 90% of ``series`` is the training set and the remaining 10%
    is the hold-out set used for the RMSE. The returned forecast comes from
    a second model fitted on the *complete* series, so it starts right
    after the last observation rather than at the start of the hold-out
    window.

    Args:
        series: pandas Series of observations. For ``'Prophet'`` its index
            is used as the ``ds`` timestamps (assumed to be in
            chronological order).
        steps: Number of periods to forecast past the end of ``series``.
        model_type: ``'ARIMA'`` (SARIMAX with order (2, 1, 2) and no
            seasonal component) or ``'Prophet'``.

    Returns:
        A ``(rmse, future_forecast)`` tuple: the hold-out RMSE and a pandas
        Series holding the ``steps`` forecast values.

    Raises:
        ValueError: If ``model_type`` is not supported, or if ``series`` is
            too short to leave a non-empty training set.
    """
    if model_type not in ('ARIMA', 'Prophet'):
        # Previously an unknown type fell through and returned None, which
        # only surfaced later as an unpacking error in the caller.
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected 'ARIMA' or 'Prophet'"
        )

    split = int(len(series) * 0.9)
    if split == 0:
        raise ValueError("series must contain at least 2 observations")
    train = series.iloc[:split]
    test = series.iloc[split:]

    if model_type == 'ARIMA':
        order = (2, 1, 2)
        seasonal_order = (0, 0, 0, 0)

        # Evaluation model: sees only the training window.
        eval_fit = SARIMAX(train, order=order, seasonal_order=seasonal_order).fit(disp=False)
        holdout_forecast = eval_fit.forecast(steps=len(test))
        rmse = np.sqrt(mean_squared_error(test, holdout_forecast))

        # Production model: refit on every observation so the forecast
        # horizon begins after the end of the data, not after `train`.
        full_fit = SARIMAX(series, order=order, seasonal_order=seasonal_order).fit(disp=False)
        future_forecast = full_fit.forecast(steps=steps)
        return rmse, future_forecast

    # model_type == 'Prophet'
    # Evaluation model: fit on the training window and predict exactly the
    # hold-out timestamps, so actuals and predictions line up one-to-one
    # and the score is genuinely out-of-sample.
    eval_model = Prophet()
    eval_model.fit(pd.DataFrame({'ds': train.index, 'y': train.values}))
    holdout_pred = eval_model.predict(pd.DataFrame({'ds': test.index}))
    rmse = np.sqrt(mean_squared_error(test.values, holdout_pred['yhat'].values))

    # A fresh instance is created for the full-data fit rather than
    # refitting the evaluation model.
    full_model = Prophet()
    full_model.fit(pd.DataFrame({'ds': series.index, 'y': series.values}))
    future = full_model.make_future_dataframe(periods=steps, freq='H')
    forecast = full_model.predict(future)
    # The frame holds history followed by the future rows; keep the last
    # `steps` rows, i.e. the future part.
    future_forecast = forecast[['ds', 'yhat']].set_index('ds').iloc[-steps:, 0]
    return rmse, future_forecast

# Every column that gets its own forecast, in the order the columns appear
# in the exported workbook.
target_columns = [
    'CO(GT)',
    'PT08.S1(CO)',
    'NMHC(GT)',
    'C6H6(GT)',
    'PT08.S2(NMHC)',
    'NOx(GT)',
    'PT08.S3(NOx)',
    'NO2(GT)',
    'PT08.S4(NO2)',
    'PT08.S5(O3)',
    'T',
    'RH',
    'AH',
]

result_dict = {}  # column name -> array of 48 forecast values
rmse_dict = {}    # column name -> hold-out RMSE

# Fit one ARIMA model per column and collect its forecast and error.
for column_name in target_columns:
    print(f"Processing: {column_name}")
    column_rmse, column_forecast = forecast_series(
        df[column_name],
        steps=48,
        model_type='ARIMA',
    )
    result_dict[column_name] = column_forecast.values
    rmse_dict[column_name] = column_rmse
    print(f"RMSE: {column_rmse:.2f}")

# One row per forecast hour, one column per target variable.
submission = pd.DataFrame(result_dict)
submission.index.name = 'Hour'

# The index is deliberately not written; only the forecast columns are
# exported, then the file is offered as a browser download from Colab.
output_path = "submission.xlsx"
submission.to_excel(output_path, index=False)
files.download(output_path)

print("\n RMSE Summary:")
for column_name, column_rmse in rmse_dict.items():
    print(f"{column_name}: {column_rmse:.2f}")


Saving AirQualityUCI.xlsx to AirQualityUCI.xlsx
Columns in dataset: Index(['Date', 'Time', 'CO(GT)', 'PT08.S1(CO)', 'NMHC(GT)', 'C6H6(GT)',
       'PT08.S2(NMHC)', 'NOx(GT)', 'PT08.S3(NOx)', 'NO2(GT)', 'PT08.S4(NO2)',
       'PT08.S5(O3)', 'T', 'RH', 'AH'],
      dtype='object')
Columns in dataset after processing: Index(['CO(GT)', 'PT08.S1(CO)', 'NMHC(GT)', 'C6H6(GT)', 'PT08.S2(NMHC)',
       'NOx(GT)', 'PT08.S3(NOx)', 'NO2(GT)', 'PT08.S4(NO2)', 'PT08.S5(O3)',
       'T', 'RH', 'AH'],
      dtype='object')
Index(['CO(GT)', 'PT08.S1(CO)', 'NMHC(GT)', 'C6H6(GT)', 'PT08.S2(NMHC)',
       'NOx(GT)', 'PT08.S3(NOx)', 'NO2(GT)', 'PT08.S4(NO2)', 'PT08.S5(O3)',
       'T', 'RH', 'AH'],
      dtype='object')
Processing: CO(GT)
RMSE: 1.34
Processing: PT08.S1(CO)
RMSE: 202.45
Processing: NMHC(GT)
RMSE: 0.00
Processing: C6H6(GT)
RMSE: 6.57
Processing: PT08.S2(NMHC)
RMSE: 253.24
Processing: NOx(GT)
RMSE: 181.42
Processing: PT08.S3(NOx)
RMSE: 229.27
Processing: NO2(GT)
RMSE: 60.56
Processing: PT08.S

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


 RMSE Summary:
CO(GT): 1.34
PT08.S1(CO): 202.45
NMHC(GT): 0.00
C6H6(GT): 6.57
PT08.S2(NMHC): 253.24
NOx(GT): 181.42
PT08.S3(NOx): 229.27
NO2(GT): 60.56
PT08.S4(NO2): 279.92
PT08.S5(O3): 424.68
T: 9.57
RH: 21.44
AH: 0.28
