### Time Series Analysis with the Airline Passengers Dataset

In [None]:

import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

# Location of the monthly airline passengers CSV
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv'

# Read the CSV, parsing the 'Month' column as dates and using it as the index
df = pd.read_csv(
    url,
    index_col='Month',
    parse_dates=['Month'],
)

# Preview the first rows
df.head()


### Visualization of the Time Series Data

In [None]:

import plotly.express as px

# Line chart of passenger counts against the date index
fig = px.line(
    df,
    x=df.index,
    y='Passengers',
    title='Monthly Airline Passengers (1949-1960)',
)
fig.show()


### Linear Regression on the Time Index (Trend Baseline)

In [None]:

from sklearn.linear_model import LinearRegression
import numpy as np

# Feature: the observation's position in the series (0, 1, 2, ...) as a single column.
# Target: the passenger count at that position.
X = np.arange(len(df)).reshape(-1, 1)
y = df['Passengers'].values

# Chronological split: the first 80% of observations train, the remainder tests
train_size = int(0.8 * len(X))
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

# Fit a straight-line model on the training portion
lr = LinearRegression()
lr.fit(X_train, y_train)

# Predict the held-out portion
y_pred = lr.predict(X_test)

# Mean squared error on the test set (displayed as the cell output)
mse = mean_squared_error(y_test, y_pred)
mse


### Time Series Forecasting with ARIMA

In [None]:

# Fit an ARIMA model with order (p=5, d=1, q=0) on the passenger counts.
# The 'Passengers' column is selected explicitly so the model always receives
# a single univariate series, even if `df` holds additional columns.
model = ARIMA(df['Passengers'], order=(5, 1, 0))

# The fit() of statsmodels.tsa.arima.model.ARIMA takes no `disp` argument
# (that keyword belonged to the legacy arima_model API); passing it raises
# a TypeError, so it is omitted here.
model_fit = model.fit()

# Forecast the next 12 periods. forecast() returns the predicted values
# directly, so the whole result is kept instead of indexing it with [0].
forecast = model_fit.forecast(steps=12)
forecast


### Feature Engineering for Time Series

In [None]:

from sklearn.preprocessing import StandardScaler

# Build the lag features in a NEW frame instead of mutating `df`.
# Overwriting `df` made this cell non-idempotent: every re-run shifted the
# already-trimmed data and dropped three more rows, and any cell that expects
# `df` to hold only the raw series would see the extra lag columns.
passengers = df['Passengers']
df_lagged = df[['Passengers']].assign(
    lag1=passengers.shift(1),
    lag2=passengers.shift(2),
    lag3=passengers.shift(3),
).dropna()  # the first three rows have no complete lag history

# Chronological split: the first 80% of rows train, the remainder tests
train_size = int(0.8 * len(df_lagged))
train, test = df_lagged.iloc[:train_size], df_lagged.iloc[train_size:]
X_train, y_train = train.drop(columns='Passengers'), train['Passengers']
X_test, y_test = test.drop(columns='Passengers'), test['Passengers']

# Standardize the features. The scaler is fitted on the training rows only,
# then applied to both splits, so test-set statistics do not leak into training.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

X_train.shape, X_test.shape


### Linear Regression on Lag Features

In [None]:

from sklearn.linear_model import LinearRegression

# Fit ordinary least squares on the training features
lr_ts = LinearRegression()
lr_ts.fit(X_train, y_train)

# Predictions for the held-out rows
y_pred_lr = lr_ts.predict(X_test)

# Test-set mean squared error (displayed as the cell output)
mse_lr = mean_squared_error(y_true=y_test, y_pred=y_pred_lr)
mse_lr


### k-NN for Time Series Forecasting

In [None]:

from sklearn.neighbors import KNeighborsRegressor

# Fit a 5-nearest-neighbours regressor on the training features
knn_ts = KNeighborsRegressor(n_neighbors=5)
knn_ts.fit(X_train, y_train)

# Predictions for the held-out rows
y_pred_knn = knn_ts.predict(X_test)

# Test-set mean squared error (displayed as the cell output)
mse_knn = mean_squared_error(y_true=y_test, y_pred=y_pred_knn)
mse_knn


### Decision Trees for Time Series Forecasting

In [None]:

from sklearn.tree import DecisionTreeRegressor

# Train a decision tree regressor.
# random_state is fixed so the fitted tree, and therefore the reported MSE,
# is the same on every run of the notebook.
dt_ts = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Predict on test set
y_pred_dt = dt_ts.predict(X_test)

# Evaluate the model (test-set mean squared error, displayed as the cell output)
mse_dt = mean_squared_error(y_test, y_pred_dt)
mse_dt


### SVM for Time Series Forecasting

In [None]:

from sklearn.svm import SVR

# Fit a support vector regressor with a linear kernel on the training features
svm_ts = SVR(kernel='linear')
svm_ts.fit(X_train, y_train)

# Predictions for the held-out rows
y_pred_svm = svm_ts.predict(X_test)

# Test-set mean squared error (displayed as the cell output)
mse_svm = mean_squared_error(y_true=y_test, y_pred=y_pred_svm)
mse_svm


### K-Means Clustering on Time Series Data

In [None]:

from sklearn.cluster import KMeans

# Using KMeans to cluster the lag features (this is a non-typical use-case for KMeans in time series).
# random_state pins the centroid initialisation so cluster assignments are
# reproducible across runs; n_init is set explicitly so the number of restarts
# does not depend on the installed scikit-learn version's default.
kmeans_ts = KMeans(n_clusters=3, n_init=10, random_state=42).fit(X_train)

# Getting cluster labels for the test set
cluster_labels = kmeans_ts.predict(X_test)
cluster_labels


### Plotly Visualization for Linear Regression:

In [None]:

import plotly.express as px

# Collect the actual and predicted values side by side for plotting
df_lr_ts = pd.DataFrame({'True Values': y_test, 'Predicted Values': y_pred_lr})

# px.line on a wide-form frame draws one trace per column against the frame's
# index, so both series are already plotted by this single call. The previous
# extra add_scatter() re-drew 'Predicted Values' without x values, which added
# a duplicate trace that was not aligned with the frame's index.
fig = px.line(
    df_lr_ts,
    title='True vs Predicted Values for Linear Regression',
    labels={'value': 'Passengers', 'variable': 'Series'},
)
fig.show()


### Plotly Visualization for k-NN:

In [None]:

# Collect the actual and predicted values side by side for plotting
df_knn_ts = pd.DataFrame({'True Values': y_test, 'Predicted Values': y_pred_knn})

# px.line on a wide-form frame draws one trace per column against the frame's
# index, so both series are already plotted by this single call. The previous
# extra add_scatter() re-drew 'Predicted Values' without x values, which added
# a duplicate trace that was not aligned with the frame's index.
fig = px.line(
    df_knn_ts,
    title='True vs Predicted Values for k-NN',
    labels={'value': 'Passengers', 'variable': 'Series'},
)
fig.show()


### Plotly Visualization for Decision Trees:

In [None]:

# Collect the actual and predicted values side by side for plotting
df_dt_ts = pd.DataFrame({'True Values': y_test, 'Predicted Values': y_pred_dt})

# px.line on a wide-form frame draws one trace per column against the frame's
# index, so both series are already plotted by this single call. The previous
# extra add_scatter() re-drew 'Predicted Values' without x values, which added
# a duplicate trace that was not aligned with the frame's index.
fig = px.line(
    df_dt_ts,
    title='True vs Predicted Values for Decision Trees',
    labels={'value': 'Passengers', 'variable': 'Series'},
)
fig.show()


### Plotly Visualization for SVM:

In [None]:

# Collect the actual and predicted values side by side for plotting
df_svm_ts = pd.DataFrame({'True Values': y_test, 'Predicted Values': y_pred_svm})

# px.line on a wide-form frame draws one trace per column against the frame's
# index, so both series are already plotted by this single call. The previous
# extra add_scatter() re-drew 'Predicted Values' without x values, which added
# a duplicate trace that was not aligned with the frame's index.
fig = px.line(
    df_svm_ts,
    title='True vs Predicted Values for SVM',
    labels={'value': 'Passengers', 'variable': 'Series'},
)
fig.show()
