In [1]:
!pip install scikit-learn numpy pandas plotly



### Time Series Analysis with the Airline Passengers Dataset

In [2]:

import pandas as pd
from sklearn.metrics import mean_squared_error

# Read the airline passengers CSV; the 'Month' column is parsed as dates and used as the index
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv'
df = pd.read_csv(
    url,
    index_col='Month',
    parse_dates=['Month'],
)

# Preview the first rows of the frame
df.head()


Unnamed: 0_level_0,Passengers
Month,Unnamed: 1_level_1
1949-01-01,112
1949-02-01,118
1949-03-01,132
1949-04-01,129
1949-05-01,121


### Visualization of the Time Series Data

In [3]:

import plotly.express as px

# Line chart of the 'Passengers' column against the frame's Month index
fig = px.line(
    data_frame=df,
    x=df.index,
    y='Passengers',
    title='Monthly Airline Passengers (1949-1960)',
)
fig.show()


### Feature Engineering for Time Series

In [6]:

# Build the lag features in a NEW frame instead of writing into / rebinding `df`.
# The previous version added columns to `df` and then did `df = df.dropna()`, so
# every re-run of this cell re-shifted the already-trimmed frame and silently
# dropped three more rows. Deriving `df_lagged` from `df` keeps the cell idempotent.
LAGS = (1, 2, 3)
lag_columns = {f'lag{k}': df['Passengers'].shift(k) for k in LAGS}

# shift() leaves NaN in the first max(LAGS) rows; drop those incomplete rows
df_lagged = df[['Passengers']].assign(**lag_columns).dropna()

# Chronological split (no shuffling): the leading TRAIN_FRACTION of rows is used
# for training and the remaining rows are held out for testing
TRAIN_FRACTION = 0.8
train_size = int(TRAIN_FRACTION * len(df_lagged))
train, test = df_lagged.iloc[:train_size], df_lagged.iloc[train_size:]
X_train, y_train = train.drop(columns='Passengers'), train['Passengers']
X_test, y_test = test.drop(columns='Passengers'), test['Passengers']

# Standardize the features; the scaler is fitted on the training rows only and
# the same transform is then applied to the test rows
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

X_train.shape, X_test.shape


((112, 3), (29, 3))

### Linear Regression for Time Series Forecasting

In [7]:

from sklearn.linear_model import LinearRegression

# Fit an ordinary linear regression on the standardized lag features
lr_ts = LinearRegression()
lr_ts.fit(X_train, y_train)

# Forecast the held-out rows
y_pred_lr = lr_ts.predict(X_test)

# Mean squared error of the forecast against the true test values
mse_lr = mean_squared_error(y_true=y_test, y_pred=y_pred_lr)
mse_lr


2260.288488018069

### k-NN for Time Series Forecasting

In [8]:

from sklearn.neighbors import KNeighborsRegressor

# Fit a 5-nearest-neighbours regressor on the standardized lag features
knn_ts = KNeighborsRegressor(n_neighbors=5)
knn_ts.fit(X_train, y_train)

# Forecast the held-out rows
y_pred_knn = knn_ts.predict(X_test)

# Mean squared error of the forecast against the true test values
mse_knn = mean_squared_error(y_true=y_test, y_pred=y_pred_knn)
mse_knn


7171.002758620688

### Decision Trees for Time Series Forecasting

In [9]:

from sklearn.tree import DecisionTreeRegressor

# Train a decision tree regressor.
# random_state is fixed because the tree builder permutes features at each split,
# so ties between equally good splits could otherwise yield a different tree
# (and a different MSE) on every run.
dt_ts = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Predict on test set
y_pred_dt = dt_ts.predict(X_test)

# Evaluate the model
mse_dt = mean_squared_error(y_test, y_pred_dt)
mse_dt


7322.6551724137935

### SVM for Time Series Forecasting

In [10]:

from sklearn.svm import SVR

# Fit a support vector regressor with a linear kernel on the standardized lag features
svm_ts = SVR(kernel='linear')
svm_ts.fit(X_train, y_train)

# Forecast the held-out rows
y_pred_svm = svm_ts.predict(X_test)

# Mean squared error of the forecast against the true test values
mse_svm = mean_squared_error(y_true=y_test, y_pred=y_pred_svm)
mse_svm


4664.906923095005

### K-Means Clustering on Time Series Data

In [11]:

from sklearn.cluster import KMeans

# Using KMeans to cluster the lag features (this is a non-typical use-case for KMeans in time series).
# random_state makes the randomly initialised centroids (and therefore the label
# numbering) reproducible; n_init is set explicitly so the result does not depend
# on the library version's default.
kmeans_ts = KMeans(n_clusters=3, n_init=10, random_state=42).fit(X_train)

# Assign each test row to the nearest centroid learned from the training rows
cluster_labels = kmeans_ts.predict(X_test)
cluster_labels






array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1], dtype=int32)

### Plotly Visualization for Linear Regression

In [12]:

import plotly.express as px

# Collect true and predicted values in one frame; y_test supplies the date index
df_lr_ts = pd.DataFrame({'True Values': y_test, 'Predicted Values': y_pred_lr})

# px.line on this wide-form frame already draws one trace per column against the
# index. The former extra add_scatter(y=...) call had no x values, so it added a
# duplicate 'Predicted Values' trace at integer positions on the date axis.
fig = px.line(df_lr_ts, title='True vs Predicted Values for Linear Regression')
fig.show()


### Plotly Visualization for k-NN

In [13]:

# Collect true and predicted values in one frame; y_test supplies the date index
df_knn_ts = pd.DataFrame({'True Values': y_test, 'Predicted Values': y_pred_knn})

# px.line on this wide-form frame already draws one trace per column against the
# index. The former extra add_scatter(y=...) call had no x values, so it added a
# duplicate 'Predicted Values' trace at integer positions on the date axis.
fig = px.line(df_knn_ts, title='True vs Predicted Values for k-NN')
fig.show()


### Plotly Visualization for Decision Trees

In [14]:

# Collect true and predicted values in one frame; y_test supplies the date index
df_dt_ts = pd.DataFrame({'True Values': y_test, 'Predicted Values': y_pred_dt})

# px.line on this wide-form frame already draws one trace per column against the
# index. The former extra add_scatter(y=...) call had no x values, so it added a
# duplicate 'Predicted Values' trace at integer positions on the date axis.
fig = px.line(df_dt_ts, title='True vs Predicted Values for Decision Trees')
fig.show()


### Plotly Visualization for SVM

In [15]:

# Collect true and predicted values in one frame; y_test supplies the date index
df_svm_ts = pd.DataFrame({'True Values': y_test, 'Predicted Values': y_pred_svm})

# px.line on this wide-form frame already draws one trace per column against the
# index. The former extra add_scatter(y=...) call had no x values, so it added a
# duplicate 'Predicted Values' trace at integer positions on the date axis.
fig = px.line(df_svm_ts, title='True vs Predicted Values for SVM')
fig.show()
