In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from sklearn.preprocessing import MinMaxScaler

# Seed every random generator the notebook relies on. NumPy's seed alone does
# not cover TensorFlow's own generator (used by Keras, e.g. for weight
# initialisation), so without the second call each run trains a different model.
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Load the dataset; the relative path is resolved against the current working directory.
df = pd.read_csv("AirPassengers.csv")

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# Print a summary of the columns (dtypes and non-null counts).
df.info()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

# Exploratory Data Analysis (EDA)

In [None]:
# Plot the raw monthly series as a blue line.
# "Month" has not been converted to datetimes yet at this point in the
# notebook, so parse it explicitly to get a real time axis. plt.plot is used
# because plt.plot_date is deprecated in recent Matplotlib releases.
plt.figure(figsize=(25, 5))
plt.plot(pd.to_datetime(df["Month"]), df["#Passengers"], "b")
plt.title("Monthly passengers")
plt.xlabel("Month")
plt.ylabel("#Passengers")
plt.xticks(rotation=90)
plt.show()

In [None]:
# Parse "Month" into datetimes stored under a new "Date" column, then remove
# the original "Month" column from the frame in place.
df["Date"] = pd.to_datetime(df["Month"])
del df["Month"]

In [None]:
# Confirm that "Date" has replaced the original "Month" column.
df.head()

In [None]:
# Check the dtype of the newly created "Date" column.
df.info()

In [None]:
# Derive the calendar month and year from "Date" for the grouped plots below.
# The vectorized .dt accessor replaces a per-row Python lambda via .apply.
df["Month"] = df["Date"].dt.month
df["Year"] = df["Date"].dt.year

In [None]:
# Inspect the derived "Month" and "Year" columns.
df.head()

In [None]:
# Draw one labelled bar chart per year: months on the x-axis, passenger
# counts on the y-axis, with each bar annotated with its value.
for year in df["Year"].unique():
    rows_for_year = df[df["Year"] == year]
    fig, ax = plt.subplots()
    sns.barplot(data=rows_for_year, x="Month", y="#Passengers", ax=ax)
    for bars in ax.containers:
        ax.bar_label(bars)
    ax.set_title(f"Passengers per Month in {year}")
    plt.show()

In [None]:
# Total passengers for each year (Series indexed by "Year").
passengers_per_years = df.groupby("Year")["#Passengers"].sum()

In [None]:
# Yearly totals as a bar chart.
passengers_per_years.plot.bar()

In [None]:
# Share of each year in the overall total, labelled with one-decimal percentages.
passengers_per_years.plot.pie(autopct="%0.1f%%")

In [None]:
# Total passengers for each calendar month, summed over all years.
passengers_per_months = df.groupby("Month")["#Passengers"].sum()

In [None]:
# Monthly totals as a bar chart.
passengers_per_months.plot.bar()

In [None]:
# Share of each calendar month in the overall total, labelled with one-decimal percentages.
passengers_per_months.plot.pie(autopct="%0.1f%%")

# Preprocessing

In [None]:
# The "Year" and "Month" helper columns were only needed for the plots above;
# remove them from the frame in place.
df.drop(columns=["Year", "Month"], inplace=True)

In [None]:
# Make "Date" the index of the frame (modifies df in place).
df.set_index("Date", inplace=True)

In [None]:
# Verify the frame is now indexed by "Date".
df.head()

In [None]:
# Line plot of the frame's columns against its "Date" index.
df.plot.line()

In [None]:
# Extract the passenger counts as a single-column 2-D array (n_rows, 1),
# which is the layout the scaler is given in the next step.
passengers = np.array(df["#Passengers"]).reshape(-1, 1)

In [None]:
# Fit the scaler on the training portion only (the first 70% of the series,
# the same ratio used for the train/test split that follows) so the min/max of
# the held-out test period does not leak into preprocessing. The whole series
# is then transformed with those training statistics, which means test values
# may fall outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(passengers[: int(len(passengers) * 0.7)])
passengers_scaled = scaler.transform(passengers)

In [None]:
# Chronological 70/30 split: the first 70% of rows (rounded down) are used
# for training, whatever remains is the test set.
train_size = int(0.7 * len(passengers_scaled))
test_size = len(passengers_scaled) - train_size

In [None]:
# Report how many rows fall into each split.
print("Train size:", train_size)
print("Test size:", test_size)

In [None]:
# Slice the scaled series in order: rows before `train_size` are the training
# set, rows from `train_size` onward are the test set.
train = passengers_scaled[:train_size]
test = passengers_scaled[train_size:]

In [None]:
def dataset(data, steps=1):
    """Convert a series into supervised (input window, next value) pairs.

    Parameters
    ----------
    data : array-like of shape (n_rows, n_cols)
        Two-dimensional series; only column 0 is read.
    steps : int, default 1
        Number of consecutive observations in each input window.

    Returns
    -------
    data_x : numpy.ndarray of shape (n_samples, steps)
        Row ``i`` holds ``data[i:i + steps, 0]``.
    data_y : numpy.ndarray of shape (n_samples,)
        Element ``i`` holds ``data[i + steps, 0]``, the value that follows
        window ``i``.

    ``n_samples`` is ``len(data) - steps``; when the series is too short to
    form a single pair, correctly shaped empty arrays are returned.
    """
    # Window i needs a target at index i + steps, so valid starts are
    # 0 .. len(data) - steps - 1, i.e. len(data) - steps windows. (The previous
    # bound of len(data) - steps - 1 silently dropped the last usable pair.)
    n_samples = max(len(data) - steps, 0)
    if n_samples == 0:
        # Keep the 2-D layout so callers can still read data_x.shape[1].
        return np.empty((0, steps)), np.empty((0,))

    series = np.asarray(data)[:, 0]
    data_x = np.array([series[i:i + steps] for i in range(n_samples)])
    data_y = series[steps:steps + n_samples].copy()
    return data_x, data_y

In [None]:
# Build (input window, next value) pairs separately for each split, using
# dataset's default window length of steps=1.
X_train, y_train = dataset(train)
X_test, y_test = dataset(test)

In [None]:
# Insert a length-1 middle axis so the inputs are 3-D:
# (n_samples, 1, window_length), matching the LSTM's input_shape=(1, 1).
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Model

In [None]:
# Small recurrent regressor: one LSTM layer with 4 units fed inputs of shape
# (1, 1), followed by a single-unit dense output. Trained with MSE loss and Adam.
model = Sequential(
    [
        LSTM(4, input_shape=(1, 1)),
        Dense(1),
    ]
)
model.compile(optimizer="adam", loss="mean_squared_error")

In [None]:
# Train for 25 epochs with one sample per batch; the returned object is kept
# in `history`.
history = model.fit(X_train, y_train, batch_size=1, epochs=25)

In [None]:
# Persist the trained model to "LSTM_air.h5" (relative to the working directory).
model.save("LSTM_air.h5")

In [None]:
# Predict on the test windows, then bring the true targets back to the
# original passenger-count scale. inverse_transform expects a 2-D array, hence
# the reshape to a single column.
# NOTE: this overwrites y_test, so re-running the cell would un-scale it twice.
y_pred = model.predict(X_test)
y_test = scaler.inverse_transform(y_test.reshape(-1, 1))

In [None]:
# Map the predictions from the scaler's range back to original passenger counts.
# NOTE: this overwrites y_pred, so re-running the cell would un-scale it twice.
y_pred = scaler.inverse_transform(y_pred)

In [None]:
# Overlay the true and predicted test-set values (both in original units).
fig, ax = plt.subplots()
ax.plot(y_test, label="Actual")
ax.plot(y_pred, label="Predicted")
ax.legend()
plt.show()