In [1]:
import pandas as pd
import numpy as np

# Load the dataset
data = pd.read_csv("worldometer_coronavirus_daily_data.csv")

# Display basic info
print(data.info())

# Handle missing values
# We can fill missing values with forward-fill, backward-fill, or the mean value
data["daily_new_cases"].fillna(method="ffill", inplace=True)
data["active_cases"].fillna(method="ffill", inplace=True)
data["daily_new_deaths"].fillna(method="ffill", inplace=True)
data["cumulative_total_deaths"].fillna(method="ffill", inplace=True)

# Convert 'date' column to datetime
data["date"] = pd.to_datetime(data["date"])

# Sort data by date (to maintain sequence for time series)
data.sort_values("date", inplace=True)

# Filter data for the desired country (e.g., India)
india_data = data[data["country"] == "India"].copy()

# Drop the 'country' column since it's constant now
india_data.drop(columns=["country"], inplace=True)

# Reset index after sorting
india_data.reset_index(drop=True, inplace=True)

# Inspect the preprocessed data
print(india_data.head())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 184787 entries, 0 to 184786
Data columns (total 7 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   date                     184787 non-null  object 
 1   country                  184787 non-null  object 
 2   cumulative_total_cases   184787 non-null  float64
 3   daily_new_cases          174329 non-null  float64
 4   active_cases             166747 non-null  float64
 5   cumulative_total_deaths  178227 non-null  float64
 6   daily_new_deaths         157850 non-null  float64
dtypes: float64(5), object(2)
memory usage: 9.9+ MB
None
        date  cumulative_total_cases  daily_new_cases  active_cases  \
0 2020-02-15                     3.0            115.0           0.0   
1 2020-02-16                     3.0              0.0           0.0   
2 2020-02-17                     3.0              0.0           0.0   
3 2020-02-18                     3.0              0.0 

In [2]:
# Create lag features for the last 3 days (lags help the model to look at the past values)
for lag in range(1, 4):
    india_data[f"lag_{lag}"] = india_data["cumulative_total_cases"].shift(lag)

# Drop rows with NaN values introduced by lagging
india_data.dropna(inplace=True)

# Features and target selection
features = [
    "daily_new_cases",
    "active_cases",
    "cumulative_total_deaths",
    "daily_new_deaths",
] + [f"lag_{lag}" for lag in range(1, 4)]
target = "cumulative_total_cases"

X = india_data[features]
y = india_data[target]

print("Feature matrix (X):", X.shape)
print("Target vector (y):", y.shape)


Feature matrix (X): (817, 7)
Target vector (y): (817,)


In [3]:
from sklearn.preprocessing import MinMaxScaler

# Initialize scalers
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

# Scale features and target
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(np.array(y).reshape(-1, 1))

# Check the scaling results
print("Scaled feature matrix (X):", X_scaled.shape)
print("Scaled target vector (y):", y_scaled.shape)


Scaled feature matrix (X): (817, 7)
Scaled target vector (y): (817, 1)


In [4]:
# Create sequences for the time series model
def create_sequences(X, y, time_step):
    Xs, ys = [], []
    for i in range(len(X) - time_step):
        Xs.append(X[i : (i + time_step)])
        ys.append(y[i + time_step])
    return np.array(Xs), np.array(ys)


# Set time step (e.g., 60 days)
time_step = 60

# Create sequences
X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_step)

# Split data into training and testing sets (80/20 split)
train_size = int(len(X_seq) * 0.8)
X_train, X_test = X_seq[:train_size], X_seq[train_size:]
y_train, y_test = y_seq[:train_size], y_seq[train_size:]

print("Training data shape:", X_train.shape, y_train.shape)
print("Testing data shape:", X_test.shape, y_test.shape)


Training data shape: (605, 60, 7) (605, 1)
Testing data shape: (152, 60, 7) (152, 1)


In [23]:
import tensorflow as tf
from tensorflow.keras.layers import (
    Layer,
    Dense,
    Dropout,
    LayerNormalization,
    Input,
    MultiHeadAttention,
    GlobalAveragePooling1D,
)
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2


# Positional Encoding (same as before)
class PositionalEncoding(Layer):
    def __init__(self, sequence_len, d_model, **kwargs):
        super(PositionalEncoding, self).__init__(**kwargs)
        self.sequence_len = sequence_len
        self.d_model = d_model
        self.pos_encoding = self.get_positional_encoding(sequence_len, d_model)

    def get_positional_encoding(self, sequence_len, d_model):
        positions = np.arange(sequence_len)[:, np.newaxis]
        dims = np.arange(d_model)[np.newaxis, :]

        angle_rates = 1 / np.power(10000, (2 * (dims // 2)) / np.float32(d_model))
        angle_rads = positions * angle_rates

        angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
        angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])

        pos_encoding = angle_rads[np.newaxis, ...]
        return tf.cast(pos_encoding, dtype=tf.float32)

    def call(self, inputs):
        return inputs + self.pos_encoding[:, : tf.shape(inputs)[1], :]


# Transformer Encoder Block
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0, l2_value=0):
    x = MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(
        inputs, inputs
    )
    x = Dropout(dropout)(x)
    x = LayerNormalization(epsilon=1e-6)(x)
    x = Dense(ff_dim, activation="relu", kernel_regularizer=l2(l2_value))(x)
    x = Dropout(dropout)(x)
    x = LayerNormalization(epsilon=1e-6)(x)
    return x


# Build Transformer model
def build_transformer_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    l2_value=0
):
    inputs = Input(shape=input_shape)
    x = PositionalEncoding(input_shape[0], input_shape[1])(inputs)

    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout, l2_value)

    x = GlobalAveragePooling1D()(x)
    x = Dense(mlp_units, activation="relu", kernel_regularizer=l2(l2_value))(x)
    x = Dropout(dropout)(x)
    outputs = Dense(1)(x)

    return Model(inputs, outputs)


# Model parameters
input_shape = (X_train.shape[1], X_train.shape[2])
head_size = 128
num_heads = 4
ff_dim = 128
num_transformer_blocks = 2
mlp_units = 64
dropout = 0.3
l2_value = 1e-3

# Build and compile the model
transformer_model = build_transformer_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout,
    l2_value,
)
# Learning rate scheduler
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-3, decay_steps=10000, decay_rate=0.9
)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

transformer_model.compile(optimizer=optimizer, loss="mean_squared_error")

# Show model summary
transformer_model.summary()


In [24]:
from tensorflow.keras.callbacks import EarlyStopping

# Early stopping to prevent overfitting
early_stopping = EarlyStopping(
    monitor="val_loss", patience=10, restore_best_weights=True
)

# Train the model
history = transformer_model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping],
)


Epoch 1/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 183ms/step - loss: 1.1593 - val_loss: 0.3693
Epoch 2/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 134ms/step - loss: 0.3585 - val_loss: 0.5037
Epoch 3/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 138ms/step - loss: 0.3150 - val_loss: 0.4353
Epoch 4/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 136ms/step - loss: 0.3072 - val_loss: 0.4855
Epoch 5/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 135ms/step - loss: 0.3039 - val_loss: 0.4265
Epoch 6/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 140ms/step - loss: 0.2911 - val_loss: 0.3563
Epoch 7/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 132ms/step - loss: 0.2792 - val_loss: 0.2595
Epoch 8/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 136ms/step - loss: 0.2312 - val_loss: 0.1953
Epoch 9/100
[1m19/19[0m [32m

In [22]:
from sklearn.metrics import mean_squared_error, r2_score

# Make predictions
train_predictions = transformer_model.predict(X_train)
test_predictions = transformer_model.predict(X_test)

# Inverse scale the predictions
train_predictions = scaler_y.inverse_transform(train_predictions)
test_predictions = scaler_y.inverse_transform(test_predictions)
y_train_actual = scaler_y.inverse_transform(y_train)
y_test_actual = scaler_y.inverse_transform(y_test)

# Calculate RMSE and R^2
train_rmse = np.sqrt(mean_squared_error(y_train_actual, train_predictions))
test_rmse = np.sqrt(mean_squared_error(y_test_actual, test_predictions))
train_r2 = r2_score(y_train_actual, train_predictions)
test_r2 = r2_score(y_test_actual, test_predictions)

# Print results
print(f"Train RMSE: {train_rmse}, Train R²: {train_r2}")
print(f"Test RMSE: {test_rmse}, Test R²: {test_r2}")


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 51ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Train RMSE: 8940716.589080242, Train R²: 0.5119099547876249
Test RMSE: 2939859.0048004193, Test R²: 0.16276789318976526
