In [None]:
import warnings

# Silence every warning category for the rest of the kernel session.
# NOTE(review): this also hides deprecation and numerical warnings raised by the
# libraries used below — consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

In [None]:
from pathlib import Path

# Directory the market CSV is read from, relative to the notebook's working directory.
production_data_path: Path = Path("../data/30_prod")

In [None]:
import pandas as pd

# Name of the CSV file inside `production_data_path`; used below so the file name
# is defined in exactly one place.
csv_market_dataset_name: str = "ticker_data_final.csv"
df: pd.DataFrame = pd.read_csv(
    filepath_or_buffer=production_data_path / csv_market_dataset_name,
    # The date column is unnamed in the file, so pandas labels it "Unnamed: 0".
    parse_dates=["Unnamed: 0"],
    sep=",",
)
df.rename(columns={"Unnamed: 0": "Date"}, inplace=True)
# Fail early if the dates were not parsed; the sort and strftime calls below rely on it.
assert pd.api.types.is_datetime64_any_dtype(df["Date"])
df.set_index("Date", inplace=True)
# Chronological order is required because windows are later cut from consecutive rows.
df.sort_index(inplace=True)
df.head(5)

In [None]:
# Summarise the loaded frame: columns, dtypes and non-null counts.
df.info()

In [None]:
# Report the row count and the first/last index values of the date-sorted frame.
# NOTE(review): "days" assumes one row per day — confirm against the CSV.
print(f"Number of days in the dataset: {df.shape[0]}")
print(f"Starting date of the dataset: {df.index[0].strftime('%d %b %Y')}")
print(f"Ending date of the dataset: {df.index[-1].strftime('%d %b %Y')}")

In [None]:
from numpy.typing import NDArray

import numpy as np

# Column of `df` whose values are windowed and used as the GAN's real data.
ticker_objective: str = "AAPL.MX"

# Raw values of that column as a NumPy array.
# NOTE(review): the float64 annotation is not enforced and missing values are not
# handled here — confirm the column is numeric and has no gaps.
features_: NDArray[np.float64] = df[ticker_objective].values

In [None]:
# Display the selected ticker column, indexed by date.
df[ticker_objective]

In [None]:
import yfinance as yf

# yfinance handle for the "AAPL" symbol.
# NOTE(review): the training column selected earlier is "AAPL.MX", not "AAPL" —
# confirm the different symbol is intended.
yf_ticker = yf.Ticker("AAPL")

In [None]:
# Daily bars between 2024-01-01 and 2025-01-01. `start`/`end` already fix the date
# range, so the "1d" granularity belongs in `interval`; `period` is an alternative
# to start/end and should not be combined with them.
yf_ticker.history(interval="1d", start="2024-01-01", end="2025-01-01")

In [None]:
# Display the raw value array extracted from the ticker column.
features_

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import torch

# Plot styling: restore seaborn/matplotlib defaults first, then apply the "talk"
# context with a reduced font scale and make viridis the default image colormap.
sns.reset_defaults()
sns.set_context(context='talk', font_scale=0.7)
plt.rcParams['image.cmap'] = 'viridis'

# Render figures inside the notebook output cells.
%matplotlib inline

In [None]:
# Seed value shared by torch and NumPy's global RNGs so runs are repeatable.
manual_seed: int = 42

torch.manual_seed(manual_seed)
np.random.seed(manual_seed)
# Ask torch to use deterministic algorithms only.
# NOTE(review): on CUDA this may additionally require the CUBLAS_WORKSPACE_CONFIG
# environment variable to be set — confirm against the PyTorch documentation.
torch.use_deterministic_algorithms(True)

In [None]:
from sklearn.preprocessing import MinMaxScaler


def preprocess_financial_data(df, sequence_length=60):
    """Scale the input to [-1, 1] and cut it into overlapping windows.

    Parameters
    ----------
    df : array-like of shape (n_samples, n_features)
        Values to scale; passed unchanged to ``MinMaxScaler.fit_transform``.
    sequence_length : int, default 60
        Number of consecutive rows in each window.

    Returns
    -------
    tuple
        ``(windows, scaler)``. ``windows`` is ``np.array`` of the stride-1
        windows whose start index runs from 0 to
        ``len(df) - sequence_length - 1``; ``scaler`` is the fitted
        ``MinMaxScaler`` needed to map values back to the original scale.
    """
    price_scaler = MinMaxScaler(feature_range=(-1, 1))
    normalised = price_scaler.fit_transform(df)

    # A non-positive count yields an empty range, hence an empty window list.
    n_windows = len(normalised) - sequence_length
    windows = [
        normalised[start : start + sequence_length] for start in range(n_windows)
    ]

    return np.array(windows), price_scaler

In [None]:
# Window length, in rows, of each training sequence.
sequence_length = 60
# reshape(-1, 1) turns the 1-D value array into a single-feature column, which is
# the 2-D layout the scaler inside preprocess_financial_data is fitted on.
sequences, scaler = preprocess_financial_data(features_.reshape(-1, 1), sequence_length)

In [None]:
from torch.utils.data import Dataset, DataLoader

# Scaled windows as a float32 tensor; this is the "real" data the discriminator sees.
real_data = torch.FloatTensor(sequences)


class TimeSeriesDataset(Dataset):
    """Thin ``Dataset`` wrapper around an indexable collection of windows."""

    def __init__(self, data):
        # Keep a reference to the caller's container; nothing is copied.
        self.data = data

    def __len__(self):
        """Return the number of items in the wrapped collection."""
        n_items = len(self.data)
        return n_items

    def __getitem__(self, idx):
        """Return the item stored at position ``idx``."""
        item = self.data[idx]
        return item


# Wrap the window tensor so it can be batched.
dataset = TimeSeriesDataset(real_data)
# Batches of up to 32 windows, reshuffled on every pass over the data.
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [None]:
import torch.nn as nn


class Generator(nn.Module):
    """Fully connected generator mapping latent vectors to sequences.

    Parameters
    ----------
    latent_dim : int
        Size of each input noise vector.
    seq_length : int
        Number of time steps in each generated sequence.
    n_features : int
        Number of values per time step.
    """

    def __init__(self, latent_dim, seq_length, n_features):
        super().__init__()
        self.seq_length = seq_length
        self.n_features = n_features

        # The network emits one flat vector per sample; forward() restores the
        # (seq_length, n_features) layout.
        flat_output_size = seq_length * n_features
        layers = [
            nn.Linear(latent_dim, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 512),
            nn.LeakyReLU(0.2),
            nn.Linear(512, flat_output_size),
            nn.Tanh(),  # bounds every output value to [-1, 1]
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, z):
        """Map noise ``z`` of shape (batch, latent_dim) to (batch, seq_length, n_features)."""
        flat = self.model(z)
        target_shape = (-1, self.seq_length, self.n_features)
        return flat.view(*target_shape)

In [None]:
class Discriminator(nn.Module):
    """LSTM-based discriminator scoring a sequence with a value in [0, 1].

    Parameters
    ----------
    seq_length : int
        Number of time steps in each input sequence; fixes the size of the
        final linear layer.
    n_features : int
        Number of values per time step.
    """

    def __init__(self, seq_length, n_features):
        super().__init__()
        self.seq_length = seq_length
        self.n_features = n_features

        hidden_size = 64
        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=hidden_size,
            num_layers=2,
            batch_first=True,
            dropout=0.2,
        )
        self.leaky_relu = nn.LeakyReLU(0.2)
        self.flatten = nn.Flatten()
        # Every time step's hidden state feeds the classifier, hence hidden_size * seq_length inputs.
        self.linear = nn.Linear(hidden_size * seq_length, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Score input ``x`` of shape (batch, seq_length, n_features); returns (batch, 1)."""
        hidden_states, _ = self.lstm(x)
        activated = self.leaky_relu(hidden_states)
        flat = self.flatten(activated)
        logits = self.linear(flat)
        return self.sigmoid(logits)

In [None]:
import torch.optim as optim

# Run on the GPU when torch reports one as available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
latent_dim = 100  # size of the noise vector fed to the generator
# Reuse the window length chosen when the sequences were built instead of repeating
# the literal: the discriminator's linear layer is sized from it, so a mismatch with
# the real windows would only surface as a shape error during training.
seq_length = sequence_length
n_features = 1  # one value per time step (the single ticker column)
lr = 0.0002
epochs = 1000

generator = Generator(latent_dim, seq_length, n_features).to(device)
discriminator = Discriminator(seq_length, n_features).to(device)

# Both networks use Adam with the same learning rate and betas.
optimizer_G = optim.Adam(generator.parameters(), lr=lr, betas=(0.5, 0.999))
optimizer_D = optim.Adam(discriminator.parameters(), lr=lr, betas=(0.5, 0.999))

# Binary cross-entropy on the discriminator's sigmoid output.
criterion = nn.BCELoss()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns


# Per-batch loss history for both networks.
G_losses = []
D_losses = []

for epoch in range(epochs):
    for real_samples in dataloader:
        real_samples = real_samples.to(device)
        # Use the actual batch size: the final batch can be smaller than the
        # DataLoader's batch_size.
        batch_size = real_samples.size(0)

        # BCE targets: 1 for real windows, 0 for generated ones.
        valid = torch.ones(batch_size, 1, device=device)
        fake = torch.zeros(batch_size, 1, device=device)

        # ---- Discriminator step ----
        optimizer_D.zero_grad()

        real_loss = criterion(discriminator(real_samples), valid)

        z = torch.randn(batch_size, latent_dim, device=device)
        fake_samples = generator(z)
        # detach() keeps this update from back-propagating into the generator.
        fake_loss = criterion(discriminator(fake_samples.detach()), fake)

        d_loss = (real_loss + fake_loss) / 2
        d_loss.backward()
        optimizer_D.step()

        # ---- Generator step ----
        optimizer_G.zero_grad()

        # Fresh noise; the generator is rewarded when the discriminator labels
        # its output as real.
        z = torch.randn(batch_size, latent_dim, device=device)
        gen_samples = generator(z)
        g_loss = criterion(discriminator(gen_samples), valid)

        g_loss.backward()
        optimizer_G.step()

        G_losses.append(g_loss.item())
        D_losses.append(d_loss.item())

    # Every 100 epochs: report the last batch's losses and compare one generated
    # sequence with one real window from that batch.
    if epoch % 100 == 0:
        print(
            f"Epoch [{epoch}/{epochs}] Loss D: {d_loss.item():.4f}, Loss G: {g_loss.item():.4f}"
        )

        with torch.no_grad():
            test_noise = torch.randn(1, latent_dim, device=device)

            generated_sample = generator(test_noise)

            generated_sample = generated_sample.cpu().numpy().squeeze()
            real_sample = real_samples[0].cpu().numpy().squeeze()

            plt.figure(figsize=(12, 4))
            plt.subplot(1, 2, 1)
            plt.plot(generated_sample)
            plt.title("Generated Sample")

            plt.subplot(1, 2, 2)
            plt.plot(real_sample)
            plt.title("Real Sample")
            plt.show()

In [None]:
# Draw 10 generated sequences: line plots on the left axis and per-sequence value
# histograms on the right. Values are in the generator's output scale.
with torch.no_grad():
    fig = plt.figure(figsize=(12, 6))
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)

    for _ in range(10):
        # One latent vector per iteration, flattened to a 1-D sequence for plotting.
        z = torch.randn(1, latent_dim, device=device)
        generated_sample = generator(z).flatten().cpu().numpy()
        ax1.plot(generated_sample)
        ax2.hist(generated_sample, bins=20, alpha=0.3)

    plt.show()

In [None]:
def inverse_transform_synthetic(synthetic_data, scaler):
    """
    Convert generated data back to original price scale.

    Parameters
    ----------
    synthetic_data : torch.Tensor
        Generated values on any device; it may still carry autograd history.
    scaler : object
        Fitted scaler exposing ``inverse_transform`` for a single-column
        2-D array.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(synthetic_data.numel(), 1)``: every value of
        ``synthetic_data`` in row-major order, mapped through
        ``scaler.inverse_transform``. Callers that pass several sequences
        must reshape the result to recover one row per sequence.
    """
    # detach() allows tensors produced outside torch.no_grad(); cpu() allows GPU
    # tensors. reshape(-1, 1) already flattens every dimension, so no squeeze is needed.
    synthetic_np = synthetic_data.detach().cpu().numpy().reshape(-1, 1)
    return scaler.inverse_transform(synthetic_np)

In [None]:
# Generate one sequence and map it back to the original price scale.
with torch.no_grad():
    z = torch.randn(1, latent_dim, device=device)
    generated_sample = generator(z)

# Keep the result and end the cell with an expression so the prices are actually
# shown; an expression nested inside the `with` block is not echoed by the notebook.
generated_prices = inverse_transform_synthetic(generated_sample, scaler)
generated_prices[:5]

In [None]:
# Persist the trained weights of both networks; the paths are relative, so the
# files land in the current working directory.
torch.save(generator.state_dict(), "generator.pth")
torch.save(discriminator.state_dict(), "discriminator.pth")

# Loss curves; one point per training batch, as recorded in G_losses / D_losses.
plt.figure(figsize=(10, 5))
plt.title("Generator and Discriminator Loss During Training")
plt.plot(G_losses, label="G")
plt.plot(D_losses, label="D")
plt.xlabel("iterations")
plt.ylabel("Loss")
plt.legend()
plt.show()

In [None]:
# Sample 10 sequences from the generator in evaluation mode, without tracking gradients.
generator.eval()
with torch.no_grad():
    noise = torch.randn(10, latent_dim, device=device)
    synthetic_data = generator(noise).cpu().numpy()

# Show only the first 4 of the 10 samples in a 2x2 grid; values are still in the
# generator's output scale, not prices.
plt.figure(figsize=(12, 6))
for i in range(4):
    plt.subplot(2, 2, i + 1)
    plt.plot(synthetic_data[i].squeeze())
    plt.title(f"Synthetic Sample {i+1}")
plt.tight_layout()
plt.show()

In [None]:
generator.eval()
num_samples = 5
with torch.no_grad():
    # Create the noise on the generator's device and bring the result back to the
    # CPU before converting, so the cell also runs when training used CUDA.
    z = torch.randn(num_samples, latent_dim, device=device)
    generated_sequences = generator(z).cpu().numpy()

# Inverse transform and plot
plt.figure(figsize=(12, 6))
for i in range(num_samples):
    # The scaler was fitted on a single column, so each sequence goes in as (seq_length, 1).
    sequence = generated_sequences[i].reshape(-1, 1)
    inv_sequence = scaler.inverse_transform(sequence)
    # Label only the first synthetic line so the legend has a single "Synthetic" entry.
    plt.plot(inv_sequence, alpha=0.7, label="Synthetic" if i == 0 else "")

# Reference curve: the last real training window, taken from `real_data` (the
# tensor of scaled windows built earlier in the notebook).
real_sample = scaler.inverse_transform(real_data[-1].numpy().reshape(-1, 1))
plt.plot(real_sample, alpha=0.7, label="Real")

plt.title("Real vs. Synthetic Financial Time Series")
plt.xlabel("Time Steps")
plt.ylabel("Price")
plt.legend()
plt.show()

In [None]:
# Sample 10 sequences from the generator in evaluation mode.
generator.eval()
with torch.no_grad():
    noise = torch.randn(10, latent_dim, device=device)
    synthetic_data = generator(noise)

# inverse_transform_synthetic returns all values as one flat column, so restore one
# row per generated sequence; otherwise synthetic_prices[i] is a single value and
# each subplot shows one point instead of a series.
synthetic_prices = inverse_transform_synthetic(synthetic_data, scaler).reshape(
    synthetic_data.shape[0], -1
)

# Show the first 4 price series in a 2x2 grid.
plt.figure(figsize=(12, 6))
for i in range(4):
    plt.subplot(2, 2, i + 1)
    plt.plot(synthetic_prices[i])
    plt.title(f"Synthetic Price Series {i+1}")
plt.tight_layout()
plt.show()