In [1]:
import torch
import numpy as np
import math
import torch.nn.functional as F
import json
import dill
from tqdm import tqdm
from scipy.integrate import odeint
from scipy.special import legendre
from itertools import combinations_with_replacement

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

## SINDy Utils

In [3]:
def sindy_library_pt(z, latent_dim, poly_order, include_sine=False):
    """
    Description: Builds the SINDy library for a first-order dynamical system.

    Column order: a constant 1, then every monomial of total degree
    1..poly_order (in ``combinations_with_replacement`` order over the
    variable indices), then - optionally - ``sin(z_i)`` for each variable.

    Args:
        z (torch.Tensor): Input tensor of shape (batch_size, latent_dim), representing latent states.
        latent_dim (int): Number of latent variables (dimensions).
        poly_order (int): Maximum degree of polynomial terms to include in the library.
        include_sine (bool): Whether to include sine terms in the library.

    Returns:
        torch.Tensor: A matrix (batch_size, library_size) where each column is a function of z.
        It lives on the same device and has the same dtype as ``z``.
    """
    # Every column is derived from z, so the library follows z's device and
    # dtype. This removes the dependency on a module-level `device` and the
    # mixed-device failure that occurred when sine columns (built from z) were
    # stacked with columns that had been moved to another device.
    columns = [torch.ones(z.size(0), dtype=z.dtype, device=z.device)]

    for degree in range(1, poly_order + 1):
        # All index multisets of size `degree`, e.g. (0, 0) -> z_0^2, (1, 2) -> z_1 * z_2
        for combination in combinations_with_replacement(range(latent_dim), degree):
            columns.append(torch.prod(z[:, list(combination)], dim=1))

    if include_sine:
        columns.extend(torch.sin(z[:, i]) for i in range(latent_dim))

    # Stack all features column-wise: (batch_size, num_features)
    return torch.stack(columns, dim=1)

def sindy_library_pt_order2(z, dz, latent_dim, poly_order, include_sine=False):
    """
    Build the SINDy library for a second-order system.

    The library is built over the concatenated state ``[z, dz]`` (2 * latent_dim
    variables). Column order: a constant 1, then every monomial of total degree
    1..poly_order, then - optionally - the sine of each of the 2 * latent_dim
    variables.

    Args:
        z (torch.Tensor): Latent states, shape (batch_size, latent_dim).
        dz (torch.Tensor): Tensor concatenated to ``z`` along dim 1; presumably the
            first time derivative of ``z`` with the same shape - confirm against caller.
        latent_dim (int): Number of latent variables in ``z``.
        poly_order (int): Maximum degree of polynomial terms.
        include_sine (bool): Whether to append sine terms.

    Returns:
        torch.Tensor: Matrix of shape (batch_size, library_size) on the same
        device and with the same dtype as the concatenated input.
    """
    z_combined = torch.cat([z, dz], dim=1)
    n_vars = 2 * latent_dim

    # Build every column from the input itself so the result follows its device
    # and dtype instead of a module-level `device` (which broke the sine path
    # whenever the input lived on a different device).
    columns = [
        torch.ones(
            z_combined.size(0), dtype=z_combined.dtype, device=z_combined.device
        )
    ]

    for degree in range(1, poly_order + 1):
        for combination in combinations_with_replacement(range(n_vars), degree):
            columns.append(torch.prod(z_combined[:, list(combination)], dim=1))

    if include_sine:
        columns.extend(torch.sin(z_combined[:, i]) for i in range(n_vars))

    return torch.stack(columns, dim=1)


def library_size(latent_dim, poly_order, include_sine=False):
    """
    Count the columns of a polynomial (plus optional sine) library.

    Args:
        latent_dim (int): Number of variables the library is built over.
        poly_order (int): Maximum polynomial degree (degree 0, the constant, is included).
        include_sine (bool): If True, one sine column per variable is counted as well.

    Returns:
        int: Total number of library columns.
    """
    # There are C(d + k - 1, k) monomials of exact degree k in d variables.
    n_terms = sum(
        math.comb(latent_dim + degree - 1, degree) for degree in range(poly_order + 1)
    )
    return n_terms + latent_dim if include_sine else n_terms

# Generate data

## Lorenz

### Lorenz Code

In [4]:
def get_lorenz_data(n_ics, noise_strength=0):
    """
    Build a noisy, flattened Lorenz data set from random initial conditions.

    Initial conditions are drawn uniformly around a fixed mean, simulated on the
    time grid ``np.arange(0, 5, 0.02)`` and lifted to 128 dimensions with the
    nonlinear (cubic) modes enabled. The ``x``, ``dx`` and ``ddx`` entries are
    reshaped to (n_ics * n_steps, 128) and perturbed with standard-normal noise
    scaled by ``noise_strength``.

    Arguments:
        n_ics - Integer specifying the number of initial conditions to use.
        noise_strength - Scale of the Gaussian noise added to x, dx and ddx.

    Return:
        data - Dictionary produced by generate_lorenz_data() with the flattened,
        noisy x / dx / ddx entries.
    """
    input_dim = 128
    t = np.arange(0, 5, 0.02)
    n_steps = t.size

    # Uniform box of initial conditions: centre and full width per coordinate
    ic_means = np.array([0, 0, 25])
    ic_widths = 2 * np.array([36, 48, 41])
    ics = ic_widths * (np.random.rand(n_ics, 3) - 0.5) + ic_means

    data = generate_lorenz_data(
        ics,
        t,
        input_dim,
        linear=False,
        normalization=np.array([1 / 40, 1 / 40, 1 / 40]),
    )

    # Flatten (n_ics, n_steps, input_dim) -> (n_ics * n_steps, input_dim) and add
    # noise; the key order fixes the order in which random numbers are drawn.
    n_rows = n_steps * n_ics
    for key in ("x", "dx", "ddx"):
        flattened = data[key].reshape((-1, input_dim))
        data[key] = flattened + noise_strength * np.random.randn(n_rows, input_dim)

    return data

def simulate_lorenz(z0, t, sigma=10.0, beta=8 / 3, rho=28.0):
    """
    Integrate the Lorenz system and evaluate its first two time derivatives.

    Arguments:
        z0 - Initial condition in the form of a 3-value list or array.
        t - Array of time points at which to simulate (at least two points).
        sigma, beta, rho - Lorenz parameters

    Returns:
        z, dz, ddz - Arrays of the trajectory values and their 1st and 2nd derivatives,
        one row per time point.
    """

    def velocity(state, _time):
        # Right-hand side of the Lorenz equations (autonomous: time is unused)
        return [
            sigma * (state[1] - state[0]),
            state[0] * (rho - state[2]) - state[1],
            state[0] * state[1] - beta * state[2],
        ]

    def acceleration(state, rate, _time):
        # Time derivative of `velocity`, obtained with the product rule
        return [
            sigma * (rate[1] - rate[0]),
            rate[0] * (rho - state[2]) + state[0] * (-rate[2]) - rate[1],
            rate[0] * state[1] + state[0] * rate[1] - beta * rate[2],
        ]

    z = odeint(velocity, z0, t)

    dt = t[1] - t[0]
    dz = np.zeros(z.shape)
    ddz = np.zeros(z.shape)
    for step, state in enumerate(z):
        now = dt * step
        dz[step] = velocity(state, now)
        ddz[step] = acceleration(state, dz[step], now)
    return z, dz, ddz

def lorenz_coefficients(normalization, poly_order=3, sigma=10.0, beta=8 / 3, rho=28.0):
    """
    Build the ground-truth SINDy coefficient matrix of the (rescaled) Lorenz system.

    Rows follow the polynomial library over three variables
    (1, z0, z1, z2, z0*z0, z0*z1, z0*z2, ...); columns are the three equations.

    Arguments:
        normalization - 3-element list or array specifying the scaling of each Lorenz variable
        poly_order - Polynomial order of the SINDy model.
        sigma, beta, rho - Parameters of the Lorenz system

    Returns:
        Xi - Array of shape (library_size(3, poly_order), 3).
    """
    scale_0, scale_1, scale_2 = normalization[0], normalization[1], normalization[2]

    # (library row, equation column) -> coefficient, in assignment order
    nonzero_entries = (
        ((1, 0), -sigma),
        ((2, 0), sigma * scale_0 / scale_1),
        ((1, 1), rho * scale_1 / scale_0),
        ((2, 1), -1),
        ((6, 1), -scale_1 / (scale_0 * scale_2)),
        ((3, 2), -beta),
        ((5, 2), scale_2 / (scale_0 * scale_1)),
    )

    Xi = np.zeros((library_size(3, poly_order), 3))
    for (row, col), value in nonzero_entries:
        Xi[row, col] = value
    return Xi

def generate_lorenz_data(
    ics, t, n_points, linear=True, normalization=None, sigma=10, beta=8 / 3, rho=28
):
    """
    Generate high-dimensional Lorenz data set.

    Each trajectory is simulated with simulate_lorenz(), optionally rescaled, and
    lifted to ``n_points`` dimensions by weighting Legendre polynomials (sampled
    on [-1, 1]) with the Lorenz coordinates. Derivatives of the lifted signal are
    computed analytically with the chain rule.

    Arguments:
        ics - Nx3 array of N initial conditions
        t - array of time points over which to simulate
        n_points - size of the high-dimensional dataset created
        linear - Boolean value. If True, high-dimensional dataset is a linear combination
        of the Lorenz dynamics. If False, the dataset also includes cubic modes.
        normalization - Optional 3-value array for rescaling the 3 Lorenz variables.
        sigma, beta, rho - Parameters of the Lorenz dynamics.

    Returns:
        data - Dictionary containing elements of the dataset. This includes the time points (t),
        spatial mapping (y_spatial), high-dimensional modes used to generate the full dataset
        (modes), low-dimensional Lorenz dynamics (z, along with 1st and 2nd derivatives dz and
        ddz), high-dimensional dataset (x, along with 1st and 2nd derivatives dx and ddx), and
        the true Lorenz coefficient matrix for SINDy (sindy_coefficients, float32).
        x, dx and ddx have shape (N, t.size, n_points).
    """
    n_ics = ics.shape[0]
    n_steps = t.size

    d = 3
    z = np.zeros((n_ics, n_steps, d))
    dz = np.zeros(z.shape)
    ddz = np.zeros(z.shape)
    for i in range(n_ics):
        z[i], dz[i], ddz[i] = simulate_lorenz(
            ics[i], t, sigma=sigma, beta=beta, rho=rho
        )

    if normalization is not None:
        z *= normalization
        dz *= normalization
        ddz *= normalization

    # Spatial modes: Legendre polynomials of degree 0..5 sampled on [-1, 1]
    y_spatial = np.linspace(-1, 1, n_points)
    modes = np.zeros((2 * d, n_points))
    for i in range(2 * d):
        modes[i] = legendre(i)(y_spatial)

    # Keep a trailing singleton axis on every latent coordinate so it broadcasts
    # against a mode of shape (n_points,) to (n_ics, n_steps, n_points). This
    # replaces the per-sample Python loop and the six full-size scratch arrays
    # that were allocated but never returned.
    z0, z1, z2 = (z[..., k : k + 1] for k in range(d))
    dz0, dz1, dz2 = (dz[..., k : k + 1] for k in range(d))
    ddz0, ddz1, ddz2 = (ddz[..., k : k + 1] for k in range(d))

    # Linear part: x = sum_k mode_k * z_k, and the same for the derivatives
    x = modes[0] * z0 + modes[1] * z1 + modes[2] * z2
    dx = modes[0] * dz0 + modes[1] * dz1 + modes[2] * dz2
    ddx = modes[0] * ddz0 + modes[1] * ddz1 + modes[2] * ddz2

    if not linear:
        # Cubic part: mode_{3+k} * z_k^3 with
        #   d/dt   z^3 = 3 z^2 dz
        #   d2/dt2 z^3 = 6 z dz^2 + 3 z^2 ddz
        x += modes[3] * z0**3 + modes[4] * z1**3 + modes[5] * z2**3
        dx += (
            modes[3] * 3 * (z0**2) * dz0
            + modes[4] * 3 * (z1**2) * dz1
            + modes[5] * 3 * (z2**2) * dz2
        )
        ddx += (
            modes[3] * (6 * z0 * dz0**2 + 3 * (z0**2) * ddz0)
            + modes[4] * (6 * z1 * dz1**2 + 3 * (z1**2) * ddz1)
            + modes[5] * (6 * z2 * dz2**2 + 3 * (z2**2) * ddz2)
        )

    sindy_coefficients = lorenz_coefficients(
        [1, 1, 1] if normalization is None else normalization,
        sigma=sigma,
        beta=beta,
        rho=rho,
    )

    return {
        "t": t,
        "y_spatial": y_spatial,
        "modes": modes,
        "x": x,
        "dx": dx,
        "ddx": ddx,
        "z": z,
        "dz": dz,
        "ddz": ddz,
        "sindy_coefficients": sindy_coefficients.astype(np.float32),
    }

## Pendulum

In [5]:
def get_pendulum_data(n_ics):
    """
    Generate pendulum movie data and flatten it to one row per (trajectory, time) pair.

    Args:
        n_ics: Number of initial conditions (trajectories) to generate.

    Returns:
        dict with the time grid ``t``, the flattened ``x``, ``dx`` and ``ddx``
        arrays, and ``z`` / ``dz`` taken as the first / second column of the
        flattened pendulum state.
    """
    t, x, dx, ddx, z = generate_pendulum_data(n_ics)

    n_rows = n_ics * t.size
    state = z.reshape((n_rows, -1))

    return {
        "t": t,
        "x": x.reshape((n_rows, -1)),
        "dx": dx.reshape((n_rows, -1)),
        "ddx": ddx.reshape((n_rows, -1)),
        # state columns are [angle, angular velocity]
        "z": state[:, 0:1],
        "dz": state[:, 1:2],
    }


def generate_pendulum_data(n_ics):
    """
    Simulate ``n_ics`` pendulum trajectories and render them as movies.

    Initial conditions are sampled uniformly from angle in [-pi, pi] and angular
    velocity in [-2.1, 2.1]; a sample is rejected when
    |omega^2 / 2 - cos(theta)| exceeds 0.99. Each accepted sample is integrated
    on ``np.arange(0, 10, 0.02)``.

    Args:
        n_ics: Number of accepted initial conditions to simulate.

    Returns:
        t, x, dx, ddx, z - the time grid, the movie and its first two time
        derivatives from pendulum_to_movie(), and the state array of shape
        (n_ics, t.size, 2). Note that pendulum_to_movie() wraps the angle column
        of ``z`` in place before it is returned.
    """

    def rhs(state, _time):
        # theta' = omega, omega' = -sin(theta)
        return [state[1], -np.sin(state[0])]

    t = np.arange(0, 10, 0.02)

    z = np.zeros((n_ics, t.size, 2))
    dz = np.zeros(z.shape)

    theta_lo, theta_hi = -np.pi, np.pi
    omega_lo, omega_hi = -2.1, 2.1

    accepted = 0
    while accepted < n_ics:
        z0 = np.array(
            [
                (theta_hi - theta_lo) * np.random.rand() + theta_lo,
                (omega_hi - omega_lo) * np.random.rand() + omega_lo,
            ]
        )
        # Reject samples outside the allowed energy band and draw again
        if np.abs(z0[1] ** 2 / 2.0 - np.cos(z0[0])) > 0.99:
            continue
        z[accepted] = odeint(rhs, z0, t)
        dz[accepted] = np.array(
            [rhs(state, time) for state, time in zip(z[accepted], t)]
        )
        accepted += 1

    x, dx, ddx = pendulum_to_movie(z, dz)

    return t, x, dx, ddx, z


def pendulum_to_movie(z, dz):
    """
    Render pendulum states as 51x51 Gaussian-blob images with time derivatives.

    Each frame is exp(A) with
        A = -((y1 - cos(phi))^2 + (y2 - sin(phi))^2) / 0.05,   phi = theta - pi/2,
    on a grid spanning [-1.5, 1.5] in both directions. The derivatives follow
    from the chain rule: dx = x * A', ddx = x * (A'^2 + A'').

    Args:
        z: Array of shape (n_ics, n_samples, 2); column 0 is the angle. The angle
            column is wrapped with wrap_to_pi() IN PLACE.
        dz: Array of the same shape; column 0 is used as the angular velocity and
            column 1 as the angular acceleration.

    Returns:
        x, dx, ddx - arrays of shape (n_ics, n_samples, 51, 51).
    """
    n_ics, n_samples = z.shape[0], z.shape[1]
    n = 51
    y1, y2 = np.meshgrid(np.linspace(-1.5, 1.5, n), np.linspace(1.5, -1.5, n))

    def render(theta):
        # Gaussian blob centred on the pendulum bob
        phi = theta - np.pi / 2
        return np.exp(-((y1 - np.cos(phi)) ** 2 + (y2 - np.sin(phi)) ** 2) / 0.05)

    def exponent_rate(theta, dtheta):
        # First time derivative of the exponent A
        phi = theta - np.pi / 2
        c, s = np.cos(phi), np.sin(phi)
        return -1 / 0.05 * (2 * (y1 - c) * s * dtheta + 2 * (y2 - s) * (-c) * dtheta)

    def exponent_accel(theta, dtheta, ddtheta):
        # Second time derivative of the exponent A
        phi = theta - np.pi / 2
        c, s = np.cos(phi), np.sin(phi)
        return (
            -2
            / 0.05
            * (
                s * s * dtheta**2
                + (y1 - c) * c * dtheta**2
                + (y1 - c) * s * ddtheta
                + (-c) * (-c) * dtheta**2
                + (y2 - s) * s * dtheta**2
                + (y2 - s) * (-c) * ddtheta
            )
        )

    frames_shape = (n_ics, n_samples, n, n)
    x = np.zeros(frames_shape)
    dx = np.zeros(frames_shape)
    ddx = np.zeros(frames_shape)

    for i in range(n_ics):
        for j in range(n_samples):
            # Wrap the stored angle in place, then read the stored value back
            z[i, j, 0] = wrap_to_pi(z[i, j, 0])
            theta = z[i, j, 0]
            omega, alpha = dz[i, j, 0], dz[i, j, 1]

            frame = render(theta)
            rate = exponent_rate(theta, omega)

            x[i, j] = frame
            dx[i, j] = frame * rate
            ddx[i, j] = frame * (rate**2 + exponent_accel(theta, omega, alpha))

    return x, dx, ddx


def wrap_to_pi(z):
    """
    Wrap an angle (scalar or array) into the interval (-pi, pi].

    The angle is first reduced modulo 2*pi; values strictly greater than pi are
    then shifted down by one full turn.
    """
    full_turn = 2 * np.pi
    remainder = z % full_turn
    return remainder - full_turn * (remainder > np.pi)

# Neural Network

## Custom Linear Layer

In [6]:
class LinearLayer(torch.nn.Linear):
    """Linear layer that carries a signal and its time derivatives through the network.

    The layer takes ONE tensor in which the signal and its derivative(s) are
    stacked along dimension 0: ``[x; dx]`` when ``order == 1`` and
    ``[x; dx; ddx]`` otherwise. The output uses the same stacking. With
    ``a = W x + b`` and activation ``f`` the chain rule gives

        y   = f(a)
        dy  = f'(a) * (W dx)
        ddy = f''(a) * (W dx)**2 + f'(a) * (W ddx)

    When ``last`` is True no activation is applied and every part is mapped
    linearly (the bias only enters the signal part).

    Analytic derivatives are implemented for ReLU, ELU and Sigmoid.
    """

    def __init__(
        self,
        in_features: int,
        out_features: int,
        activation_function: torch.nn.Module,
        last: bool = False,
        order: int = 1,
        bias: bool = True,
        device=None,
        dtype=None,
    ) -> None:
        """
        Args:
            in_features: Size of each input sample.
            out_features: Size of each output sample.
            activation_function: Activation module applied after the affine map
                (skipped when ``last`` is True).
            last: True for the final layer of a network (purely linear output).
            order: 1 to propagate ``[x; dx]``, anything else for ``[x; dx; ddx]``.
            bias: Whether the affine map has a bias term.
            device: Forwarded to ``torch.nn.Linear``.
            dtype: Parameter dtype; defaults to ``torch.float64`` when omitted.
        """
        # float64 stays the default of this layer; an explicitly requested dtype
        # is honored instead of being silently discarded.
        super().__init__(
            in_features,
            out_features,
            bias,
            device,
            dtype=torch.float64 if dtype is None else dtype,
        )

        self.activation_function = activation_function
        self.last = last
        self.order = order

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        """Run the first- or second-order pass selected by ``order``."""
        if self.order == 1:
            return self.forward_dx(input)
        return self.forward_ddx(input)

    def forward_dx(self, input: torch.Tensor) -> torch.Tensor:
        """Propagate ``[x; dx]`` (stacked along dim 0) through the layer."""
        half = input.shape[0] // 2
        x, dx = input[:half], input[half:]

        pre = F.linear(x, self.weight, self.bias)
        # Derivatives see only the linear part of the affine map (no bias)
        dpre = F.linear(dx, self.weight)

        if self.last:
            return torch.cat((pre, dpre), dim=0)

        # Evaluate the derivative before the activation in case it works in place
        dx_out = self.activation_derivative(pre) * dpre
        return torch.cat((self.activation_function(pre), dx_out), dim=0)

    def forward_ddx(self, input: torch.Tensor) -> torch.Tensor:
        """Propagate ``[x; dx; ddx]`` (stacked along dim 0) through the layer."""
        third = input.shape[0] // 3
        x, dx, ddx = input[:third], input[third : 2 * third], input[2 * third :]

        pre = F.linear(x, self.weight, self.bias)
        dpre = F.linear(dx, self.weight)
        ddpre = F.linear(ddx, self.weight)

        if self.last:
            return torch.cat((pre, dpre, ddpre), dim=0)

        slope = self.activation_derivative(pre)
        curvature = self.activation_2nd_derivative(pre, slope)

        dx_out = slope * dpre
        # Second-order chain rule: the curvature multiplies the SQUARE of W dx
        ddx_out = curvature * dpre**2 + slope * ddpre

        return torch.cat((self.activation_function(pre), dx_out, ddx_out), dim=0)

    def activation_derivative(self, x: torch.Tensor) -> torch.Tensor:
        """First derivative of the activation, evaluated at pre-activation ``x``."""
        activation = self.activation_function
        if isinstance(activation, torch.nn.ReLU):
            return torch.where(x > 0, torch.ones_like(x), torch.zeros_like(x))
        if isinstance(activation, torch.nn.ELU):
            # ELU'(x) = 1 for x > 0, alpha * exp(x) otherwise. The clamp keeps
            # exp() finite on the branch that torch.where discards.
            return torch.where(
                x > 0,
                torch.ones_like(x),
                activation.alpha * torch.exp(torch.clamp(x, max=0)),
            )
        if isinstance(activation, torch.nn.Sigmoid):
            return self.dsigmoid(x)
        raise NotImplementedError(
            f"No analytic derivative for activation {type(activation).__name__}"
        )

    def activation_2nd_derivative(self, x: torch.Tensor, dx=0) -> torch.Tensor:
        """Second derivative of the activation, evaluated at pre-activation ``x``.

        Args:
            x: Pre-activation values.
            dx: Optional precomputed first derivative of the activation at ``x``;
                only used (to avoid recomputation) for the sigmoid.
        """
        activation = self.activation_function
        if isinstance(activation, torch.nn.ReLU):
            return torch.zeros_like(x)
        if isinstance(activation, torch.nn.ELU):
            # ELU''(x) = 0 for x > 0, alpha * exp(x) otherwise
            return torch.where(
                x > 0,
                torch.zeros_like(x),
                activation.alpha * torch.exp(torch.clamp(x, max=0)),
            )
        if isinstance(activation, torch.nn.Sigmoid):
            # sigmoid'' = sigmoid' * (1 - 2 * sigmoid)
            first = dx if torch.is_tensor(dx) else self.dsigmoid(x)
            return first * (1 - 2 * torch.sigmoid(x))
        raise NotImplementedError(
            f"No analytic second derivative for activation {type(activation).__name__}"
        )

    def dsigmoid(self, x: torch.Tensor) -> torch.Tensor:
        """Derivative of the logistic sigmoid: s(x) * (1 - s(x))."""
        sigmoid = torch.sigmoid(x)
        return sigmoid * (1 - sigmoid)

## Loss Function

In [7]:
class Loss(torch.nn.Module):
    """Weighted sum of mean-squared-error terms plus an L1 penalty on the SINDy coefficients."""

    def __init__(
        self,
        lambda_1: float,
        lambda_2: float,
        lambda_3: float,
        lambda_r: float,
        order: int = 1,
        *args,
        **kwargs
    ) -> None:
        """Custom loss function based on multiple MSEs

        Args:
            lambda_1 (float): loss weight decoder
            lambda_2 (float): loss weight sindy z
            lambda_3 (float): loss weight sindy x
            lambda_r (float): loss weight sindy regularization
            order (int, optional): Order of the model can be 1 or 2. Defaults to 1.
        """
        super().__init__(*args, **kwargs)
        self.lambda_1 = lambda_1
        self.lambda_2 = lambda_2
        self.lambda_3 = lambda_3
        self.lambda_r = lambda_r

        # The sparsity penalty is active until switched off via set_regularization()
        self.regularization = True

        # Bind the loss variant that matches the model order
        self.forward = self.forward_dx if order == 1 else self.forward_ddx

    def _combine(
        self,
        x,
        x_decode,
        latent_target,
        latent_pred,
        signal_target,
        signal_decode,
        sindy_coeffs,
        coeff_mask,
    ):
        """Accumulate the three weighted MSE terms and the masked L1 penalty."""
        terms = (
            self.lambda_1 * torch.mean((x - x_decode) ** 2),
            self.lambda_2 * torch.mean((latent_target - latent_pred) ** 2),
            self.lambda_3 * torch.mean((signal_target - signal_decode) ** 2),
            int(self.regularization)
            * self.lambda_r
            * torch.mean(torch.abs(sindy_coeffs) * coeff_mask),
        )

        loss = 0
        for term in terms:
            loss += term
        return loss

    def forward_dx(
        self,
        x,
        dx,
        dz,
        dz_pred,
        x_decode,
        dx_decode,
        sindy_coeffs: torch.Tensor,
        coeff_mask,
    ) -> torch.Tensor:
        """First-order loss: reconstruction of x, SINDy fit of dz, decoded dx."""
        return self._combine(
            x, x_decode, dz, dz_pred, dx, dx_decode, sindy_coeffs, coeff_mask
        )

    def forward_ddx(
        self,
        x,
        dx,
        dz,
        dz_pred,
        x_decode,
        dx_decode,
        sindy_coeffs: torch.Tensor,
        ddz,
        ddx,
        ddx_decode,
        coeff_mask,
    ) -> torch.Tensor:
        """Second-order loss: reconstruction of x, SINDy fit of ddz, decoded ddx.

        ``dz_pred`` holds the predicted second derivative in this variant;
        ``dx``, ``dz`` and ``dx_decode`` are accepted but not used.
        """
        return self._combine(
            x, x_decode, ddz, dz_pred, ddx, ddx_decode, sindy_coeffs, coeff_mask
        )

    def set_regularization(self, include_regularization: bool) -> None:
        """Enable or disable the L1 penalty on the SINDy coefficients."""
        self.regularization = include_regularization

## Autoencoder

In [8]:
class AutoEncoder(torch.nn.Module):

    RELU = "relu"
    SIGMOID = "sigmoid"
    ELU = "elu"

    def __init__(
        self, params: dict = {}, name: str = "encoder", *args, **kwargs
    ) -> None:

        super().__init__(*args, **kwargs)
        self.params = params

        activation = self.params["activation"]
        self.activation_function = self.__get_activation(activation)

        self.weights = (
            [self.params["input_dim"]]
            + self.params["widths"]
            + [self.params["latent_dim"]]
        )
        self.order = self.params["model_order"]

        if self.weights is None:
            raise TypeError("Missing weight param")

        if name == "encoder":
            self.__create_encoder()
        elif name == "decoder":
            self.__create_decoder()

    def __create_encoder(self) -> None:
        """Creates the encoder based on weights and activation function"""
        layers = []
        for curr_weights, next_weights in zip(self.weights[:-1], self.weights[1:]):
            layers.append(
                LinearLayer(
                    curr_weights,
                    next_weights,
                    self.activation_function,
                    len(layers) + 2 == len(self.weights),
                    self.order,
                )
            )
        self.net = torch.nn.Sequential(*layers)

    def __create_decoder(self) -> None:
        """Creates decoder, the weights are swapped and reversed compared to the encoder"""
        layers = []
        for curr_weights, next_weights in zip(
            reversed(self.weights[1:]), reversed(self.weights[:-1])
        ):
            layers.append(
                LinearLayer(
                    curr_weights,
                    next_weights,
                    self.activation_function,
                    len(layers) + 2 == len(self.weights),
                    self.order,
                )
            )

        self.net = torch.nn.Sequential(*layers)

    def __get_activation(self, activation: str = "relu") -> torch.nn.Module:
        match (activation):
            case self.RELU:
                return torch.nn.ReLU()
            case self.SIGMOID:
                return torch.nn.Sigmoid()
            case self.ELU:
                return torch.nn.ELU()
            case _:
                raise TypeError(f"Invalid activation function {activation}")

    def forward(self, x: torch.Tensor) -> list[torch.Tensor]:
        """Forward function of the autoencoder

        Args:
            x (List[Tensor]): either the List has 2 or 3 elements
            if it has 2 elements the model order has to be set to 1
            if it has 3 elements the model order has to be set to 2

        Returns:
            List[torch.Tensor]: returns the forward passed list whit the same number of elements as the input
        """
        return self.net(x)

## SINDy Network

In [9]:
class SINDy(torch.nn.Module):
    """

    Description: Custom neural network module that embeds a SINDy model into an autoencoder.

    The module owns the encoder, the decoder and the learnable coefficient
    matrix ``sindy_coefficients``. In the constructor ``forward`` is bound to
    ``forward_dx`` (model order 1) or ``forward_ddx`` (model order 2).

    Note: ``coefficient_mask`` is a plain tensor attribute (not a registered
    buffer), so it is not part of ``state_dict()`` and is not moved by
    ``.to(...)``; it is created directly on ``device``.

    """

    def __init__(
        self,
        encoder: AutoEncoder,
        decoder: AutoEncoder,
        device: str,
        params: dict | None = None,
        *args,
        **kwargs
    ) -> None:
        """

        Description: Constructor for the SINDy class. Initializes the model parameters, encoder, and decoder.

        Args:
            encoder (AutoEncoder): The encoder part of the autoencoder.
            decoder (AutoEncoder): The decoder part of the autoencoder.
            device (str): Device on which the coefficient mask and the SINDy coefficients are created.
            params (Dict): A dictionary containing model and SINDy parameters. Keys read here:
                "model_order", "input_dim", "latent_dim", "poly_order", "include_sine",
                "sequential_thresholding", "coefficient_initialization", "coefficient_threshold".
            *args: Additional positional arguments forwarded to torch.nn.Module.
            **kwargs: Additional keyword arguments forwarded to torch.nn.Module.

        Raises:
            KeyError: If a required key is missing from params.
            ValueError: If "model_order" is neither 1 nor 2, or the coefficient
                initialization name is unknown.

        """

        super().__init__(*args, **kwargs)

        # Initialize model parameters, encoder, and decoder.
        # A None default avoids sharing one mutable dict between all instances.
        self.params = {} if params is None else params
        self.encoder = encoder
        self.decoder = decoder

        # Order of the dynamical system; validated up front so that a bad value
        # fails here with a clear message instead of later as a missing attribute.
        self.model_order = self.params["model_order"]
        if self.model_order not in (1, 2):
            raise ValueError(
                f"Unsupported model_order {self.model_order!r}; expected 1 or 2"
            )

        # Initialize autoencoder parameters ----------
        self.input_dim = self.params["input_dim"]
        self.latent_dim = self.params["latent_dim"]

        # Initialize SINDy parameters ------------------------------------------------------

        # Library parameters
        self.poly_order = self.params["poly_order"]
        self.include_sine = self.params["include_sine"]
        # The second-order library is built from both z and dz (see forward_ddx),
        # hence twice as many library inputs.
        library_inputs = (
            self.latent_dim if self.model_order == 1 else 2 * self.latent_dim
        )
        self.library_dim = library_size(
            library_inputs, self.poly_order, self.include_sine
        )

        # Coefficient parameters
        self.sequential_thresholding = self.params["sequential_thresholding"]
        self.coefficient_initialization = self.params["coefficient_initialization"]
        self.coefficient_mask = torch.ones((self.library_dim, self.latent_dim)).to(
            device
        )
        self.coefficient_threshold = self.params["coefficient_threshold"]

        # Greek letter 'Xi' in the paper. Learned during training (different from linear regression).
        initial_coefficients = self.init_sindy_coefficients(
            self.coefficient_initialization
        )
        # Treat sindy_coefficients as a parameter to be learned and move it to device
        self.sindy_coefficients = torch.nn.Parameter(
            initial_coefficients.to(torch.float64).to(device)
        )

        # Bind the forward pass that matches the order of the dynamical system
        if self.model_order == 1:
            self.forward = self.forward_dx
        else:
            self.forward = self.forward_ddx

    def init_sindy_coefficients(self, name="normal", std=1.0, k=1) -> torch.Tensor:
        """

        Description: Initializes the SINDy coefficients based on the specified method. These coefficients are learned during training.

        Args:
            name (str): The method for initializing the coefficients. Options are 'xavier', 'uniform', 'constant', and 'normal'.
            std (float): Standard deviation for normal initialization.
            k (float): Constant value for constant initialization.

        Returns:
            torch.Tensor: Tensor of shape (library_dim, latent_dim) with the initial coefficients.

        Raises:
            ValueError: If name is not one of the supported options.

        """

        sindy_coefficients = torch.zeros((self.library_dim, self.latent_dim))

        if name == "xavier":
            return torch.nn.init.xavier_uniform_(sindy_coefficients)
        if name == "uniform":
            return torch.nn.init.uniform_(sindy_coefficients, low=0.0, high=1.0)
        if name == "constant":
            return torch.ones_like(sindy_coefficients) * k
        if name == "normal":
            return torch.nn.init.normal_(sindy_coefficients, mean=0, std=std)

        # Previously an unknown name returned None and crashed later in __init__.
        raise ValueError(
            f"Unknown coefficient initialization {name!r}; "
            "expected 'xavier', 'uniform', 'constant' or 'normal'"
        )

    def _effective_coefficients(self) -> torch.Tensor:
        """Returns the coefficients used for prediction: masked if sequential thresholding is on."""
        if self.sequential_thresholding:
            return self.coefficient_mask * self.sindy_coefficients
        return self.sindy_coefficients

    def forward_dx(self, x, dx) -> tuple:
        """

        Description: Forward pass for the SINDy model with first-order derivatives.

        Args:
            x (torch.Tensor): Input tensor representing the state of the system.
            dx (torch.Tensor): Input tensor representing the first-order derivatives of the state.

        Returns:
            tuple: (x, dx, dz_predict, dz, x_decode, dx_decode, sindy_coefficients), where
            dz is the encoder output for dx, dz_predict is the SINDy prediction
            (library times coefficients) and x_decode / dx_decode are the decoder
            outputs for z and dz_predict.

        """

        # x and dx are stacked along dim 0 and encoded in one pass;
        # the first half of the result is z, the second half is dz.
        out_encode = self.encoder(torch.cat((x, dx)))
        half = out_encode.shape[0] // 2
        z = out_encode[:half]
        dz = out_encode[half:]

        # create library
        Theta = sindy_library_pt(z, self.latent_dim, self.poly_order, self.include_sine)

        # predicted latent derivative (thresholding mask applied if enabled)
        dz_predict = torch.matmul(Theta, self._effective_coefficients())

        # decode transformed input (z) and predicted derivatives (z dot)
        out_decode = self.decoder(torch.cat((z, dz_predict)))
        half = out_decode.shape[0] // 2
        x_decode = out_decode[:half]
        dx_decode = out_decode[half:]

        return (
            x,
            dx,
            dz_predict,
            dz,
            x_decode,
            dx_decode,
            self.sindy_coefficients,
        )

    def forward_ddx(self, x: torch.Tensor, dx: torch.Tensor, ddx: torch.Tensor) -> tuple:
        """

        Description: Forward pass for the SINDy model with second-order derivatives.

        Args:
            x (torch.Tensor): Input tensor representing the state of the system.
            dx (torch.Tensor): Input tensor representing the first-order derivatives of the state.
            ddx (torch.Tensor): Input tensor representing the second-order derivatives of the state.

        Returns:
            tuple: (x, dx, dz, dz_predict, x_decode, dx_decode, sindy_coefficients,
            ddz, ddx, ddx_decode, coefficient_mask). The element called dz_predict is the
            SINDy prediction (library of z and dz times coefficients); it is decoded in
            the third (second-derivative) slot, next to z and dz.

        """

        # x, dx and ddx are stacked along dim 0 and encoded in one pass
        out = self.encoder(torch.cat((x, dx, ddx)))
        third = out.shape[0] // 3
        z, dz, ddz = out[:third], out[third : 2 * third], out[2 * third :]

        # create Theta
        Theta = sindy_library_pt_order2(
            z, dz, self.latent_dim, self.poly_order, self.include_sine
        )

        # SINDy prediction (thresholding mask applied if enabled)
        sindy_predict = torch.matmul(Theta, self._effective_coefficients())

        # decode
        out_decode = self.decoder(torch.cat((z, dz, sindy_predict)))
        third = out_decode.shape[0] // 3
        x_decode, dx_decode, ddx_decode = (
            out_decode[:third],
            out_decode[third : 2 * third],
            out_decode[2 * third :],
        )

        # Kept under its historical name/position so existing callers keep working.
        dz_predict = sindy_predict

        return (
            x,
            dx,
            dz,
            dz_predict,
            x_decode,
            dx_decode,
            self.sindy_coefficients,
            ddz,
            ddx,
            ddx_decode,
            self.coefficient_mask,
        )

# Training

In [10]:
def generate_training_data(system: str, num_ics: int):
    """Generates a data set for the requested dynamical system.

    Args:
        system (str): Either "lorenz" or "pendulum".
        num_ics (int): Forwarded unchanged to the data generator
            (presumably the number of initial conditions; confirm in the generators).

    Returns:
        The object returned by ``get_lorenz_data`` / ``get_pendulum_data``.
        The training code in this notebook indexes it with "x", "dx" and "ddx".

    Raises:
        ValueError: If ``system`` is not one of the supported names.
    """
    if system == "lorenz":
        return get_lorenz_data(num_ics)
    if system == "pendulum":
        return get_pendulum_data(num_ics)

    # Previously an unknown name fell through to `return training_data`
    # with the local never assigned (UnboundLocalError).
    raise ValueError(f"Unknown system {system!r}; expected 'lorenz' or 'pendulum'")

In [11]:
def train_first_order(sindy, num_epochs, optimizer, criterion, training_data):
    """Trains a first-order SINDy autoencoder with full-batch gradient steps.

    Args:
        sindy: Model called as ``sindy(x, dx)``; must expose ``params``,
            ``coefficient_mask`` and return the 7-tuple produced by ``forward_dx``.
        num_epochs (int): Number of optimisation steps.
        optimizer: Torch optimizer over the model parameters.
        criterion: Loss callable; its ``regularization`` attribute gates thresholding.
        training_data: Mapping with NumPy arrays under "x" and "dx".

    Side effects:
        Prints the loss every epoch and, when sequential thresholding is
        enabled, replaces ``sindy.coefficient_mask`` every
        ``threshold_frequency`` epochs (except epoch 0).
    """
    # Use the configuration the model was built with rather than a notebook
    # global, so the function does not depend on hidden kernel state.
    params = sindy.params

    # Loop-invariant: convert and transfer the data once, not once per epoch.
    x_train = torch.from_numpy(training_data["x"]).to(device=device)
    dx_train = torch.from_numpy(training_data["dx"]).to(device=device)

    for epoch in tqdm(range(num_epochs), desc="Training"):
        sindy.train()
        optimizer.zero_grad()

        # Forward pass
        (
            x,
            dx,
            dz_predict,
            dz,
            x_decode,
            dx_decode,
            sindy_coefficients,
        ) = sindy(x_train, dx_train)
        loss = criterion(
            x,
            dx,
            dz,
            dz_predict,
            x_decode,
            dx_decode,
            sindy_coefficients,
            sindy.coefficient_mask,
        )
        print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

        # Backward pass
        loss.backward()
        optimizer.step()

        # Optional coefficient thresholding
        if (
            criterion.regularization
            and params["sequential_thresholding"]
            and (epoch % params["threshold_frequency"] == 0)
            and (epoch > 0)
        ):
            # The mask is recomputed from the coefficients after this epoch's update.
            sindy.coefficient_mask = (
                torch.abs(sindy_coefficients.detach())
                > params["coefficient_threshold"]
            )
            print(
                f"THRESHOLDING: {torch.sum(sindy.coefficient_mask)} active coefficients"
            )

def _second_order_batch(data):
    """Converts the "x", "dx" and "ddx" arrays of a data set into tensors on `device`."""
    return tuple(
        torch.from_numpy(data[key]).to(device=device) for key in ("x", "dx", "ddx")
    )


def _second_order_step(sindy, optimizer, criterion, batch, epoch):
    """Runs one full-batch optimisation step of the second-order model and prints the loss."""
    sindy.train()
    optimizer.zero_grad()

    # Forward pass. forward_ddx returns
    # (x, dx, dz, dz_predict, x_decode, dx_decode, sindy_coefficients,
    #  ddz, ddx, ddx_decode, coefficient_mask),
    # which is exactly the positional order the criterion is called with.
    outputs = sindy(*batch)
    loss = criterion(*outputs)
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

    # Backward pass
    loss.backward()
    optimizer.step()


def train_second_order(sindy, num_epochs, refinement_epochs, optimizer, criterion, training_data, refinement_data):
    """Trains a second-order SINDy autoencoder, then runs a refinement phase.

    Args:
        sindy: Model called as ``sindy(x, dx, ddx)``; must expose ``params``,
            ``sindy_coefficients`` and ``coefficient_mask``.
        num_epochs (int): Number of optimisation steps on ``training_data``.
        refinement_epochs (int): Number of optimisation steps on ``refinement_data``.
        optimizer: Torch optimizer over the model parameters.
        criterion: Loss callable; its ``regularization`` attribute gates thresholding.
        training_data: Mapping with NumPy arrays under "x", "dx" and "ddx".
        refinement_data: Same layout as ``training_data``.

    Side effects:
        Prints the loss every epoch. During the training phase only, when
        sequential thresholding is enabled, ``sindy.coefficient_mask`` is
        replaced every ``threshold_frequency`` epochs (except epoch 0).
        The refinement phase uses the same optimizer and criterion and never
        updates the mask.
    """
    # Use the configuration the model was built with rather than a notebook
    # global, so the function does not depend on hidden kernel state.
    params = sindy.params

    # Loop-invariant: convert and transfer the data once, not once per epoch.
    training_batch = _second_order_batch(training_data)
    for epoch in tqdm(range(num_epochs), desc="Training"):
        _second_order_step(sindy, optimizer, criterion, training_batch, epoch)

        # Optional coefficient thresholding
        if (
            criterion.regularization
            and params["sequential_thresholding"]
            and (epoch % params["threshold_frequency"] == 0)
            and (epoch > 0)
        ):
            # The mask is recomputed from the coefficients after this epoch's update.
            sindy.coefficient_mask = (
                torch.abs(sindy.sindy_coefficients.detach())
                > params["coefficient_threshold"]
            )
            print(
                f"THRESHOLDING: {torch.sum(sindy.coefficient_mask)} active coefficients"
            )

    refinement_batch = _second_order_batch(refinement_data)
    for epoch in tqdm(range(refinement_epochs), desc="Refinement"):
        _second_order_step(sindy, optimizer, criterion, refinement_batch, epoch)

In [15]:
# Load the run configuration; the context manager closes the file handle.
with open("params.json") as params_file:
    params = json.load(params_file)

# Build the model: encoder and decoder are wrapped by the SINDy module.
encoder = AutoEncoder(params, "encoder")
decoder = AutoEncoder(params, "decoder")
sindy = SINDy(encoder=encoder, decoder=decoder, device=device, params=params)
sindy = sindy.to(device=device)
criterion = Loss(
    params["loss_weight_decoder"],
    params["loss_weight_sindy_z"],
    params["loss_weight_sindy_x"],
    params["loss_weight_sindy_regularization"],
    order=params["model_order"],
)
optimizer = torch.optim.Adam(sindy.parameters(), lr=params["learning_rate"])
training_data = generate_training_data("pendulum", 5)
refinement_data = generate_training_data("pendulum", 1)
# NOTE(review): the epoch counts are taken from the number of rows in each
# data set, not from the configuration; confirm this coupling is intended.
num_epochs = training_data['x'].shape[0]
refinement_epochs = refinement_data['x'].shape[0]

# Train with regularization
criterion.set_regularization(True)
train_second_order(sindy, num_epochs, refinement_epochs, optimizer, criterion, training_data, refinement_data)

# Save the model
torch.save({
    'encoder': encoder.state_dict(),
    'decoder': decoder.state_dict(),
    'sindy': sindy.state_dict(),
    'optimizer': optimizer.state_dict(),
    'params': params,  # Optional: save config to make loading easier
    # The mask is a plain attribute of the model, so state_dict() does not
    # contain it; store it explicitly to keep the learned sparsity pattern.
    'coefficient_mask': sindy.coefficient_mask.detach().cpu(),
}, "sindy_model_checkpoint.pth")

Training:   0%|          | 1/2500 [00:00<11:56,  3.49it/s]

Epoch 1, Loss: 0.1021


Training:   0%|          | 2/2500 [00:00<09:33,  4.36it/s]

Epoch 2, Loss: 0.0988
Epoch 3, Loss: 0.0956


Training:   0%|          | 4/2500 [00:00<08:07,  5.12it/s]

Epoch 4, Loss: 0.0925
Epoch 5, Loss: 0.0895


Training:   0%|          | 6/2500 [00:01<07:43,  5.38it/s]

Epoch 6, Loss: 0.0866
Epoch 7, Loss: 0.0838


Training:   0%|          | 8/2500 [00:01<07:38,  5.44it/s]

Epoch 8, Loss: 0.0811
Epoch 9, Loss: 0.0785


Training:   0%|          | 10/2500 [00:01<07:30,  5.53it/s]

Epoch 10, Loss: 0.0760
Epoch 11, Loss: 0.0736


Training:   0%|          | 12/2500 [00:02<07:26,  5.57it/s]

Epoch 12, Loss: 0.0713
Epoch 13, Loss: 0.0691


Training:   1%|          | 14/2500 [00:02<07:25,  5.58it/s]

Epoch 14, Loss: 0.0669
Epoch 15, Loss: 0.0649


Training:   1%|          | 16/2500 [00:02<07:23,  5.59it/s]

Epoch 16, Loss: 0.0629
Epoch 17, Loss: 0.0610


Training:   1%|          | 18/2500 [00:03<07:30,  5.51it/s]

Epoch 18, Loss: 0.0591
Epoch 19, Loss: 0.0574


Training:   1%|          | 20/2500 [00:03<07:36,  5.43it/s]

Epoch 20, Loss: 0.0557
Epoch 21, Loss: 0.0541


Training:   1%|          | 22/2500 [00:04<07:32,  5.47it/s]

Epoch 22, Loss: 0.0525
Epoch 23, Loss: 0.0510


Training:   1%|          | 24/2500 [00:04<07:32,  5.47it/s]

Epoch 24, Loss: 0.0496
Epoch 25, Loss: 0.0482


Training:   1%|          | 26/2500 [00:04<07:36,  5.42it/s]

Epoch 26, Loss: 0.0469
Epoch 27, Loss: 0.0456


Training:   1%|          | 28/2500 [00:05<07:30,  5.48it/s]

Epoch 28, Loss: 0.0444
Epoch 29, Loss: 0.0433


Training:   1%|          | 30/2500 [00:05<08:33,  4.81it/s]

Epoch 30, Loss: 0.0422
Epoch 31, Loss: 0.0411


Training:   1%|▏         | 32/2500 [00:06<07:55,  5.19it/s]

Epoch 32, Loss: 0.0401
Epoch 33, Loss: 0.0391


Training:   1%|▏         | 34/2500 [00:06<07:37,  5.40it/s]

Epoch 34, Loss: 0.0381
Epoch 35, Loss: 0.0372


Training:   1%|▏         | 36/2500 [00:06<07:28,  5.49it/s]

Epoch 36, Loss: 0.0364
Epoch 37, Loss: 0.0355


Training:   2%|▏         | 38/2500 [00:07<07:24,  5.54it/s]

Epoch 38, Loss: 0.0347
Epoch 39, Loss: 0.0340


Training:   2%|▏         | 40/2500 [00:07<07:20,  5.58it/s]

Epoch 40, Loss: 0.0332
Epoch 41, Loss: 0.0325


Training:   2%|▏         | 42/2500 [00:07<07:23,  5.54it/s]

Epoch 42, Loss: 0.0318
Epoch 43, Loss: 0.0312


Training:   2%|▏         | 44/2500 [00:08<07:20,  5.57it/s]

Epoch 44, Loss: 0.0306
Epoch 45, Loss: 0.0300


Training:   2%|▏         | 46/2500 [00:08<07:19,  5.59it/s]

Epoch 46, Loss: 0.0294
Epoch 47, Loss: 0.0288


Training:   2%|▏         | 48/2500 [00:08<07:17,  5.61it/s]

Epoch 48, Loss: 0.0283
Epoch 49, Loss: 0.0278


Training:   2%|▏         | 50/2500 [00:09<07:16,  5.61it/s]

Epoch 50, Loss: 0.0273
Epoch 51, Loss: 0.0268


Training:   2%|▏         | 52/2500 [00:09<07:15,  5.62it/s]

Epoch 52, Loss: 0.0264
Epoch 53, Loss: 0.0259


Training:   2%|▏         | 54/2500 [00:09<07:14,  5.63it/s]

Epoch 54, Loss: 0.0255
Epoch 55, Loss: 0.0251


Training:   2%|▏         | 56/2500 [00:10<07:14,  5.63it/s]

Epoch 56, Loss: 0.0247
Epoch 57, Loss: 0.0243


Training:   2%|▏         | 58/2500 [00:10<07:13,  5.64it/s]

Epoch 58, Loss: 0.0240
Epoch 59, Loss: 0.0236


Training:   2%|▏         | 60/2500 [00:10<07:12,  5.64it/s]

Epoch 60, Loss: 0.0233
Epoch 61, Loss: 0.0230


Training:   2%|▏         | 62/2500 [00:11<07:12,  5.63it/s]

Epoch 62, Loss: 0.0227
Epoch 63, Loss: 0.0224


Training:   3%|▎         | 64/2500 [00:11<07:12,  5.63it/s]

Epoch 64, Loss: 0.0221
Epoch 65, Loss: 0.0218


Training:   3%|▎         | 66/2500 [00:12<07:13,  5.61it/s]

Epoch 66, Loss: 0.0215
Epoch 67, Loss: 0.0213


Training:   3%|▎         | 68/2500 [00:12<07:12,  5.63it/s]

Epoch 68, Loss: 0.0210
Epoch 69, Loss: 0.0208


Training:   3%|▎         | 70/2500 [00:12<07:11,  5.63it/s]

Epoch 70, Loss: 0.0206
Epoch 71, Loss: 0.0203


Training:   3%|▎         | 72/2500 [00:13<07:13,  5.60it/s]

Epoch 72, Loss: 0.0201
Epoch 73, Loss: 0.0199


Training:   3%|▎         | 74/2500 [00:13<07:12,  5.61it/s]

Epoch 74, Loss: 0.0197
Epoch 75, Loss: 0.0195


Training:   3%|▎         | 76/2500 [00:13<07:11,  5.61it/s]

Epoch 76, Loss: 0.0193
Epoch 77, Loss: 0.0192


Training:   3%|▎         | 78/2500 [00:14<07:08,  5.65it/s]

Epoch 78, Loss: 0.0190
Epoch 79, Loss: 0.0188


Training:   3%|▎         | 80/2500 [00:14<07:09,  5.63it/s]

Epoch 80, Loss: 0.0187
Epoch 81, Loss: 0.0185


Training:   3%|▎         | 82/2500 [00:14<07:08,  5.64it/s]

Epoch 82, Loss: 0.0184
Epoch 83, Loss: 0.0182


Training:   3%|▎         | 84/2500 [00:15<07:15,  5.55it/s]

Epoch 84, Loss: 0.0181
Epoch 85, Loss: 0.0179


Training:   3%|▎         | 86/2500 [00:15<07:15,  5.54it/s]

Epoch 86, Loss: 0.0178
Epoch 87, Loss: 0.0177


Training:   4%|▎         | 88/2500 [00:15<07:17,  5.52it/s]

Epoch 88, Loss: 0.0176
Epoch 89, Loss: 0.0174


Training:   4%|▎         | 90/2500 [00:16<07:14,  5.55it/s]

Epoch 90, Loss: 0.0173
Epoch 91, Loss: 0.0172


Training:   4%|▎         | 92/2500 [00:16<07:21,  5.45it/s]

Epoch 92, Loss: 0.0171
Epoch 93, Loss: 0.0170


Training:   4%|▍         | 94/2500 [00:17<07:22,  5.44it/s]

Epoch 94, Loss: 0.0169
Epoch 95, Loss: 0.0168


Training:   4%|▍         | 96/2500 [00:17<07:14,  5.54it/s]

Epoch 96, Loss: 0.0167
Epoch 97, Loss: 0.0166


Training:   4%|▍         | 98/2500 [00:17<07:10,  5.58it/s]

Epoch 98, Loss: 0.0165
Epoch 99, Loss: 0.0165


Training:   4%|▍         | 100/2500 [00:18<07:10,  5.58it/s]

Epoch 100, Loss: 0.0164
Epoch 101, Loss: 0.0163


Training:   4%|▍         | 102/2500 [00:18<07:10,  5.57it/s]

Epoch 102, Loss: 0.0162
Epoch 103, Loss: 0.0162


Training:   4%|▍         | 104/2500 [00:18<07:08,  5.60it/s]

Epoch 104, Loss: 0.0161
Epoch 105, Loss: 0.0160


Training:   4%|▍         | 106/2500 [00:19<07:07,  5.61it/s]

Epoch 106, Loss: 0.0160
Epoch 107, Loss: 0.0159


Training:   4%|▍         | 108/2500 [00:19<07:06,  5.61it/s]

Epoch 108, Loss: 0.0158
Epoch 109, Loss: 0.0158


Training:   4%|▍         | 110/2500 [00:19<07:04,  5.62it/s]

Epoch 110, Loss: 0.0157
Epoch 111, Loss: 0.0157


Training:   4%|▍         | 112/2500 [00:20<07:04,  5.62it/s]

Epoch 112, Loss: 0.0156
Epoch 113, Loss: 0.0156


Training:   5%|▍         | 114/2500 [00:20<07:03,  5.63it/s]

Epoch 114, Loss: 0.0155
Epoch 115, Loss: 0.0155


Training:   5%|▍         | 116/2500 [00:21<07:02,  5.64it/s]

Epoch 116, Loss: 0.0154
Epoch 117, Loss: 0.0154


Training:   5%|▍         | 118/2500 [00:21<07:03,  5.62it/s]

Epoch 118, Loss: 0.0153
Epoch 119, Loss: 0.0153


Training:   5%|▍         | 120/2500 [00:21<07:02,  5.63it/s]

Epoch 120, Loss: 0.0152
Epoch 121, Loss: 0.0152


Training:   5%|▍         | 122/2500 [00:22<07:02,  5.63it/s]

Epoch 122, Loss: 0.0152
Epoch 123, Loss: 0.0151


Training:   5%|▍         | 124/2500 [00:22<07:02,  5.62it/s]

Epoch 124, Loss: 0.0151
Epoch 125, Loss: 0.0151


Training:   5%|▌         | 126/2500 [00:22<07:02,  5.62it/s]

Epoch 126, Loss: 0.0150
Epoch 127, Loss: 0.0150


Training:   5%|▌         | 128/2500 [00:23<07:02,  5.62it/s]

Epoch 128, Loss: 0.0150
Epoch 129, Loss: 0.0149


Training:   5%|▌         | 130/2500 [00:23<07:05,  5.57it/s]

Epoch 130, Loss: 0.0149
Epoch 131, Loss: 0.0149


Training:   5%|▌         | 132/2500 [00:23<07:32,  5.23it/s]

Epoch 132, Loss: 0.0148


Training:   5%|▌         | 133/2500 [00:24<07:36,  5.19it/s]

Epoch 133, Loss: 0.0148
Epoch 134, Loss: 0.0148


Training:   5%|▌         | 135/2500 [00:24<07:18,  5.39it/s]

Epoch 135, Loss: 0.0148
Epoch 136, Loss: 0.0147


Training:   5%|▌         | 137/2500 [00:24<07:09,  5.50it/s]

Epoch 137, Loss: 0.0147
Epoch 138, Loss: 0.0147


Training:   6%|▌         | 139/2500 [00:25<07:03,  5.57it/s]

Epoch 139, Loss: 0.0147
Epoch 140, Loss: 0.0147


Training:   6%|▌         | 141/2500 [00:25<07:02,  5.59it/s]

Epoch 141, Loss: 0.0146
Epoch 142, Loss: 0.0146


Training:   6%|▌         | 143/2500 [00:25<07:01,  5.59it/s]

Epoch 143, Loss: 0.0146
Epoch 144, Loss: 0.0146


Training:   6%|▌         | 145/2500 [00:26<07:00,  5.60it/s]

Epoch 145, Loss: 0.0146
Epoch 146, Loss: 0.0146


Training:   6%|▌         | 147/2500 [00:26<07:02,  5.57it/s]

Epoch 147, Loss: 0.0145
Epoch 148, Loss: 0.0145


Training:   6%|▌         | 149/2500 [00:27<07:27,  5.25it/s]

Epoch 149, Loss: 0.0145


Training:   6%|▌         | 150/2500 [00:27<08:16,  4.74it/s]

Epoch 150, Loss: 0.0145
Epoch 151, Loss: 0.0145


Training:   6%|▌         | 152/2500 [00:27<07:43,  5.06it/s]

Epoch 152, Loss: 0.0145
Epoch 153, Loss: 0.0144


Training:   6%|▌         | 154/2500 [00:28<10:00,  3.91it/s]

Epoch 154, Loss: 0.0144


Training:   6%|▌         | 155/2500 [00:28<09:09,  4.26it/s]

Epoch 155, Loss: 0.0144
Epoch 156, Loss: 0.0144


Training:   6%|▋         | 157/2500 [00:28<08:08,  4.80it/s]

Epoch 157, Loss: 0.0144
Epoch 158, Loss: 0.0144


Training:   6%|▋         | 159/2500 [00:29<07:40,  5.09it/s]

Epoch 159, Loss: 0.0144
Epoch 160, Loss: 0.0144


Training:   6%|▋         | 161/2500 [00:29<07:20,  5.31it/s]

Epoch 161, Loss: 0.0144
Epoch 162, Loss: 0.0144


Training:   7%|▋         | 163/2500 [00:29<07:08,  5.46it/s]

Epoch 163, Loss: 0.0143
Epoch 164, Loss: 0.0143


Training:   7%|▋         | 165/2500 [00:30<07:01,  5.53it/s]

Epoch 165, Loss: 0.0143
Epoch 166, Loss: 0.0143


Training:   7%|▋         | 167/2500 [00:30<06:59,  5.56it/s]

Epoch 167, Loss: 0.0143
Epoch 168, Loss: 0.0143


Training:   7%|▋         | 169/2500 [00:30<06:56,  5.59it/s]

Epoch 169, Loss: 0.0143
Epoch 170, Loss: 0.0143


Training:   7%|▋         | 171/2500 [00:31<06:56,  5.60it/s]

Epoch 171, Loss: 0.0143
Epoch 172, Loss: 0.0143


Training:   7%|▋         | 173/2500 [00:31<06:55,  5.59it/s]

Epoch 173, Loss: 0.0143
Epoch 174, Loss: 0.0143


Training:   7%|▋         | 175/2500 [00:32<06:54,  5.61it/s]

Epoch 175, Loss: 0.0143
Epoch 176, Loss: 0.0142


Training:   7%|▋         | 177/2500 [00:32<06:54,  5.61it/s]

Epoch 177, Loss: 0.0142
Epoch 178, Loss: 0.0142


Training:   7%|▋         | 179/2500 [00:32<06:55,  5.59it/s]

Epoch 179, Loss: 0.0142
Epoch 180, Loss: 0.0142


Training:   7%|▋         | 181/2500 [00:33<06:54,  5.59it/s]

Epoch 181, Loss: 0.0142
Epoch 182, Loss: 0.0142


Training:   7%|▋         | 183/2500 [00:33<06:53,  5.60it/s]

Epoch 183, Loss: 0.0142
Epoch 184, Loss: 0.0142


Training:   7%|▋         | 185/2500 [00:33<06:53,  5.60it/s]

Epoch 185, Loss: 0.0142
Epoch 186, Loss: 0.0142


Training:   7%|▋         | 187/2500 [00:34<06:53,  5.59it/s]

Epoch 187, Loss: 0.0142
Epoch 188, Loss: 0.0142


Training:   8%|▊         | 189/2500 [00:34<06:53,  5.59it/s]

Epoch 189, Loss: 0.0142
Epoch 190, Loss: 0.0142


Training:   8%|▊         | 191/2500 [00:34<06:51,  5.61it/s]

Epoch 191, Loss: 0.0142
Epoch 192, Loss: 0.0142


Training:   8%|▊         | 193/2500 [00:35<06:51,  5.60it/s]

Epoch 193, Loss: 0.0142
Epoch 194, Loss: 0.0142


Training:   8%|▊         | 195/2500 [00:35<06:52,  5.59it/s]

Epoch 195, Loss: 0.0142
Epoch 196, Loss: 0.0142


Training:   8%|▊         | 197/2500 [00:35<06:52,  5.58it/s]

Epoch 197, Loss: 0.0142
Epoch 198, Loss: 0.0142


Training:   8%|▊         | 199/2500 [00:36<06:51,  5.60it/s]

Epoch 199, Loss: 0.0142
Epoch 200, Loss: 0.0142


Training:   8%|▊         | 201/2500 [00:36<06:49,  5.61it/s]

Epoch 201, Loss: 0.0141
Epoch 202, Loss: 0.0141


Training:   8%|▊         | 203/2500 [00:37<06:51,  5.58it/s]

Epoch 203, Loss: 0.0141
Epoch 204, Loss: 0.0141


Training:   8%|▊         | 205/2500 [00:37<06:51,  5.58it/s]

Epoch 205, Loss: 0.0141
Epoch 206, Loss: 0.0141


Training:   8%|▊         | 207/2500 [00:37<06:53,  5.55it/s]

Epoch 207, Loss: 0.0141
Epoch 208, Loss: 0.0141


Training:   8%|▊         | 209/2500 [00:38<06:50,  5.58it/s]

Epoch 209, Loss: 0.0141
Epoch 210, Loss: 0.0141


Training:   8%|▊         | 211/2500 [00:38<06:49,  5.59it/s]

Epoch 211, Loss: 0.0141
Epoch 212, Loss: 0.0141


Training:   9%|▊         | 213/2500 [00:38<06:50,  5.57it/s]

Epoch 213, Loss: 0.0141
Epoch 214, Loss: 0.0141


Training:   9%|▊         | 215/2500 [00:39<06:50,  5.56it/s]

Epoch 215, Loss: 0.0141
Epoch 216, Loss: 0.0141


Training:   9%|▊         | 217/2500 [00:39<06:59,  5.45it/s]

Epoch 217, Loss: 0.0141
Epoch 218, Loss: 0.0141


Training:   9%|▉         | 219/2500 [00:39<06:59,  5.44it/s]

Epoch 219, Loss: 0.0141
Epoch 220, Loss: 0.0141


Training:   9%|▉         | 221/2500 [00:40<06:56,  5.47it/s]

Epoch 221, Loss: 0.0141
Epoch 222, Loss: 0.0141


Training:   9%|▉         | 223/2500 [00:40<06:59,  5.42it/s]

Epoch 223, Loss: 0.0141
Epoch 224, Loss: 0.0141


Training:   9%|▉         | 225/2500 [00:41<07:02,  5.39it/s]

Epoch 225, Loss: 0.0141
Epoch 226, Loss: 0.0141


Training:   9%|▉         | 227/2500 [00:41<06:58,  5.43it/s]

Epoch 227, Loss: 0.0141
Epoch 228, Loss: 0.0141


Training:   9%|▉         | 229/2500 [00:41<06:51,  5.52it/s]

Epoch 229, Loss: 0.0141
Epoch 230, Loss: 0.0141


Training:   9%|▉         | 231/2500 [00:42<06:49,  5.55it/s]

Epoch 231, Loss: 0.0141
Epoch 232, Loss: 0.0141


Training:   9%|▉         | 233/2500 [00:42<06:46,  5.58it/s]

Epoch 233, Loss: 0.0141
Epoch 234, Loss: 0.0141


Training:   9%|▉         | 235/2500 [00:42<06:45,  5.58it/s]

Epoch 235, Loss: 0.0141
Epoch 236, Loss: 0.0141


Training:   9%|▉         | 237/2500 [00:43<06:46,  5.57it/s]

Epoch 237, Loss: 0.0141
Epoch 238, Loss: 0.0141


Training:  10%|▉         | 239/2500 [00:43<06:45,  5.58it/s]

Epoch 239, Loss: 0.0141
Epoch 240, Loss: 0.0141


Training:  10%|▉         | 241/2500 [00:43<06:43,  5.60it/s]

Epoch 241, Loss: 0.0141
Epoch 242, Loss: 0.0141


Training:  10%|▉         | 243/2500 [00:44<06:43,  5.60it/s]

Epoch 243, Loss: 0.0141
Epoch 244, Loss: 0.0141


Training:  10%|▉         | 245/2500 [00:44<06:42,  5.60it/s]

Epoch 245, Loss: 0.0141
Epoch 246, Loss: 0.0141


Training:  10%|▉         | 247/2500 [00:44<06:41,  5.61it/s]

Epoch 247, Loss: 0.0141
Epoch 248, Loss: 0.0141


Training:  10%|▉         | 249/2500 [00:45<06:40,  5.62it/s]

Epoch 249, Loss: 0.0141
Epoch 250, Loss: 0.0141


Training:  10%|█         | 251/2500 [00:45<06:58,  5.37it/s]

Epoch 251, Loss: 0.0141
THRESHOLDING: 9 active coefficients
Epoch 252, Loss: 0.0141


Training:  10%|█         | 253/2500 [00:46<06:38,  5.64it/s]

Epoch 253, Loss: 0.0141
Epoch 254, Loss: 0.0141


Training:  10%|█         | 255/2500 [00:46<06:40,  5.60it/s]

Epoch 255, Loss: 0.0141
Epoch 256, Loss: 0.0141


Training:  10%|█         | 257/2500 [00:46<06:39,  5.61it/s]

Epoch 257, Loss: 0.0141
Epoch 258, Loss: 0.0141


Training:  10%|█         | 259/2500 [00:47<06:42,  5.57it/s]

Epoch 259, Loss: 0.0141
Epoch 260, Loss: 0.0141


Training:  10%|█         | 261/2500 [00:47<06:39,  5.60it/s]

Epoch 261, Loss: 0.0141
Epoch 262, Loss: 0.0141


Training:  11%|█         | 263/2500 [00:47<06:38,  5.61it/s]

Epoch 263, Loss: 0.0141
Epoch 264, Loss: 0.0141


Training:  11%|█         | 265/2500 [00:48<06:39,  5.60it/s]

Epoch 265, Loss: 0.0141
Epoch 266, Loss: 0.0141


Training:  11%|█         | 267/2500 [00:48<06:42,  5.55it/s]

Epoch 267, Loss: 0.0141
Epoch 268, Loss: 0.0141


Training:  11%|█         | 269/2500 [00:48<06:40,  5.57it/s]

Epoch 269, Loss: 0.0141
Epoch 270, Loss: 0.0141


Training:  11%|█         | 271/2500 [00:49<06:39,  5.58it/s]

Epoch 271, Loss: 0.0141
Epoch 272, Loss: 0.0141


Training:  11%|█         | 273/2500 [00:49<06:38,  5.59it/s]

Epoch 273, Loss: 0.0141
Epoch 274, Loss: 0.0141


Training:  11%|█         | 275/2500 [00:49<06:37,  5.59it/s]

Epoch 275, Loss: 0.0141
Epoch 276, Loss: 0.0141


Training:  11%|█         | 277/2500 [00:50<06:39,  5.56it/s]

Epoch 277, Loss: 0.0141
Epoch 278, Loss: 0.0141


Training:  11%|█         | 279/2500 [00:50<06:39,  5.56it/s]

Epoch 279, Loss: 0.0141
Epoch 280, Loss: 0.0141


Training:  11%|█         | 281/2500 [00:51<06:38,  5.57it/s]

Epoch 281, Loss: 0.0141
Epoch 282, Loss: 0.0141


Training:  11%|█▏        | 283/2500 [00:51<06:40,  5.53it/s]

Epoch 283, Loss: 0.0141
Epoch 284, Loss: 0.0141


Training:  11%|█▏        | 285/2500 [00:51<06:45,  5.46it/s]

Epoch 285, Loss: 0.0141
Epoch 286, Loss: 0.0141


Training:  11%|█▏        | 287/2500 [00:52<06:44,  5.48it/s]

Epoch 287, Loss: 0.0141
Epoch 288, Loss: 0.0141


Training:  12%|█▏        | 289/2500 [00:52<06:41,  5.51it/s]

Epoch 289, Loss: 0.0141
Epoch 290, Loss: 0.0141


Training:  12%|█▏        | 291/2500 [00:52<06:45,  5.45it/s]

Epoch 291, Loss: 0.0141
Epoch 292, Loss: 0.0141


Training:  12%|█▏        | 293/2500 [00:53<06:46,  5.43it/s]

Epoch 293, Loss: 0.0141
Epoch 294, Loss: 0.0141


Training:  12%|█▏        | 295/2500 [00:53<06:41,  5.49it/s]

Epoch 295, Loss: 0.0141
Epoch 296, Loss: 0.0141


Training:  12%|█▏        | 297/2500 [00:53<06:36,  5.55it/s]

Epoch 297, Loss: 0.0141
Epoch 298, Loss: 0.0141


Training:  12%|█▏        | 299/2500 [00:54<06:35,  5.57it/s]

Epoch 299, Loss: 0.0141
Epoch 300, Loss: 0.0141


Training:  12%|█▏        | 301/2500 [00:54<06:36,  5.54it/s]

Epoch 301, Loss: 0.0141
Epoch 302, Loss: 0.0141


Training:  12%|█▏        | 303/2500 [00:55<06:35,  5.56it/s]

Epoch 303, Loss: 0.0141
Epoch 304, Loss: 0.0141


Training:  12%|█▏        | 305/2500 [00:55<06:36,  5.54it/s]

Epoch 305, Loss: 0.0141
Epoch 306, Loss: 0.0141


Training:  12%|█▏        | 307/2500 [00:55<06:34,  5.56it/s]

Epoch 307, Loss: 0.0141
Epoch 308, Loss: 0.0141


Training:  12%|█▏        | 309/2500 [00:56<06:31,  5.60it/s]

Epoch 309, Loss: 0.0141
Epoch 310, Loss: 0.0141


Training:  12%|█▏        | 311/2500 [00:56<06:31,  5.59it/s]

Epoch 311, Loss: 0.0141
Epoch 312, Loss: 0.0141


Training:  13%|█▎        | 313/2500 [00:56<06:31,  5.59it/s]

Epoch 313, Loss: 0.0141
Epoch 314, Loss: 0.0141


Training:  13%|█▎        | 315/2500 [00:57<06:31,  5.58it/s]

Epoch 315, Loss: 0.0141
Epoch 316, Loss: 0.0141


Training:  13%|█▎        | 317/2500 [00:57<06:33,  5.55it/s]

Epoch 317, Loss: 0.0141
Epoch 318, Loss: 0.0141


Training:  13%|█▎        | 319/2500 [00:57<06:30,  5.58it/s]

Epoch 319, Loss: 0.0141
Epoch 320, Loss: 0.0141


Training:  13%|█▎        | 321/2500 [00:58<06:29,  5.59it/s]

Epoch 321, Loss: 0.0141
Epoch 322, Loss: 0.0141


Training:  13%|█▎        | 323/2500 [00:58<06:31,  5.56it/s]

Epoch 323, Loss: 0.0141
Epoch 324, Loss: 0.0141


Training:  13%|█▎        | 325/2500 [00:58<06:30,  5.57it/s]

Epoch 325, Loss: 0.0141
Epoch 326, Loss: 0.0141


Training:  13%|█▎        | 327/2500 [00:59<06:29,  5.58it/s]

Epoch 327, Loss: 0.0141
Epoch 328, Loss: 0.0141


Training:  13%|█▎        | 329/2500 [00:59<06:30,  5.56it/s]

Epoch 329, Loss: 0.0141
Epoch 330, Loss: 0.0141


Training:  13%|█▎        | 331/2500 [01:00<06:29,  5.57it/s]

Epoch 331, Loss: 0.0141
Epoch 332, Loss: 0.0141


Training:  13%|█▎        | 333/2500 [01:00<06:27,  5.59it/s]

Epoch 333, Loss: 0.0141
Epoch 334, Loss: 0.0141


Training:  13%|█▎        | 335/2500 [01:00<06:28,  5.57it/s]

Epoch 335, Loss: 0.0141
Epoch 336, Loss: 0.0141


Training:  13%|█▎        | 337/2500 [01:01<06:27,  5.59it/s]

Epoch 337, Loss: 0.0141
Epoch 338, Loss: 0.0141


Training:  14%|█▎        | 339/2500 [01:01<06:28,  5.57it/s]

Epoch 339, Loss: 0.0141
Epoch 340, Loss: 0.0141


Training:  14%|█▎        | 341/2500 [01:01<06:28,  5.56it/s]

Epoch 341, Loss: 0.0141
Epoch 342, Loss: 0.0141


Training:  14%|█▎        | 343/2500 [01:02<06:27,  5.57it/s]

Epoch 343, Loss: 0.0141
Epoch 344, Loss: 0.0141


Training:  14%|█▍        | 345/2500 [01:02<06:26,  5.58it/s]

Epoch 345, Loss: 0.0141
Epoch 346, Loss: 0.0141


Training:  14%|█▍        | 347/2500 [01:02<06:28,  5.55it/s]

Epoch 347, Loss: 0.0141
Epoch 348, Loss: 0.0141


Training:  14%|█▍        | 349/2500 [01:03<06:25,  5.57it/s]

Epoch 349, Loss: 0.0141
Epoch 350, Loss: 0.0141


Training:  14%|█▍        | 351/2500 [01:03<06:31,  5.48it/s]

Epoch 351, Loss: 0.0141
Epoch 352, Loss: 0.0141


Training:  14%|█▍        | 353/2500 [01:04<06:34,  5.45it/s]

Epoch 353, Loss: 0.0141
Epoch 354, Loss: 0.0141


Training:  14%|█▍        | 355/2500 [01:04<06:32,  5.46it/s]

Epoch 355, Loss: 0.0141
Epoch 356, Loss: 0.0141


Training:  14%|█▍        | 357/2500 [01:04<06:29,  5.50it/s]

Epoch 357, Loss: 0.0141
Epoch 358, Loss: 0.0141


Training:  14%|█▍        | 359/2500 [01:05<06:33,  5.44it/s]

Epoch 359, Loss: 0.0141
Epoch 360, Loss: 0.0141


Training:  14%|█▍        | 361/2500 [01:05<06:34,  5.43it/s]

Epoch 361, Loss: 0.0141
Epoch 362, Loss: 0.0141


Training:  15%|█▍        | 363/2500 [01:05<06:27,  5.51it/s]

Epoch 363, Loss: 0.0141
Epoch 364, Loss: 0.0141


Training:  15%|█▍        | 365/2500 [01:06<06:25,  5.53it/s]

Epoch 365, Loss: 0.0141
Epoch 366, Loss: 0.0141


Training:  15%|█▍        | 367/2500 [01:06<06:23,  5.56it/s]

Epoch 367, Loss: 0.0141
Epoch 368, Loss: 0.0141


Training:  15%|█▍        | 369/2500 [01:06<06:22,  5.56it/s]

Epoch 369, Loss: 0.0141
Epoch 370, Loss: 0.0141


Training:  15%|█▍        | 371/2500 [01:07<06:21,  5.59it/s]

Epoch 371, Loss: 0.0141
Epoch 372, Loss: 0.0141


Training:  15%|█▍        | 373/2500 [01:07<06:20,  5.59it/s]

Epoch 373, Loss: 0.0141
Epoch 374, Loss: 0.0141


Training:  15%|█▌        | 375/2500 [01:08<06:21,  5.57it/s]

Epoch 375, Loss: 0.0141
Epoch 376, Loss: 0.0141


Training:  15%|█▌        | 377/2500 [01:08<06:21,  5.57it/s]

Epoch 377, Loss: 0.0141
Epoch 378, Loss: 0.0141


Training:  15%|█▌        | 379/2500 [01:08<06:20,  5.57it/s]

Epoch 379, Loss: 0.0141
Epoch 380, Loss: 0.0141


Training:  15%|█▌        | 381/2500 [01:09<06:19,  5.58it/s]

Epoch 381, Loss: 0.0141
Epoch 382, Loss: 0.0141


Training:  15%|█▌        | 383/2500 [01:09<06:18,  5.60it/s]

Epoch 383, Loss: 0.0141
Epoch 384, Loss: 0.0141


Training:  15%|█▌        | 385/2500 [01:09<06:19,  5.58it/s]

Epoch 385, Loss: 0.0141
Epoch 386, Loss: 0.0141


Training:  15%|█▌        | 387/2500 [01:10<06:19,  5.57it/s]

Epoch 387, Loss: 0.0141
Epoch 388, Loss: 0.0141


Training:  16%|█▌        | 389/2500 [01:10<06:18,  5.57it/s]

Epoch 389, Loss: 0.0141
Epoch 390, Loss: 0.0141


Training:  16%|█▌        | 391/2500 [01:10<06:17,  5.59it/s]

Epoch 391, Loss: 0.0141
Epoch 392, Loss: 0.0141


Training:  16%|█▌        | 393/2500 [01:11<06:17,  5.58it/s]

Epoch 393, Loss: 0.0141
Epoch 394, Loss: 0.0141


Training:  16%|█▌        | 395/2500 [01:11<06:16,  5.59it/s]

Epoch 395, Loss: 0.0141
Epoch 396, Loss: 0.0141


Training:  16%|█▌        | 397/2500 [01:11<06:16,  5.59it/s]

Epoch 397, Loss: 0.0141
Epoch 398, Loss: 0.0141


Training:  16%|█▌        | 399/2500 [01:12<06:17,  5.57it/s]

Epoch 399, Loss: 0.0141
Epoch 400, Loss: 0.0141


Training:  16%|█▌        | 401/2500 [01:12<06:17,  5.56it/s]

Epoch 401, Loss: 0.0141
Epoch 402, Loss: 0.0141


Training:  16%|█▌        | 403/2500 [01:13<06:16,  5.57it/s]

Epoch 403, Loss: 0.0141
Epoch 404, Loss: 0.0141


Training:  16%|█▌        | 405/2500 [01:13<06:15,  5.58it/s]

Epoch 405, Loss: 0.0141
Epoch 406, Loss: 0.0141


Training:  16%|█▋        | 407/2500 [01:13<06:15,  5.58it/s]

Epoch 407, Loss: 0.0141
Epoch 408, Loss: 0.0141


Training:  16%|█▋        | 409/2500 [01:14<06:14,  5.59it/s]

Epoch 409, Loss: 0.0141
Epoch 410, Loss: 0.0141


Training:  16%|█▋        | 411/2500 [01:14<06:14,  5.58it/s]

Epoch 411, Loss: 0.0141
Epoch 412, Loss: 0.0141


Training:  17%|█▋        | 413/2500 [01:14<06:14,  5.57it/s]

Epoch 413, Loss: 0.0141
Epoch 414, Loss: 0.0141


Training:  17%|█▋        | 415/2500 [01:15<06:15,  5.56it/s]

Epoch 415, Loss: 0.0141
Epoch 416, Loss: 0.0141


Training:  17%|█▋        | 417/2500 [01:15<06:17,  5.52it/s]

Epoch 417, Loss: 0.0141
Epoch 418, Loss: 0.0141


Training:  17%|█▋        | 419/2500 [01:15<06:20,  5.47it/s]

Epoch 419, Loss: 0.0141
Epoch 420, Loss: 0.0141


Training:  17%|█▋        | 421/2500 [01:16<06:22,  5.44it/s]

Epoch 421, Loss: 0.0141
Epoch 422, Loss: 0.0141


Training:  17%|█▋        | 423/2500 [01:16<06:19,  5.48it/s]

Epoch 423, Loss: 0.0141
Epoch 424, Loss: 0.0141


Training:  17%|█▋        | 425/2500 [01:17<06:23,  5.41it/s]

Epoch 425, Loss: 0.0141
Epoch 426, Loss: 0.0141


Training:  17%|█▋        | 427/2500 [01:17<06:25,  5.37it/s]

Epoch 427, Loss: 0.0141
Epoch 428, Loss: 0.0141


Training:  17%|█▋        | 429/2500 [01:17<06:21,  5.43it/s]

Epoch 429, Loss: 0.0141
Epoch 430, Loss: 0.0141


Training:  17%|█▋        | 431/2500 [01:18<06:15,  5.51it/s]

Epoch 431, Loss: 0.0141
Epoch 432, Loss: 0.0141


Training:  17%|█▋        | 433/2500 [01:18<06:12,  5.55it/s]

Epoch 433, Loss: 0.0141
Epoch 434, Loss: 0.0141


Training:  17%|█▋        | 435/2500 [01:18<06:12,  5.55it/s]

Epoch 435, Loss: 0.0141
Epoch 436, Loss: 0.0141


Training:  17%|█▋        | 437/2500 [01:19<06:11,  5.56it/s]

Epoch 437, Loss: 0.0141
Epoch 438, Loss: 0.0141


Training:  18%|█▊        | 439/2500 [01:19<06:12,  5.53it/s]

Epoch 439, Loss: 0.0141
Epoch 440, Loss: 0.0141


Training:  18%|█▊        | 441/2500 [01:19<06:10,  5.55it/s]

Epoch 441, Loss: 0.0141
Epoch 442, Loss: 0.0141


Training:  18%|█▊        | 443/2500 [01:20<06:12,  5.51it/s]

Epoch 443, Loss: 0.0141
Epoch 444, Loss: 0.0141


Training:  18%|█▊        | 445/2500 [01:20<06:10,  5.55it/s]

Epoch 445, Loss: 0.0141
Epoch 446, Loss: 0.0141


Training:  18%|█▊        | 447/2500 [01:20<06:09,  5.56it/s]

Epoch 447, Loss: 0.0141
Epoch 448, Loss: 0.0141


Training:  18%|█▊        | 449/2500 [01:21<06:08,  5.57it/s]

Epoch 449, Loss: 0.0141
Epoch 450, Loss: 0.0141


Training:  18%|█▊        | 451/2500 [01:21<06:07,  5.58it/s]

Epoch 451, Loss: 0.0141
Epoch 452, Loss: 0.0141


Training:  18%|█▊        | 453/2500 [01:22<06:07,  5.58it/s]

Epoch 453, Loss: 0.0141
Epoch 454, Loss: 0.0141


Training:  18%|█▊        | 455/2500 [01:22<06:07,  5.56it/s]

Epoch 455, Loss: 0.0141
Epoch 456, Loss: 0.0141


Training:  18%|█▊        | 457/2500 [01:22<06:07,  5.56it/s]

Epoch 457, Loss: 0.0141
Epoch 458, Loss: 0.0141


Training:  18%|█▊        | 459/2500 [01:23<06:06,  5.58it/s]

Epoch 459, Loss: 0.0141
Epoch 460, Loss: 0.0141


Training:  18%|█▊        | 461/2500 [01:23<06:08,  5.53it/s]

Epoch 461, Loss: 0.0141
Epoch 462, Loss: 0.0141


Training:  19%|█▊        | 463/2500 [01:23<06:06,  5.55it/s]

Epoch 463, Loss: 0.0141
Epoch 464, Loss: 0.0141


Training:  19%|█▊        | 465/2500 [01:24<06:05,  5.57it/s]

Epoch 465, Loss: 0.0141
Epoch 466, Loss: 0.0141


Training:  19%|█▊        | 467/2500 [01:24<06:05,  5.56it/s]

Epoch 467, Loss: 0.0141
Epoch 468, Loss: 0.0141


Training:  19%|█▉        | 469/2500 [01:24<06:04,  5.56it/s]

Epoch 469, Loss: 0.0141
Epoch 470, Loss: 0.0141


Training:  19%|█▉        | 471/2500 [01:25<06:04,  5.57it/s]

Epoch 471, Loss: 0.0141
Epoch 472, Loss: 0.0141


Training:  19%|█▉        | 473/2500 [01:25<06:04,  5.56it/s]

Epoch 473, Loss: 0.0141
Epoch 474, Loss: 0.0141


Training:  19%|█▉        | 475/2500 [01:26<06:03,  5.56it/s]

Epoch 475, Loss: 0.0141
Epoch 476, Loss: 0.0141


Training:  19%|█▉        | 477/2500 [01:26<06:02,  5.58it/s]

Epoch 477, Loss: 0.0141
Epoch 478, Loss: 0.0141


Training:  19%|█▉        | 479/2500 [01:26<06:02,  5.57it/s]

Epoch 479, Loss: 0.0141
Epoch 480, Loss: 0.0141


Training:  19%|█▉        | 481/2500 [01:27<06:02,  5.57it/s]

Epoch 481, Loss: 0.0141
Epoch 482, Loss: 0.0141


Training:  19%|█▉        | 483/2500 [01:27<06:08,  5.48it/s]

Epoch 483, Loss: 0.0141
Epoch 484, Loss: 0.0141


Training:  19%|█▉        | 485/2500 [01:27<06:33,  5.12it/s]

Epoch 485, Loss: 0.0141


Training:  19%|█▉        | 486/2500 [01:28<06:36,  5.09it/s]

Epoch 486, Loss: 0.0141
Epoch 487, Loss: 0.0141


Training:  20%|█▉        | 488/2500 [01:28<06:45,  4.96it/s]

Epoch 488, Loss: 0.0141


Training:  20%|█▉        | 489/2500 [01:28<06:49,  4.91it/s]

Epoch 489, Loss: 0.0141


Training:  20%|█▉        | 490/2500 [01:28<06:53,  4.86it/s]

Epoch 490, Loss: 0.0141


Training:  20%|█▉        | 491/2500 [01:29<06:55,  4.84it/s]

Epoch 491, Loss: 0.0141


Training:  20%|█▉        | 492/2500 [01:29<06:57,  4.81it/s]

Epoch 492, Loss: 0.0141
Epoch 493, Loss: 0.0141


Training:  20%|█▉        | 494/2500 [01:29<06:37,  5.04it/s]

Epoch 494, Loss: 0.0141
Epoch 495, Loss: 0.0141


Training:  20%|█▉        | 496/2500 [01:30<06:36,  5.06it/s]

Epoch 496, Loss: 0.0141


Training:  20%|█▉        | 497/2500 [01:30<06:36,  5.05it/s]

Epoch 497, Loss: 0.0141


Training:  20%|█▉        | 498/2500 [01:30<06:44,  4.95it/s]

Epoch 498, Loss: 0.0141
Epoch 499, Loss: 0.0141


Training:  20%|██        | 500/2500 [01:30<06:29,  5.13it/s]

Epoch 500, Loss: 0.0141
Epoch 501, Loss: 0.0141


Training:  20%|██        | 502/2500 [01:31<06:09,  5.41it/s]

THRESHOLDING: 9 active coefficients
Epoch 502, Loss: 0.0141


Training:  20%|██        | 503/2500 [01:31<06:06,  5.45it/s]

Epoch 503, Loss: 0.0141
Epoch 504, Loss: 0.0141


Training:  20%|██        | 505/2500 [01:31<06:02,  5.50it/s]

Epoch 505, Loss: 0.0141
Epoch 506, Loss: 0.0141


Training:  20%|██        | 507/2500 [01:32<06:00,  5.53it/s]

Epoch 507, Loss: 0.0141
Epoch 508, Loss: 0.0141


Training:  20%|██        | 509/2500 [01:32<05:58,  5.55it/s]

Epoch 509, Loss: 0.0141
Epoch 510, Loss: 0.0141


Training:  20%|██        | 511/2500 [01:32<05:58,  5.55it/s]

Epoch 511, Loss: 0.0141
Epoch 512, Loss: 0.0141


Training:  21%|██        | 513/2500 [01:33<05:57,  5.56it/s]

Epoch 513, Loss: 0.0141
Epoch 514, Loss: 0.0141


Training:  21%|██        | 515/2500 [01:33<05:57,  5.56it/s]

Epoch 515, Loss: 0.0141
Epoch 516, Loss: 0.0141


Training:  21%|██        | 517/2500 [01:33<05:57,  5.55it/s]

Epoch 517, Loss: 0.0141
Epoch 518, Loss: 0.0141


Training:  21%|██        | 519/2500 [01:34<05:55,  5.57it/s]

Epoch 519, Loss: 0.0141
Epoch 520, Loss: 0.0141


Training:  21%|██        | 521/2500 [01:34<05:56,  5.55it/s]

Epoch 521, Loss: 0.0141
Epoch 522, Loss: 0.0141


Training:  21%|██        | 523/2500 [01:35<05:55,  5.56it/s]

Epoch 523, Loss: 0.0141
Epoch 524, Loss: 0.0141


Training:  21%|██        | 525/2500 [01:35<05:54,  5.57it/s]

Epoch 525, Loss: 0.0141
Epoch 526, Loss: 0.0141


Training:  21%|██        | 527/2500 [01:35<05:53,  5.58it/s]

Epoch 527, Loss: 0.0141
Epoch 528, Loss: 0.0141


Training:  21%|██        | 529/2500 [01:36<05:53,  5.58it/s]

Epoch 529, Loss: 0.0141
Epoch 530, Loss: 0.0141


Training:  21%|██        | 531/2500 [01:36<05:53,  5.57it/s]

Epoch 531, Loss: 0.0141
Epoch 532, Loss: 0.0141


Training:  21%|██▏       | 533/2500 [01:36<05:53,  5.57it/s]

Epoch 533, Loss: 0.0141
Epoch 534, Loss: 0.0141


Training:  21%|██▏       | 535/2500 [01:37<05:54,  5.55it/s]

Epoch 535, Loss: 0.0141
Epoch 536, Loss: 0.0141


Training:  21%|██▏       | 537/2500 [01:37<05:54,  5.54it/s]

Epoch 537, Loss: 0.0141
Epoch 538, Loss: 0.0141


Training:  22%|██▏       | 539/2500 [01:37<05:56,  5.50it/s]

Epoch 539, Loss: 0.0141
Epoch 540, Loss: 0.0141


Training:  22%|██▏       | 541/2500 [01:38<05:55,  5.52it/s]

Epoch 541, Loss: 0.0141
Epoch 542, Loss: 0.0141


Training:  22%|██▏       | 543/2500 [01:38<05:54,  5.52it/s]

Epoch 543, Loss: 0.0141
Epoch 544, Loss: 0.0141


Training:  22%|██▏       | 545/2500 [01:38<05:54,  5.51it/s]

Epoch 545, Loss: 0.0141
Epoch 546, Loss: 0.0141


Training:  22%|██▏       | 547/2500 [01:39<05:54,  5.51it/s]

Epoch 547, Loss: 0.0141
Epoch 548, Loss: 0.0141


Training:  22%|██▏       | 549/2500 [01:39<05:53,  5.53it/s]

Epoch 549, Loss: 0.0141
Epoch 550, Loss: 0.0141


Training:  22%|██▏       | 551/2500 [01:40<05:53,  5.51it/s]

Epoch 551, Loss: 0.0141
Epoch 552, Loss: 0.0141


Training:  22%|██▏       | 553/2500 [01:40<05:53,  5.51it/s]

Epoch 553, Loss: 0.0141
Epoch 554, Loss: 0.0141


Training:  22%|██▏       | 555/2500 [01:40<05:52,  5.51it/s]

Epoch 555, Loss: 0.0141
Epoch 556, Loss: 0.0141


Training:  22%|██▏       | 557/2500 [01:41<06:01,  5.38it/s]

Epoch 557, Loss: 0.0141
Epoch 558, Loss: 0.0141


Training:  22%|██▏       | 559/2500 [01:41<05:58,  5.41it/s]

Epoch 559, Loss: 0.0141
Epoch 560, Loss: 0.0141


Training:  22%|██▏       | 561/2500 [01:41<05:56,  5.44it/s]

Epoch 561, Loss: 0.0141
Epoch 562, Loss: 0.0141


Training:  23%|██▎       | 563/2500 [01:42<05:55,  5.45it/s]

Epoch 563, Loss: 0.0141


Training:  23%|██▎       | 564/2500 [01:42<06:05,  5.29it/s]

Epoch 564, Loss: 0.0141
Epoch 565, Loss: 0.0141


Training:  23%|██▎       | 566/2500 [01:42<06:03,  5.32it/s]

Epoch 566, Loss: 0.0141
Epoch 567, Loss: 0.0141


Training:  23%|██▎       | 568/2500 [01:43<05:58,  5.39it/s]

Epoch 568, Loss: 0.0141
Epoch 569, Loss: 0.0141


Training:  23%|██▎       | 570/2500 [01:43<05:53,  5.45it/s]

Epoch 570, Loss: 0.0141
Epoch 571, Loss: 0.0141


Training:  23%|██▎       | 572/2500 [01:43<05:51,  5.49it/s]

Epoch 572, Loss: 0.0141
Epoch 573, Loss: 0.0141


Training:  23%|██▎       | 574/2500 [01:44<05:50,  5.49it/s]

Epoch 574, Loss: 0.0141
Epoch 575, Loss: 0.0141


Training:  23%|██▎       | 576/2500 [01:44<05:49,  5.51it/s]

Epoch 576, Loss: 0.0141
Epoch 577, Loss: 0.0141


Training:  23%|██▎       | 578/2500 [01:45<05:48,  5.52it/s]

Epoch 578, Loss: 0.0141
Epoch 579, Loss: 0.0141


Training:  23%|██▎       | 580/2500 [01:45<05:48,  5.50it/s]

Epoch 580, Loss: 0.0141
Epoch 581, Loss: 0.0141


Training:  23%|██▎       | 582/2500 [01:45<05:47,  5.52it/s]

Epoch 582, Loss: 0.0141
Epoch 583, Loss: 0.0141


Training:  23%|██▎       | 584/2500 [01:46<05:47,  5.52it/s]

Epoch 584, Loss: 0.0141
Epoch 585, Loss: 0.0141


Training:  23%|██▎       | 586/2500 [01:46<05:46,  5.52it/s]

Epoch 586, Loss: 0.0141
Epoch 587, Loss: 0.0141


Training:  24%|██▎       | 588/2500 [01:46<05:45,  5.53it/s]

Epoch 588, Loss: 0.0141
Epoch 589, Loss: 0.0141


Training:  24%|██▎       | 590/2500 [01:47<05:44,  5.54it/s]

Epoch 590, Loss: 0.0141
Epoch 591, Loss: 0.0141


Training:  24%|██▎       | 592/2500 [01:47<05:44,  5.53it/s]

Epoch 592, Loss: 0.0141
Epoch 593, Loss: 0.0141


Training:  24%|██▍       | 594/2500 [01:47<05:44,  5.53it/s]

Epoch 594, Loss: 0.0141
Epoch 595, Loss: 0.0141


Training:  24%|██▍       | 596/2500 [01:48<05:45,  5.51it/s]

Epoch 596, Loss: 0.0141
Epoch 597, Loss: 0.0141


Training:  24%|██▍       | 598/2500 [01:48<05:46,  5.49it/s]

Epoch 598, Loss: 0.0141
Epoch 599, Loss: 0.0141


Training:  24%|██▍       | 600/2500 [01:49<05:47,  5.46it/s]

Epoch 600, Loss: 0.0141
Epoch 601, Loss: 0.0141


Training:  24%|██▍       | 602/2500 [01:49<05:45,  5.49it/s]

Epoch 602, Loss: 0.0141
Epoch 603, Loss: 0.0141


Training:  24%|██▍       | 604/2500 [01:49<05:44,  5.50it/s]

Epoch 604, Loss: 0.0141
Epoch 605, Loss: 0.0141


Training:  24%|██▍       | 606/2500 [01:50<05:43,  5.51it/s]

Epoch 606, Loss: 0.0141
Epoch 607, Loss: 0.0141


Training:  24%|██▍       | 608/2500 [01:50<05:47,  5.45it/s]

Epoch 608, Loss: 0.0141
Epoch 609, Loss: 0.0141


Training:  24%|██▍       | 610/2500 [01:50<05:43,  5.51it/s]

Epoch 610, Loss: 0.0141
Epoch 611, Loss: 0.0141


Training:  24%|██▍       | 612/2500 [01:51<05:41,  5.53it/s]

Epoch 612, Loss: 0.0141
Epoch 613, Loss: 0.0141


Training:  25%|██▍       | 614/2500 [01:51<05:42,  5.51it/s]

Epoch 614, Loss: 0.0141
Epoch 615, Loss: 0.0141


Training:  25%|██▍       | 616/2500 [01:51<05:40,  5.53it/s]

Epoch 616, Loss: 0.0141
Epoch 617, Loss: 0.0141


Training:  25%|██▍       | 618/2500 [01:52<05:39,  5.55it/s]

Epoch 618, Loss: 0.0141
Epoch 619, Loss: 0.0141


Training:  25%|██▍       | 620/2500 [01:52<05:40,  5.52it/s]

Epoch 620, Loss: 0.0141
Epoch 621, Loss: 0.0141


Training:  25%|██▍       | 622/2500 [01:53<05:41,  5.49it/s]

Epoch 622, Loss: 0.0141
Epoch 623, Loss: 0.0141


Training:  25%|██▍       | 624/2500 [01:53<05:43,  5.45it/s]

Epoch 624, Loss: 0.0141
Epoch 625, Loss: 0.0141


Training:  25%|██▌       | 626/2500 [01:53<05:44,  5.45it/s]

Epoch 626, Loss: 0.0141
Epoch 627, Loss: 0.0141


Training:  25%|██▌       | 628/2500 [01:54<05:44,  5.44it/s]

Epoch 628, Loss: 0.0141
Epoch 629, Loss: 0.0141


Training:  25%|██▌       | 630/2500 [01:54<05:46,  5.39it/s]

Epoch 630, Loss: 0.0141
Epoch 631, Loss: 0.0141


Training:  25%|██▌       | 632/2500 [01:54<05:51,  5.32it/s]

Epoch 632, Loss: 0.0141
Epoch 633, Loss: 0.0141


Training:  25%|██▌       | 634/2500 [01:55<05:45,  5.40it/s]

Epoch 634, Loss: 0.0141
Epoch 635, Loss: 0.0141


Training:  25%|██▌       | 636/2500 [01:55<05:39,  5.49it/s]

Epoch 636, Loss: 0.0141
Epoch 637, Loss: 0.0141


Training:  26%|██▌       | 638/2500 [01:55<05:38,  5.50it/s]

Epoch 638, Loss: 0.0141
Epoch 639, Loss: 0.0141


Training:  26%|██▌       | 640/2500 [01:56<05:36,  5.53it/s]

Epoch 640, Loss: 0.0141
Epoch 641, Loss: 0.0141


Training:  26%|██▌       | 642/2500 [01:56<05:37,  5.50it/s]

Epoch 642, Loss: 0.0141
Epoch 643, Loss: 0.0141


Training:  26%|██▌       | 644/2500 [01:57<05:35,  5.52it/s]

Epoch 644, Loss: 0.0141
Epoch 645, Loss: 0.0141


Training:  26%|██▌       | 646/2500 [01:57<05:34,  5.54it/s]

Epoch 646, Loss: 0.0141
Epoch 647, Loss: 0.0141


Training:  26%|██▌       | 648/2500 [01:57<05:34,  5.53it/s]

Epoch 648, Loss: 0.0141
Epoch 649, Loss: 0.0141


Training:  26%|██▌       | 650/2500 [01:58<05:33,  5.55it/s]

Epoch 650, Loss: 0.0141
Epoch 651, Loss: 0.0141


Training:  26%|██▌       | 652/2500 [01:58<05:33,  5.55it/s]

Epoch 652, Loss: 0.0141
Epoch 653, Loss: 0.0141


Training:  26%|██▌       | 654/2500 [01:58<05:33,  5.53it/s]

Epoch 654, Loss: 0.0141
Epoch 655, Loss: 0.0141


Training:  26%|██▌       | 656/2500 [01:59<05:32,  5.55it/s]

Epoch 656, Loss: 0.0141
Epoch 657, Loss: 0.0141


Training:  26%|██▋       | 658/2500 [01:59<05:31,  5.55it/s]

Epoch 658, Loss: 0.0141
Epoch 659, Loss: 0.0141


Training:  26%|██▋       | 660/2500 [01:59<05:32,  5.54it/s]

Epoch 660, Loss: 0.0141
Epoch 661, Loss: 0.0141


Training:  26%|██▋       | 662/2500 [02:00<05:30,  5.56it/s]

Epoch 662, Loss: 0.0141
Epoch 663, Loss: 0.0141


Training:  27%|██▋       | 664/2500 [02:00<05:31,  5.53it/s]

Epoch 664, Loss: 0.0141
Epoch 665, Loss: 0.0141


Training:  27%|██▋       | 666/2500 [02:01<05:33,  5.50it/s]

Epoch 666, Loss: 0.0141
Epoch 667, Loss: 0.0141


Training:  27%|██▋       | 668/2500 [02:01<05:31,  5.53it/s]

Epoch 668, Loss: 0.0141
Epoch 669, Loss: 0.0141


Training:  27%|██▋       | 670/2500 [02:01<05:29,  5.55it/s]

Epoch 670, Loss: 0.0141
Epoch 671, Loss: 0.0141


Training:  27%|██▋       | 672/2500 [02:02<05:29,  5.55it/s]

Epoch 672, Loss: 0.0141
Epoch 673, Loss: 0.0141


Training:  27%|██▋       | 674/2500 [02:02<05:29,  5.55it/s]

Epoch 674, Loss: 0.0141
Epoch 675, Loss: 0.0141


Training:  27%|██▋       | 676/2500 [02:02<05:28,  5.55it/s]

Epoch 676, Loss: 0.0141
Epoch 677, Loss: 0.0141


Training:  27%|██▋       | 678/2500 [02:03<05:29,  5.54it/s]

Epoch 678, Loss: 0.0141
Epoch 679, Loss: 0.0141


Training:  27%|██▋       | 680/2500 [02:03<05:28,  5.55it/s]

Epoch 680, Loss: 0.0141
Epoch 681, Loss: 0.0141


Training:  27%|██▋       | 682/2500 [02:03<05:28,  5.53it/s]

Epoch 682, Loss: 0.0141
Epoch 683, Loss: 0.0141


Training:  27%|██▋       | 684/2500 [02:04<05:27,  5.55it/s]

Epoch 684, Loss: 0.0141
Epoch 685, Loss: 0.0141


Training:  27%|██▋       | 686/2500 [02:04<05:26,  5.55it/s]

Epoch 686, Loss: 0.0141
Epoch 687, Loss: 0.0141


Training:  28%|██▊       | 688/2500 [02:05<05:27,  5.54it/s]

Epoch 688, Loss: 0.0141
Epoch 689, Loss: 0.0141


Training:  28%|██▊       | 690/2500 [02:05<05:32,  5.45it/s]

Epoch 690, Loss: 0.0141
Epoch 691, Loss: 0.0141


Training:  28%|██▊       | 692/2500 [02:05<05:32,  5.44it/s]

Epoch 692, Loss: 0.0141
Epoch 693, Loss: 0.0141


Training:  28%|██▊       | 694/2500 [02:06<05:38,  5.33it/s]

Epoch 694, Loss: 0.0141
Epoch 695, Loss: 0.0141


Training:  28%|██▊       | 696/2500 [02:06<05:34,  5.39it/s]

Epoch 696, Loss: 0.0141
Epoch 697, Loss: 0.0141


Training:  28%|██▊       | 698/2500 [02:06<05:37,  5.33it/s]

Epoch 698, Loss: 0.0141
Epoch 699, Loss: 0.0141


Training:  28%|██▊       | 700/2500 [02:07<05:36,  5.35it/s]

Epoch 700, Loss: 0.0141
Epoch 701, Loss: 0.0141


Training:  28%|██▊       | 702/2500 [02:07<05:29,  5.45it/s]

Epoch 702, Loss: 0.0141
Epoch 703, Loss: 0.0141


Training:  28%|██▊       | 704/2500 [02:07<05:26,  5.50it/s]

Epoch 704, Loss: 0.0141
Epoch 705, Loss: 0.0141


Training:  28%|██▊       | 706/2500 [02:08<05:24,  5.53it/s]

Epoch 706, Loss: 0.0141
Epoch 707, Loss: 0.0141


Training:  28%|██▊       | 708/2500 [02:08<05:23,  5.54it/s]

Epoch 708, Loss: 0.0141
Epoch 709, Loss: 0.0141


Training:  28%|██▊       | 710/2500 [02:09<05:22,  5.55it/s]

Epoch 710, Loss: 0.0141
Epoch 711, Loss: 0.0141


Training:  28%|██▊       | 712/2500 [02:09<05:22,  5.54it/s]

Epoch 712, Loss: 0.0141
Epoch 713, Loss: 0.0141


Training:  29%|██▊       | 714/2500 [02:09<05:21,  5.55it/s]

Epoch 714, Loss: 0.0141
Epoch 715, Loss: 0.0141


Training:  29%|██▊       | 716/2500 [02:10<05:21,  5.55it/s]

Epoch 716, Loss: 0.0141
Epoch 717, Loss: 0.0141


Training:  29%|██▊       | 718/2500 [02:10<05:21,  5.54it/s]

Epoch 718, Loss: 0.0141
Epoch 719, Loss: 0.0141


Training:  29%|██▉       | 720/2500 [02:10<05:22,  5.53it/s]

Epoch 720, Loss: 0.0141
Epoch 721, Loss: 0.0141


Training:  29%|██▉       | 722/2500 [02:11<05:20,  5.55it/s]

Epoch 722, Loss: 0.0141
Epoch 723, Loss: 0.0141


Training:  29%|██▉       | 724/2500 [02:11<05:20,  5.55it/s]

Epoch 724, Loss: 0.0141
Epoch 725, Loss: 0.0141


Training:  29%|██▉       | 726/2500 [02:11<05:20,  5.54it/s]

Epoch 726, Loss: 0.0141
Epoch 727, Loss: 0.0141


Training:  29%|██▉       | 728/2500 [02:12<05:20,  5.54it/s]

Epoch 728, Loss: 0.0141
Epoch 729, Loss: 0.0141


Training:  29%|██▉       | 730/2500 [02:12<05:18,  5.55it/s]

Epoch 730, Loss: 0.0141
Epoch 731, Loss: 0.0141


Training:  29%|██▉       | 732/2500 [02:13<05:18,  5.56it/s]

Epoch 732, Loss: 0.0141
Epoch 733, Loss: 0.0141


Training:  29%|██▉       | 734/2500 [02:13<05:19,  5.53it/s]

Epoch 734, Loss: 0.0141
Epoch 735, Loss: 0.0141


Training:  29%|██▉       | 736/2500 [02:13<05:18,  5.54it/s]

Epoch 736, Loss: 0.0141
Epoch 737, Loss: 0.0141


Training:  30%|██▉       | 738/2500 [02:14<05:17,  5.56it/s]

Epoch 738, Loss: 0.0141
Epoch 739, Loss: 0.0141


Training:  30%|██▉       | 740/2500 [02:14<05:17,  5.55it/s]

Epoch 740, Loss: 0.0141
Epoch 741, Loss: 0.0141


Training:  30%|██▉       | 742/2500 [02:14<05:16,  5.55it/s]

Epoch 742, Loss: 0.0141
Epoch 743, Loss: 0.0141


Training:  30%|██▉       | 744/2500 [02:15<05:16,  5.55it/s]

Epoch 744, Loss: 0.0141
Epoch 745, Loss: 0.0141


Training:  30%|██▉       | 746/2500 [02:15<05:17,  5.53it/s]

Epoch 746, Loss: 0.0141
Epoch 747, Loss: 0.0141


Training:  30%|██▉       | 748/2500 [02:15<05:16,  5.54it/s]

Epoch 748, Loss: 0.0141
Epoch 749, Loss: 0.0141


Training:  30%|███       | 750/2500 [02:16<05:14,  5.56it/s]

Epoch 750, Loss: 0.0141
Epoch 751, Loss: 0.0141


Training:  30%|███       | 752/2500 [02:16<05:11,  5.60it/s]

THRESHOLDING: 9 active coefficients
Epoch 752, Loss: 0.0141


Training:  30%|███       | 753/2500 [02:16<05:13,  5.58it/s]

Epoch 753, Loss: 0.0141
Epoch 754, Loss: 0.0141


Training:  30%|███       | 755/2500 [02:17<05:13,  5.56it/s]

Epoch 755, Loss: 0.0141
Epoch 756, Loss: 0.0141


Training:  30%|███       | 757/2500 [02:17<05:24,  5.37it/s]

Epoch 757, Loss: 0.0141
Epoch 758, Loss: 0.0141


Training:  30%|███       | 759/2500 [02:17<05:21,  5.41it/s]

Epoch 759, Loss: 0.0141
Epoch 760, Loss: 0.0141


Training:  30%|███       | 761/2500 [02:18<05:19,  5.45it/s]

Epoch 761, Loss: 0.0141
Epoch 762, Loss: 0.0141


Training:  31%|███       | 763/2500 [02:18<05:19,  5.44it/s]

Epoch 763, Loss: 0.0141
Epoch 764, Loss: 0.0141


Training:  31%|███       | 765/2500 [02:19<05:22,  5.38it/s]

Epoch 765, Loss: 0.0141
Epoch 766, Loss: 0.0141


Training:  31%|███       | 767/2500 [02:19<05:21,  5.39it/s]

Epoch 767, Loss: 0.0141
Epoch 768, Loss: 0.0141


Training:  31%|███       | 769/2500 [02:19<05:16,  5.46it/s]

Epoch 769, Loss: 0.0141
Epoch 770, Loss: 0.0141


Training:  31%|███       | 771/2500 [02:20<05:13,  5.51it/s]

Epoch 771, Loss: 0.0141
Epoch 772, Loss: 0.0141


Training:  31%|███       | 773/2500 [02:20<05:12,  5.52it/s]

Epoch 773, Loss: 0.0141
Epoch 774, Loss: 0.0141


Training:  31%|███       | 775/2500 [02:20<05:13,  5.50it/s]

Epoch 775, Loss: 0.0141
Epoch 776, Loss: 0.0141


Training:  31%|███       | 777/2500 [02:21<05:11,  5.53it/s]

Epoch 777, Loss: 0.0141
Epoch 778, Loss: 0.0141


Training:  31%|███       | 779/2500 [02:21<05:11,  5.52it/s]

Epoch 779, Loss: 0.0141
Epoch 780, Loss: 0.0141


Training:  31%|███       | 781/2500 [02:21<05:10,  5.54it/s]

Epoch 781, Loss: 0.0141
Epoch 782, Loss: 0.0141


Training:  31%|███▏      | 783/2500 [02:22<05:09,  5.55it/s]

Epoch 783, Loss: 0.0141
Epoch 784, Loss: 0.0141


Training:  31%|███▏      | 785/2500 [02:22<05:08,  5.56it/s]

Epoch 785, Loss: 0.0141
Epoch 786, Loss: 0.0141


Training:  31%|███▏      | 787/2500 [02:23<05:08,  5.56it/s]

Epoch 787, Loss: 0.0141
Epoch 788, Loss: 0.0141


Training:  32%|███▏      | 789/2500 [02:23<05:07,  5.56it/s]

Epoch 789, Loss: 0.0141
Epoch 790, Loss: 0.0141


Training:  32%|███▏      | 791/2500 [02:23<05:08,  5.53it/s]

Epoch 791, Loss: 0.0141
Epoch 792, Loss: 0.0141


Training:  32%|███▏      | 793/2500 [02:24<05:07,  5.55it/s]

Epoch 793, Loss: 0.0141
Epoch 794, Loss: 0.0141


Training:  32%|███▏      | 795/2500 [02:24<05:07,  5.54it/s]

Epoch 795, Loss: 0.0141
Epoch 796, Loss: 0.0141


Training:  32%|███▏      | 797/2500 [02:24<05:07,  5.54it/s]

Epoch 797, Loss: 0.0141
Epoch 798, Loss: 0.0141


Training:  32%|███▏      | 799/2500 [02:25<05:06,  5.55it/s]

Epoch 799, Loss: 0.0141
Epoch 800, Loss: 0.0141


Training:  32%|███▏      | 801/2500 [02:25<05:05,  5.56it/s]

Epoch 801, Loss: 0.0141
Epoch 802, Loss: 0.0141


Training:  32%|███▏      | 803/2500 [02:25<05:05,  5.55it/s]

Epoch 803, Loss: 0.0141
Epoch 804, Loss: 0.0141


Training:  32%|███▏      | 805/2500 [02:26<05:05,  5.55it/s]

Epoch 805, Loss: 0.0141
Epoch 806, Loss: 0.0141


Training:  32%|███▏      | 807/2500 [02:26<05:04,  5.56it/s]

Epoch 807, Loss: 0.0141
Epoch 808, Loss: 0.0141


Training:  32%|███▏      | 809/2500 [02:26<05:04,  5.54it/s]

Epoch 809, Loss: 0.0141
Epoch 810, Loss: 0.0141


Training:  32%|███▏      | 811/2500 [02:27<05:04,  5.55it/s]

Epoch 811, Loss: 0.0141
Epoch 812, Loss: 0.0141


Training:  33%|███▎      | 813/2500 [02:27<05:03,  5.55it/s]

Epoch 813, Loss: 0.0141
Epoch 814, Loss: 0.0141


Training:  33%|███▎      | 815/2500 [02:28<05:04,  5.54it/s]

Epoch 815, Loss: 0.0141
Epoch 816, Loss: 0.0141


Training:  33%|███▎      | 817/2500 [02:28<05:02,  5.56it/s]

Epoch 817, Loss: 0.0141
Epoch 818, Loss: 0.0141


Training:  33%|███▎      | 819/2500 [02:28<05:04,  5.52it/s]

Epoch 819, Loss: 0.0141
Epoch 820, Loss: 0.0141


Training:  33%|███▎      | 821/2500 [02:29<05:02,  5.55it/s]

Epoch 821, Loss: 0.0141
Epoch 822, Loss: 0.0141


Training:  33%|███▎      | 823/2500 [02:29<05:06,  5.48it/s]

Epoch 823, Loss: 0.0141
Epoch 824, Loss: 0.0141


Training:  33%|███▎      | 825/2500 [02:29<05:07,  5.45it/s]

Epoch 825, Loss: 0.0141
Epoch 826, Loss: 0.0141


Training:  33%|███▎      | 827/2500 [02:30<05:07,  5.44it/s]

Epoch 827, Loss: 0.0141
Epoch 828, Loss: 0.0141


Training:  33%|███▎      | 829/2500 [02:30<05:07,  5.44it/s]

Epoch 829, Loss: 0.0141
Epoch 830, Loss: 0.0141


Training:  33%|███▎      | 831/2500 [02:30<05:09,  5.39it/s]

Epoch 831, Loss: 0.0141
Epoch 832, Loss: 0.0141


Training:  33%|███▎      | 833/2500 [02:31<05:10,  5.37it/s]

Epoch 833, Loss: 0.0141
Epoch 834, Loss: 0.0141


Training:  33%|███▎      | 835/2500 [02:31<05:06,  5.44it/s]

Epoch 835, Loss: 0.0141
Epoch 836, Loss: 0.0141


Training:  33%|███▎      | 837/2500 [02:32<05:02,  5.49it/s]

Epoch 837, Loss: 0.0141
Epoch 838, Loss: 0.0141


Training:  34%|███▎      | 839/2500 [02:32<05:00,  5.53it/s]

Epoch 839, Loss: 0.0141
Epoch 840, Loss: 0.0141


Training:  34%|███▎      | 841/2500 [02:32<05:00,  5.52it/s]

Epoch 841, Loss: 0.0141
Epoch 842, Loss: 0.0141


Training:  34%|███▎      | 843/2500 [02:33<04:59,  5.54it/s]

Epoch 843, Loss: 0.0141
Epoch 844, Loss: 0.0141


Training:  34%|███▍      | 845/2500 [02:33<04:57,  5.56it/s]

Epoch 845, Loss: 0.0141
Epoch 846, Loss: 0.0141


Training:  34%|███▍      | 847/2500 [02:33<04:57,  5.56it/s]

Epoch 847, Loss: 0.0141
Epoch 848, Loss: 0.0141


Training:  34%|███▍      | 849/2500 [02:34<04:57,  5.54it/s]

Epoch 849, Loss: 0.0141
Epoch 850, Loss: 0.0141


Training:  34%|███▍      | 851/2500 [02:34<04:57,  5.55it/s]

Epoch 851, Loss: 0.0141
Epoch 852, Loss: 0.0141


Training:  34%|███▍      | 853/2500 [02:34<04:56,  5.55it/s]

Epoch 853, Loss: 0.0141
Epoch 854, Loss: 0.0141


Training:  34%|███▍      | 855/2500 [02:35<04:57,  5.53it/s]

Epoch 855, Loss: 0.0141
Epoch 856, Loss: 0.0141


Training:  34%|███▍      | 857/2500 [02:35<04:56,  5.54it/s]

Epoch 857, Loss: 0.0141
Epoch 858, Loss: 0.0141


Training:  34%|███▍      | 859/2500 [02:36<04:57,  5.52it/s]

Epoch 859, Loss: 0.0141
Epoch 860, Loss: 0.0141


Training:  34%|███▍      | 861/2500 [02:36<04:55,  5.55it/s]

Epoch 861, Loss: 0.0141
Epoch 862, Loss: 0.0141


Training:  35%|███▍      | 863/2500 [02:36<04:54,  5.56it/s]

Epoch 863, Loss: 0.0141
Epoch 864, Loss: 0.0141


Training:  35%|███▍      | 865/2500 [02:37<04:54,  5.56it/s]

Epoch 865, Loss: 0.0141
Epoch 866, Loss: 0.0141


Training:  35%|███▍      | 867/2500 [02:37<04:54,  5.55it/s]

Epoch 867, Loss: 0.0141
Epoch 868, Loss: 0.0141


Training:  35%|███▍      | 869/2500 [02:37<04:54,  5.54it/s]

Epoch 869, Loss: 0.0141
Epoch 870, Loss: 0.0141


Training:  35%|███▍      | 871/2500 [02:38<04:54,  5.53it/s]

Epoch 871, Loss: 0.0141
Epoch 872, Loss: 0.0141


Training:  35%|███▍      | 873/2500 [02:38<04:53,  5.54it/s]

Epoch 873, Loss: 0.0141
Epoch 874, Loss: 0.0141


Training:  35%|███▌      | 875/2500 [02:38<04:53,  5.54it/s]

Epoch 875, Loss: 0.0141
Epoch 876, Loss: 0.0141


Training:  35%|███▌      | 877/2500 [02:39<04:53,  5.53it/s]

Epoch 877, Loss: 0.0141
Epoch 878, Loss: 0.0141


Training:  35%|███▌      | 879/2500 [02:39<04:52,  5.54it/s]

Epoch 879, Loss: 0.0141
Epoch 880, Loss: 0.0141


Training:  35%|███▌      | 881/2500 [02:40<04:51,  5.55it/s]

Epoch 881, Loss: 0.0141
Epoch 882, Loss: 0.0141


Training:  35%|███▌      | 883/2500 [02:40<04:51,  5.55it/s]

Epoch 883, Loss: 0.0141
Epoch 884, Loss: 0.0141


Training:  35%|███▌      | 885/2500 [02:40<04:50,  5.55it/s]

Epoch 885, Loss: 0.0141
Epoch 886, Loss: 0.0141


Training:  35%|███▌      | 887/2500 [02:41<04:50,  5.55it/s]

Epoch 887, Loss: 0.0141
Epoch 888, Loss: 0.0141


Training:  36%|███▌      | 889/2500 [02:41<04:53,  5.50it/s]

Epoch 889, Loss: 0.0141
Epoch 890, Loss: 0.0141


Training:  36%|███▌      | 891/2500 [02:41<04:56,  5.43it/s]

Epoch 891, Loss: 0.0141
Epoch 892, Loss: 0.0141


Training:  36%|███▌      | 893/2500 [02:42<04:55,  5.44it/s]

Epoch 893, Loss: 0.0141
Epoch 894, Loss: 0.0141


Training:  36%|███▌      | 895/2500 [02:42<04:54,  5.45it/s]

Epoch 895, Loss: 0.0141
Epoch 896, Loss: 0.0141


Training:  36%|███▌      | 897/2500 [02:42<04:59,  5.36it/s]

Epoch 897, Loss: 0.0141
Epoch 898, Loss: 0.0141


Training:  36%|███▌      | 899/2500 [02:43<04:59,  5.35it/s]

Epoch 899, Loss: 0.0141
Epoch 900, Loss: 0.0141


Training:  36%|███▌      | 901/2500 [02:43<04:56,  5.39it/s]

Epoch 901, Loss: 0.0141
Epoch 902, Loss: 0.0141


Training:  36%|███▌      | 903/2500 [02:44<04:52,  5.47it/s]

Epoch 903, Loss: 0.0141
Epoch 904, Loss: 0.0141


Training:  36%|███▌      | 905/2500 [02:44<04:49,  5.51it/s]

Epoch 905, Loss: 0.0141
Epoch 906, Loss: 0.0141


Training:  36%|███▋      | 907/2500 [02:44<04:47,  5.53it/s]

Epoch 907, Loss: 0.0141
Epoch 908, Loss: 0.0141


Training:  36%|███▋      | 909/2500 [02:45<04:46,  5.55it/s]

Epoch 909, Loss: 0.0141
Epoch 910, Loss: 0.0141


Training:  36%|███▋      | 911/2500 [02:45<04:47,  5.53it/s]

Epoch 911, Loss: 0.0141
Epoch 912, Loss: 0.0141


Training:  37%|███▋      | 913/2500 [02:45<04:46,  5.54it/s]

Epoch 913, Loss: 0.0141
Epoch 914, Loss: 0.0141


Training:  37%|███▋      | 915/2500 [02:46<04:45,  5.55it/s]

Epoch 915, Loss: 0.0141
Epoch 916, Loss: 0.0141


Training:  37%|███▋      | 917/2500 [02:46<04:45,  5.54it/s]

Epoch 917, Loss: 0.0141
Epoch 918, Loss: 0.0141


Training:  37%|███▋      | 919/2500 [02:46<04:44,  5.55it/s]

Epoch 919, Loss: 0.0141
Epoch 920, Loss: 0.0141


Training:  37%|███▋      | 921/2500 [02:47<04:43,  5.56it/s]

Epoch 921, Loss: 0.0141
Epoch 922, Loss: 0.0141


Training:  37%|███▋      | 923/2500 [02:47<04:44,  5.54it/s]

Epoch 923, Loss: 0.0141
Epoch 924, Loss: 0.0141


Training:  37%|███▋      | 925/2500 [02:48<04:43,  5.56it/s]

Epoch 925, Loss: 0.0141
Epoch 926, Loss: 0.0141


Training:  37%|███▋      | 927/2500 [02:48<04:43,  5.56it/s]

Epoch 927, Loss: 0.0141
Epoch 928, Loss: 0.0141


Training:  37%|███▋      | 929/2500 [02:48<04:43,  5.55it/s]

Epoch 929, Loss: 0.0141
Epoch 930, Loss: 0.0141


Training:  37%|███▋      | 931/2500 [02:49<04:44,  5.52it/s]

Epoch 931, Loss: 0.0141
Epoch 932, Loss: 0.0141


Training:  37%|███▋      | 933/2500 [02:49<04:43,  5.53it/s]

Epoch 933, Loss: 0.0141
Epoch 934, Loss: 0.0141


Training:  37%|███▋      | 935/2500 [02:49<04:44,  5.50it/s]

Epoch 935, Loss: 0.0141
Epoch 936, Loss: 0.0141


Training:  37%|███▋      | 937/2500 [02:50<04:43,  5.51it/s]

Epoch 937, Loss: 0.0141
Epoch 938, Loss: 0.0141


Training:  38%|███▊      | 939/2500 [02:50<04:44,  5.49it/s]

Epoch 939, Loss: 0.0141
Epoch 940, Loss: 0.0141


Training:  38%|███▊      | 941/2500 [02:50<04:42,  5.53it/s]

Epoch 941, Loss: 0.0141
Epoch 942, Loss: 0.0141


Training:  38%|███▊      | 943/2500 [02:51<04:41,  5.54it/s]

Epoch 943, Loss: 0.0141
Epoch 944, Loss: 0.0141


Training:  38%|███▊      | 945/2500 [02:51<04:41,  5.53it/s]

Epoch 945, Loss: 0.0141
Epoch 946, Loss: 0.0141


Training:  38%|███▊      | 947/2500 [02:52<04:41,  5.52it/s]

Epoch 947, Loss: 0.0141
Epoch 948, Loss: 0.0141


Training:  38%|███▊      | 949/2500 [02:52<04:40,  5.52it/s]

Epoch 949, Loss: 0.0141
Epoch 950, Loss: 0.0141


Training:  38%|███▊      | 951/2500 [02:52<04:39,  5.54it/s]

Epoch 951, Loss: 0.0141
Epoch 952, Loss: 0.0141


Training:  38%|███▊      | 953/2500 [02:53<04:38,  5.55it/s]

Epoch 953, Loss: 0.0141
Epoch 954, Loss: 0.0141


Training:  38%|███▊      | 955/2500 [02:53<04:38,  5.55it/s]

Epoch 955, Loss: 0.0141
Epoch 956, Loss: 0.0141


Training:  38%|███▊      | 957/2500 [02:53<04:45,  5.41it/s]

Epoch 957, Loss: 0.0141
Epoch 958, Loss: 0.0141


Training:  38%|███▊      | 959/2500 [02:54<04:44,  5.41it/s]

Epoch 959, Loss: 0.0141
Epoch 960, Loss: 0.0141


Training:  38%|███▊      | 961/2500 [02:54<04:44,  5.41it/s]

Epoch 961, Loss: 0.0141
Epoch 962, Loss: 0.0141


Training:  39%|███▊      | 963/2500 [02:54<04:43,  5.43it/s]

Epoch 963, Loss: 0.0141
Epoch 964, Loss: 0.0141


Training:  39%|███▊      | 965/2500 [02:55<04:43,  5.41it/s]

Epoch 965, Loss: 0.0141
Epoch 966, Loss: 0.0141


Training:  39%|███▊      | 967/2500 [02:55<04:45,  5.37it/s]

Epoch 967, Loss: 0.0141
Epoch 968, Loss: 0.0141


Training:  39%|███▉      | 969/2500 [02:56<04:40,  5.47it/s]

Epoch 969, Loss: 0.0141
Epoch 970, Loss: 0.0141


Training:  39%|███▉      | 971/2500 [02:56<04:37,  5.50it/s]

Epoch 971, Loss: 0.0141
Epoch 972, Loss: 0.0141


Training:  39%|███▉      | 973/2500 [02:56<04:36,  5.53it/s]

Epoch 973, Loss: 0.0141
Epoch 974, Loss: 0.0141


Training:  39%|███▉      | 975/2500 [02:57<04:36,  5.52it/s]

Epoch 975, Loss: 0.0141
Epoch 976, Loss: 0.0141


Training:  39%|███▉      | 977/2500 [02:57<04:35,  5.53it/s]

Epoch 977, Loss: 0.0141
Epoch 978, Loss: 0.0141


Training:  39%|███▉      | 979/2500 [02:57<04:36,  5.51it/s]

Epoch 979, Loss: 0.0141
Epoch 980, Loss: 0.0141


Training:  39%|███▉      | 981/2500 [02:58<04:34,  5.53it/s]

Epoch 981, Loss: 0.0141
Epoch 982, Loss: 0.0141


Training:  39%|███▉      | 983/2500 [02:58<04:34,  5.53it/s]

Epoch 983, Loss: 0.0141
Epoch 984, Loss: 0.0141


Training:  39%|███▉      | 985/2500 [02:58<04:34,  5.52it/s]

Epoch 985, Loss: 0.0141
Epoch 986, Loss: 0.0141


Training:  39%|███▉      | 987/2500 [02:59<04:33,  5.53it/s]

Epoch 987, Loss: 0.0141
Epoch 988, Loss: 0.0141


Training:  40%|███▉      | 989/2500 [02:59<04:33,  5.53it/s]

Epoch 989, Loss: 0.0141
Epoch 990, Loss: 0.0141


Training:  40%|███▉      | 991/2500 [03:00<04:32,  5.53it/s]

Epoch 991, Loss: 0.0141
Epoch 992, Loss: 0.0141


Training:  40%|███▉      | 993/2500 [03:00<04:31,  5.55it/s]

Epoch 993, Loss: 0.0141
Epoch 994, Loss: 0.0141


Training:  40%|███▉      | 995/2500 [03:00<04:30,  5.56it/s]

Epoch 995, Loss: 0.0141
Epoch 996, Loss: 0.0141


Training:  40%|███▉      | 997/2500 [03:01<04:32,  5.51it/s]

Epoch 997, Loss: 0.0141
Epoch 998, Loss: 0.0141


Training:  40%|███▉      | 999/2500 [03:01<04:31,  5.54it/s]

Epoch 999, Loss: 0.0141
Epoch 1000, Loss: 0.0141


Training:  40%|████      | 1001/2500 [03:01<04:42,  5.31it/s]

Epoch 1001, Loss: 0.0141
THRESHOLDING: 9 active coefficients
Epoch 1002, Loss: 0.0141


Training:  40%|████      | 1003/2500 [03:02<04:31,  5.52it/s]

Epoch 1003, Loss: 0.0141
Epoch 1004, Loss: 0.0141


Training:  40%|████      | 1005/2500 [03:02<04:29,  5.54it/s]

Epoch 1005, Loss: 0.0141
Epoch 1006, Loss: 0.0141


Training:  40%|████      | 1007/2500 [03:02<04:29,  5.54it/s]

Epoch 1007, Loss: 0.0141
Epoch 1008, Loss: 0.0141


Training:  40%|████      | 1009/2500 [03:03<04:29,  5.53it/s]

Epoch 1009, Loss: 0.0141
Epoch 1010, Loss: 0.0141


Training:  40%|████      | 1011/2500 [03:03<04:28,  5.55it/s]

Epoch 1011, Loss: 0.0141
Epoch 1012, Loss: 0.0141


Training:  41%|████      | 1013/2500 [03:03<04:27,  5.55it/s]

Epoch 1013, Loss: 0.0141
Epoch 1014, Loss: 0.0141


Training:  41%|████      | 1015/2500 [03:04<04:28,  5.53it/s]

Epoch 1015, Loss: 0.0141
Epoch 1016, Loss: 0.0141


Training:  41%|████      | 1017/2500 [03:04<04:27,  5.53it/s]

Epoch 1017, Loss: 0.0141
Epoch 1018, Loss: 0.0141


Training:  41%|████      | 1019/2500 [03:05<04:29,  5.49it/s]

Epoch 1019, Loss: 0.0141
Epoch 1020, Loss: 0.0141


Training:  41%|████      | 1021/2500 [03:05<04:27,  5.52it/s]

Epoch 1021, Loss: 0.0141
Epoch 1022, Loss: 0.0141


Training:  41%|████      | 1023/2500 [03:05<04:29,  5.48it/s]

Epoch 1023, Loss: 0.0141
Epoch 1024, Loss: 0.0141


Training:  41%|████      | 1025/2500 [03:06<04:31,  5.44it/s]

Epoch 1025, Loss: 0.0141
Epoch 1026, Loss: 0.0141


Training:  41%|████      | 1027/2500 [03:06<04:32,  5.41it/s]

Epoch 1027, Loss: 0.0141
Epoch 1028, Loss: 0.0141


Training:  41%|████      | 1029/2500 [03:06<04:28,  5.47it/s]

Epoch 1029, Loss: 0.0141
Epoch 1030, Loss: 0.0141


Training:  41%|████      | 1031/2500 [03:07<04:31,  5.41it/s]

Epoch 1031, Loss: 0.0141
Epoch 1032, Loss: 0.0141


Training:  41%|████▏     | 1033/2500 [03:07<04:34,  5.35it/s]

Epoch 1033, Loss: 0.0141
Epoch 1034, Loss: 0.0141


Training:  41%|████▏     | 1035/2500 [03:08<04:30,  5.42it/s]

Epoch 1035, Loss: 0.0141
Epoch 1036, Loss: 0.0141


Training:  41%|████▏     | 1037/2500 [03:08<04:27,  5.46it/s]

Epoch 1037, Loss: 0.0141
Epoch 1038, Loss: 0.0141


Training:  42%|████▏     | 1039/2500 [03:08<04:24,  5.52it/s]

Epoch 1039, Loss: 0.0141
Epoch 1040, Loss: 0.0141


Training:  42%|████▏     | 1041/2500 [03:09<04:23,  5.54it/s]

Epoch 1041, Loss: 0.0141
Epoch 1042, Loss: 0.0141


Training:  42%|████▏     | 1043/2500 [03:09<04:23,  5.53it/s]

Epoch 1043, Loss: 0.0141
Epoch 1044, Loss: 0.0141


Training:  42%|████▏     | 1045/2500 [03:09<04:23,  5.53it/s]

Epoch 1045, Loss: 0.0141
Epoch 1046, Loss: 0.0141


Training:  42%|████▏     | 1047/2500 [03:10<04:22,  5.54it/s]

Epoch 1047, Loss: 0.0141
Epoch 1048, Loss: 0.0141


Training:  42%|████▏     | 1049/2500 [03:10<04:21,  5.54it/s]

Epoch 1049, Loss: 0.0141
Epoch 1050, Loss: 0.0141


Training:  42%|████▏     | 1051/2500 [03:10<04:21,  5.55it/s]

Epoch 1051, Loss: 0.0141
Epoch 1052, Loss: 0.0141


Training:  42%|████▏     | 1053/2500 [03:11<04:21,  5.54it/s]

Epoch 1053, Loss: 0.0141
Epoch 1054, Loss: 0.0141


Training:  42%|████▏     | 1055/2500 [03:11<04:21,  5.53it/s]

Epoch 1055, Loss: 0.0141
Epoch 1056, Loss: 0.0141


Training:  42%|████▏     | 1057/2500 [03:12<04:20,  5.54it/s]

Epoch 1057, Loss: 0.0141
Epoch 1058, Loss: 0.0141


Training:  42%|████▏     | 1059/2500 [03:12<04:21,  5.50it/s]

Epoch 1059, Loss: 0.0141
Epoch 1060, Loss: 0.0141


Training:  42%|████▏     | 1061/2500 [03:12<04:20,  5.53it/s]

Epoch 1061, Loss: 0.0141
Epoch 1062, Loss: 0.0141


Training:  43%|████▎     | 1063/2500 [03:13<04:19,  5.54it/s]

Epoch 1063, Loss: 0.0141
Epoch 1064, Loss: 0.0141


Training:  43%|████▎     | 1065/2500 [03:13<04:19,  5.54it/s]

Epoch 1065, Loss: 0.0141
Epoch 1066, Loss: 0.0141


Training:  43%|████▎     | 1067/2500 [03:13<04:18,  5.55it/s]

Epoch 1067, Loss: 0.0141
Epoch 1068, Loss: 0.0141


Training:  43%|████▎     | 1069/2500 [03:14<04:17,  5.55it/s]

Epoch 1069, Loss: 0.0141
Epoch 1070, Loss: 0.0141


Training:  43%|████▎     | 1071/2500 [03:14<04:19,  5.51it/s]

Epoch 1071, Loss: 0.0141
Epoch 1072, Loss: 0.0141


Training:  43%|████▎     | 1073/2500 [03:14<04:18,  5.52it/s]

Epoch 1073, Loss: 0.0141
Epoch 1074, Loss: 0.0141


Training:  43%|████▎     | 1075/2500 [03:15<04:16,  5.55it/s]

Epoch 1075, Loss: 0.0141
Epoch 1076, Loss: 0.0141


Training:  43%|████▎     | 1077/2500 [03:15<04:17,  5.52it/s]

Epoch 1077, Loss: 0.0141
Epoch 1078, Loss: 0.0141


Training:  43%|████▎     | 1079/2500 [03:15<04:16,  5.55it/s]

Epoch 1079, Loss: 0.0141
Epoch 1080, Loss: 0.0141


Training:  43%|████▎     | 1081/2500 [03:16<04:15,  5.55it/s]

Epoch 1081, Loss: 0.0141
Epoch 1082, Loss: 0.0141


Training:  43%|████▎     | 1083/2500 [03:16<04:16,  5.54it/s]

Epoch 1083, Loss: 0.0141
Epoch 1084, Loss: 0.0141


Training:  43%|████▎     | 1085/2500 [03:17<04:15,  5.54it/s]

Epoch 1085, Loss: 0.0141
Epoch 1086, Loss: 0.0141


Training:  43%|████▎     | 1087/2500 [03:17<04:14,  5.55it/s]

Epoch 1087, Loss: 0.0141
Epoch 1088, Loss: 0.0141


Training:  44%|████▎     | 1089/2500 [03:17<04:14,  5.54it/s]

Epoch 1089, Loss: 0.0141
Epoch 1090, Loss: 0.0141


Training:  44%|████▎     | 1091/2500 [03:18<04:19,  5.42it/s]

Epoch 1091, Loss: 0.0141
Epoch 1092, Loss: 0.0141


Training:  44%|████▎     | 1093/2500 [03:18<04:19,  5.42it/s]

Epoch 1093, Loss: 0.0141
Epoch 1094, Loss: 0.0141


Training:  44%|████▍     | 1095/2500 [03:18<04:18,  5.43it/s]

Epoch 1095, Loss: 0.0141
Epoch 1096, Loss: 0.0141


Training:  44%|████▍     | 1097/2500 [03:19<04:20,  5.39it/s]

Epoch 1097, Loss: 0.0141
Epoch 1098, Loss: 0.0141


Training:  44%|████▍     | 1099/2500 [03:19<04:23,  5.31it/s]

Epoch 1099, Loss: 0.0141
Epoch 1100, Loss: 0.0141


Training:  44%|████▍     | 1101/2500 [03:20<04:24,  5.30it/s]

Epoch 1101, Loss: 0.0141
Epoch 1102, Loss: 0.0141


Training:  44%|████▍     | 1103/2500 [03:20<04:18,  5.39it/s]

Epoch 1103, Loss: 0.0141
Epoch 1104, Loss: 0.0141


Training:  44%|████▍     | 1105/2500 [03:20<04:16,  5.44it/s]

Epoch 1105, Loss: 0.0141
Epoch 1106, Loss: 0.0141


Training:  44%|████▍     | 1107/2500 [03:21<04:13,  5.49it/s]

Epoch 1107, Loss: 0.0141
Epoch 1108, Loss: 0.0141


Training:  44%|████▍     | 1109/2500 [03:21<04:12,  5.51it/s]

Epoch 1109, Loss: 0.0141
Epoch 1110, Loss: 0.0141


Training:  44%|████▍     | 1111/2500 [03:21<04:12,  5.51it/s]

Epoch 1111, Loss: 0.0141
Epoch 1112, Loss: 0.0141


Training:  45%|████▍     | 1113/2500 [03:22<04:11,  5.52it/s]

Epoch 1113, Loss: 0.0141
Epoch 1114, Loss: 0.0141


Training:  45%|████▍     | 1115/2500 [03:22<04:11,  5.50it/s]

Epoch 1115, Loss: 0.0141
Epoch 1116, Loss: 0.0141


Training:  45%|████▍     | 1117/2500 [03:22<04:11,  5.51it/s]

Epoch 1117, Loss: 0.0141
Epoch 1118, Loss: 0.0141


Training:  45%|████▍     | 1119/2500 [03:23<04:10,  5.52it/s]

Epoch 1119, Loss: 0.0141
Epoch 1120, Loss: 0.0141


Training:  45%|████▍     | 1121/2500 [03:23<04:09,  5.52it/s]

Epoch 1121, Loss: 0.0141
Epoch 1122, Loss: 0.0141


Training:  45%|████▍     | 1123/2500 [03:24<04:09,  5.51it/s]

Epoch 1123, Loss: 0.0141
Epoch 1124, Loss: 0.0141


Training:  45%|████▌     | 1125/2500 [03:24<04:09,  5.51it/s]

Epoch 1125, Loss: 0.0141
Epoch 1126, Loss: 0.0141


Training:  45%|████▌     | 1127/2500 [03:24<04:09,  5.51it/s]

Epoch 1127, Loss: 0.0141
Epoch 1128, Loss: 0.0141


Training:  45%|████▌     | 1129/2500 [03:25<04:09,  5.50it/s]

Epoch 1129, Loss: 0.0141
Epoch 1130, Loss: 0.0141


Training:  45%|████▌     | 1131/2500 [03:25<04:08,  5.52it/s]

Epoch 1131, Loss: 0.0141
Epoch 1132, Loss: 0.0141


Training:  45%|████▌     | 1133/2500 [03:25<04:06,  5.54it/s]

Epoch 1133, Loss: 0.0141
Epoch 1134, Loss: 0.0141


Training:  45%|████▌     | 1135/2500 [03:26<04:06,  5.53it/s]

Epoch 1135, Loss: 0.0141
Epoch 1136, Loss: 0.0141


Training:  45%|████▌     | 1137/2500 [03:26<04:06,  5.54it/s]

Epoch 1137, Loss: 0.0141
Epoch 1138, Loss: 0.0141


Training:  46%|████▌     | 1139/2500 [03:26<04:05,  5.54it/s]

Epoch 1139, Loss: 0.0141
Epoch 1140, Loss: 0.0141


Training:  46%|████▌     | 1141/2500 [03:27<04:07,  5.49it/s]

Epoch 1141, Loss: 0.0141
Epoch 1142, Loss: 0.0141


Training:  46%|████▌     | 1143/2500 [03:27<04:06,  5.52it/s]

Epoch 1143, Loss: 0.0141
Epoch 1144, Loss: 0.0141


Training:  46%|████▌     | 1145/2500 [03:28<04:06,  5.51it/s]

Epoch 1145, Loss: 0.0141
Epoch 1146, Loss: 0.0141


Training:  46%|████▌     | 1147/2500 [03:28<04:05,  5.51it/s]

Epoch 1147, Loss: 0.0141
Epoch 1148, Loss: 0.0141


Training:  46%|████▌     | 1149/2500 [03:28<04:04,  5.52it/s]

Epoch 1149, Loss: 0.0141
Epoch 1150, Loss: 0.0141


Training:  46%|████▌     | 1151/2500 [03:29<04:04,  5.51it/s]

Epoch 1151, Loss: 0.0141
Epoch 1152, Loss: 0.0141


Training:  46%|████▌     | 1153/2500 [03:29<04:03,  5.52it/s]

Epoch 1153, Loss: 0.0141
Epoch 1154, Loss: 0.0141


Training:  46%|████▌     | 1155/2500 [03:29<04:03,  5.52it/s]

Epoch 1155, Loss: 0.0141
Epoch 1156, Loss: 0.0141


Training:  46%|████▋     | 1157/2500 [03:30<04:10,  5.37it/s]

Epoch 1157, Loss: 0.0141
Epoch 1158, Loss: 0.0141


Training:  46%|████▋     | 1159/2500 [03:30<04:12,  5.31it/s]

Epoch 1159, Loss: 0.0141
Epoch 1160, Loss: 0.0141


Training:  46%|████▋     | 1161/2500 [03:30<04:10,  5.34it/s]

Epoch 1161, Loss: 0.0141
Epoch 1162, Loss: 0.0141


Training:  47%|████▋     | 1163/2500 [03:31<04:08,  5.39it/s]

Epoch 1163, Loss: 0.0141
Epoch 1164, Loss: 0.0141


Training:  47%|████▋     | 1165/2500 [03:31<04:08,  5.37it/s]

Epoch 1165, Loss: 0.0141
Epoch 1166, Loss: 0.0141


Training:  47%|████▋     | 1167/2500 [03:32<04:11,  5.31it/s]

Epoch 1167, Loss: 0.0141
Epoch 1168, Loss: 0.0141


Training:  47%|████▋     | 1169/2500 [03:32<04:08,  5.37it/s]

Epoch 1169, Loss: 0.0141
Epoch 1170, Loss: 0.0141


Training:  47%|████▋     | 1171/2500 [03:32<04:03,  5.45it/s]

Epoch 1171, Loss: 0.0141
Epoch 1172, Loss: 0.0141


Training:  47%|████▋     | 1173/2500 [03:33<04:02,  5.48it/s]

Epoch 1173, Loss: 0.0141
Epoch 1174, Loss: 0.0141


Training:  47%|████▋     | 1175/2500 [03:33<04:01,  5.48it/s]

Epoch 1175, Loss: 0.0141
Epoch 1176, Loss: 0.0141


Training:  47%|████▋     | 1177/2500 [03:33<04:00,  5.50it/s]

Epoch 1177, Loss: 0.0141
Epoch 1178, Loss: 0.0141


Training:  47%|████▋     | 1179/2500 [03:34<04:00,  5.49it/s]

Epoch 1179, Loss: 0.0141
Epoch 1180, Loss: 0.0141


Training:  47%|████▋     | 1181/2500 [03:34<03:58,  5.52it/s]

Epoch 1181, Loss: 0.0141
Epoch 1182, Loss: 0.0141


Training:  47%|████▋     | 1183/2500 [03:34<03:59,  5.50it/s]

Epoch 1183, Loss: 0.0141
Epoch 1184, Loss: 0.0141


Training:  47%|████▋     | 1185/2500 [03:35<03:58,  5.51it/s]

Epoch 1185, Loss: 0.0141
Epoch 1186, Loss: 0.0141


Training:  47%|████▋     | 1187/2500 [03:35<03:58,  5.52it/s]

Epoch 1187, Loss: 0.0141
Epoch 1188, Loss: 0.0141


Training:  48%|████▊     | 1189/2500 [03:36<03:57,  5.52it/s]

Epoch 1189, Loss: 0.0141
Epoch 1190, Loss: 0.0141


Training:  48%|████▊     | 1191/2500 [03:36<03:57,  5.52it/s]

Epoch 1191, Loss: 0.0141
Epoch 1192, Loss: 0.0141


Training:  48%|████▊     | 1193/2500 [03:36<03:56,  5.53it/s]

Epoch 1193, Loss: 0.0141
Epoch 1194, Loss: 0.0141


Training:  48%|████▊     | 1195/2500 [03:37<03:55,  5.54it/s]

Epoch 1195, Loss: 0.0141
Epoch 1196, Loss: 0.0141


Training:  48%|████▊     | 1197/2500 [03:37<03:55,  5.54it/s]

Epoch 1197, Loss: 0.0141
Epoch 1198, Loss: 0.0141


Training:  48%|████▊     | 1199/2500 [03:37<03:55,  5.53it/s]

Epoch 1199, Loss: 0.0141
Epoch 1200, Loss: 0.0141


Training:  48%|████▊     | 1201/2500 [03:38<03:54,  5.53it/s]

Epoch 1201, Loss: 0.0141
Epoch 1202, Loss: 0.0141


Training:  48%|████▊     | 1203/2500 [03:38<03:54,  5.53it/s]

Epoch 1203, Loss: 0.0141
Epoch 1204, Loss: 0.0141


Training:  48%|████▊     | 1205/2500 [03:38<03:53,  5.54it/s]

Epoch 1205, Loss: 0.0141
Epoch 1206, Loss: 0.0141


Training:  48%|████▊     | 1207/2500 [03:39<03:53,  5.54it/s]

Epoch 1207, Loss: 0.0141
Epoch 1208, Loss: 0.0141


Training:  48%|████▊     | 1209/2500 [03:39<03:54,  5.52it/s]

Epoch 1209, Loss: 0.0141
Epoch 1210, Loss: 0.0141


Training:  48%|████▊     | 1211/2500 [03:40<03:53,  5.51it/s]

Epoch 1211, Loss: 0.0141
Epoch 1212, Loss: 0.0141


Training:  49%|████▊     | 1213/2500 [03:40<03:53,  5.51it/s]

Epoch 1213, Loss: 0.0141
Epoch 1214, Loss: 0.0141


Training:  49%|████▊     | 1215/2500 [03:40<03:54,  5.48it/s]

Epoch 1215, Loss: 0.0141
Epoch 1216, Loss: 0.0141


Training:  49%|████▊     | 1217/2500 [03:41<03:53,  5.49it/s]

Epoch 1217, Loss: 0.0141
Epoch 1218, Loss: 0.0141


Training:  49%|████▉     | 1219/2500 [03:41<03:54,  5.46it/s]

Epoch 1219, Loss: 0.0141
Epoch 1220, Loss: 0.0141


Training:  49%|████▉     | 1221/2500 [03:41<03:52,  5.50it/s]

Epoch 1221, Loss: 0.0141
Epoch 1222, Loss: 0.0141


Training:  49%|████▉     | 1223/2500 [03:42<03:51,  5.52it/s]

Epoch 1223, Loss: 0.0141
Epoch 1224, Loss: 0.0141


Training:  49%|████▉     | 1225/2500 [03:42<03:56,  5.38it/s]

Epoch 1225, Loss: 0.0141
Epoch 1226, Loss: 0.0141


Training:  49%|████▉     | 1227/2500 [03:42<03:56,  5.39it/s]

Epoch 1227, Loss: 0.0141
Epoch 1228, Loss: 0.0141


Training:  49%|████▉     | 1229/2500 [03:43<03:55,  5.39it/s]

Epoch 1229, Loss: 0.0141
Epoch 1230, Loss: 0.0141


Training:  49%|████▉     | 1231/2500 [03:43<03:57,  5.35it/s]

Epoch 1231, Loss: 0.0141
Epoch 1232, Loss: 0.0141


Training:  49%|████▉     | 1233/2500 [03:44<03:55,  5.37it/s]

Epoch 1233, Loss: 0.0141
Epoch 1234, Loss: 0.0141


Training:  49%|████▉     | 1235/2500 [03:44<03:55,  5.38it/s]

Epoch 1235, Loss: 0.0141
Epoch 1236, Loss: 0.0141


Training:  49%|████▉     | 1237/2500 [03:44<03:51,  5.45it/s]

Epoch 1237, Loss: 0.0141
Epoch 1238, Loss: 0.0141


Training:  50%|████▉     | 1239/2500 [03:45<03:48,  5.51it/s]

Epoch 1239, Loss: 0.0141
Epoch 1240, Loss: 0.0141


Training:  50%|████▉     | 1241/2500 [03:45<03:47,  5.52it/s]

Epoch 1241, Loss: 0.0141
Epoch 1242, Loss: 0.0141


Training:  50%|████▉     | 1243/2500 [03:45<03:47,  5.52it/s]

Epoch 1243, Loss: 0.0141
Epoch 1244, Loss: 0.0141


Training:  50%|████▉     | 1245/2500 [03:46<03:47,  5.52it/s]

Epoch 1245, Loss: 0.0141
Epoch 1246, Loss: 0.0141


Training:  50%|████▉     | 1247/2500 [03:46<03:46,  5.52it/s]

Epoch 1247, Loss: 0.0141
Epoch 1248, Loss: 0.0141


Training:  50%|████▉     | 1249/2500 [03:47<03:46,  5.51it/s]

Epoch 1249, Loss: 0.0141
Epoch 1250, Loss: 0.0141


Training:  50%|█████     | 1251/2500 [03:47<03:56,  5.29it/s]

Epoch 1251, Loss: 0.0141
THRESHOLDING: 9 active coefficients
Epoch 1252, Loss: 0.0141


Training:  50%|█████     | 1253/2500 [03:47<03:44,  5.56it/s]

Epoch 1253, Loss: 0.0141
Epoch 1254, Loss: 0.0141


Training:  50%|█████     | 1255/2500 [03:48<03:45,  5.53it/s]

Epoch 1255, Loss: 0.0141
Epoch 1256, Loss: 0.0141


Training:  50%|█████     | 1257/2500 [03:48<03:44,  5.54it/s]

Epoch 1257, Loss: 0.0141
Epoch 1258, Loss: 0.0141


Training:  50%|█████     | 1259/2500 [03:48<03:45,  5.49it/s]

Epoch 1259, Loss: 0.0141
Epoch 1260, Loss: 0.0141


Training:  50%|█████     | 1261/2500 [03:49<03:49,  5.40it/s]

Epoch 1261, Loss: 0.0141
Epoch 1262, Loss: 0.0141


Training:  51%|█████     | 1263/2500 [03:49<03:50,  5.36it/s]

Epoch 1263, Loss: 0.0141
Epoch 1264, Loss: 0.0141


Training:  51%|█████     | 1265/2500 [03:49<03:48,  5.40it/s]

Epoch 1265, Loss: 0.0141
Epoch 1266, Loss: 0.0141


Training:  51%|█████     | 1267/2500 [03:50<03:50,  5.34it/s]

Epoch 1267, Loss: 0.0141
Epoch 1268, Loss: 0.0141


Training:  51%|█████     | 1269/2500 [03:50<03:50,  5.35it/s]

Epoch 1269, Loss: 0.0141
Epoch 1270, Loss: 0.0141


Training:  51%|█████     | 1271/2500 [03:51<03:52,  5.29it/s]

Epoch 1271, Loss: 0.0141
Epoch 1272, Loss: 0.0141


Training:  51%|█████     | 1273/2500 [03:51<03:47,  5.40it/s]

Epoch 1273, Loss: 0.0141
Epoch 1274, Loss: 0.0141


Training:  51%|█████     | 1275/2500 [03:51<03:44,  5.46it/s]

Epoch 1275, Loss: 0.0141
Epoch 1276, Loss: 0.0141


Training:  51%|█████     | 1277/2500 [03:52<03:43,  5.47it/s]

Epoch 1277, Loss: 0.0141
Epoch 1278, Loss: 0.0141


Training:  51%|█████     | 1279/2500 [03:52<03:43,  5.45it/s]

Epoch 1279, Loss: 0.0141
Epoch 1280, Loss: 0.0141


Training:  51%|█████     | 1281/2500 [03:52<03:42,  5.49it/s]

Epoch 1281, Loss: 0.0141
Epoch 1282, Loss: 0.0141


Training:  51%|█████▏    | 1283/2500 [03:53<03:40,  5.51it/s]

Epoch 1283, Loss: 0.0141
Epoch 1284, Loss: 0.0141


Training:  51%|█████▏    | 1285/2500 [03:53<03:40,  5.52it/s]

Epoch 1285, Loss: 0.0141
Epoch 1286, Loss: 0.0141


Training:  51%|█████▏    | 1287/2500 [03:53<03:39,  5.51it/s]

Epoch 1287, Loss: 0.0141
Epoch 1288, Loss: 0.0141


Training:  52%|█████▏    | 1289/2500 [03:54<03:40,  5.50it/s]

Epoch 1289, Loss: 0.0141
Epoch 1290, Loss: 0.0141


Training:  52%|█████▏    | 1291/2500 [03:54<03:42,  5.44it/s]

Epoch 1291, Loss: 0.0141
Epoch 1292, Loss: 0.0141


Training:  52%|█████▏    | 1293/2500 [03:55<03:43,  5.41it/s]

Epoch 1293, Loss: 0.0141
Epoch 1294, Loss: 0.0141


Training:  52%|█████▏    | 1295/2500 [03:55<03:44,  5.36it/s]

Epoch 1295, Loss: 0.0141
Epoch 1296, Loss: 0.0141


Training:  52%|█████▏    | 1297/2500 [03:55<03:42,  5.41it/s]

Epoch 1297, Loss: 0.0141
Epoch 1298, Loss: 0.0141


Training:  52%|█████▏    | 1299/2500 [03:56<03:45,  5.33it/s]

Epoch 1299, Loss: 0.0141
Epoch 1300, Loss: 0.0141


Training:  52%|█████▏    | 1301/2500 [03:56<03:46,  5.30it/s]

Epoch 1301, Loss: 0.0141
Epoch 1302, Loss: 0.0141


Training:  52%|█████▏    | 1303/2500 [03:56<03:40,  5.43it/s]

Epoch 1303, Loss: 0.0141
Epoch 1304, Loss: 0.0141


Training:  52%|█████▏    | 1305/2500 [03:57<03:38,  5.47it/s]

Epoch 1305, Loss: 0.0141
Epoch 1306, Loss: 0.0141


Training:  52%|█████▏    | 1307/2500 [03:57<03:37,  5.50it/s]

Epoch 1307, Loss: 0.0141
Epoch 1308, Loss: 0.0141


Training:  52%|█████▏    | 1309/2500 [03:58<03:36,  5.51it/s]

Epoch 1309, Loss: 0.0141
Epoch 1310, Loss: 0.0141


Training:  52%|█████▏    | 1311/2500 [03:58<03:36,  5.50it/s]

Epoch 1311, Loss: 0.0141
Epoch 1312, Loss: 0.0141


Training:  53%|█████▎    | 1313/2500 [03:58<03:34,  5.52it/s]

Epoch 1313, Loss: 0.0141
Epoch 1314, Loss: 0.0141


Training:  53%|█████▎    | 1315/2500 [03:59<03:34,  5.53it/s]

Epoch 1315, Loss: 0.0141
Epoch 1316, Loss: 0.0141


Training:  53%|█████▎    | 1317/2500 [03:59<03:33,  5.53it/s]

Epoch 1317, Loss: 0.0141
Epoch 1318, Loss: 0.0141


Training:  53%|█████▎    | 1319/2500 [03:59<03:33,  5.54it/s]

Epoch 1319, Loss: 0.0141
Epoch 1320, Loss: 0.0141


Training:  53%|█████▎    | 1321/2500 [04:00<03:32,  5.54it/s]

Epoch 1321, Loss: 0.0141
Epoch 1322, Loss: 0.0141


Training:  53%|█████▎    | 1323/2500 [04:00<03:32,  5.54it/s]

Epoch 1323, Loss: 0.0141
Epoch 1324, Loss: 0.0141


Training:  53%|█████▎    | 1325/2500 [04:00<03:32,  5.53it/s]

Epoch 1325, Loss: 0.0141
Epoch 1326, Loss: 0.0141


Training:  53%|█████▎    | 1327/2500 [04:01<03:32,  5.52it/s]

Epoch 1327, Loss: 0.0141
Epoch 1328, Loss: 0.0141


Training:  53%|█████▎    | 1329/2500 [04:01<03:33,  5.49it/s]

Epoch 1329, Loss: 0.0141
Epoch 1330, Loss: 0.0141


Training:  53%|█████▎    | 1331/2500 [04:02<03:32,  5.50it/s]

Epoch 1331, Loss: 0.0141
Epoch 1332, Loss: 0.0141


Training:  53%|█████▎    | 1333/2500 [04:02<03:32,  5.48it/s]

Epoch 1333, Loss: 0.0141
Epoch 1334, Loss: 0.0141


Training:  53%|█████▎    | 1335/2500 [04:02<03:30,  5.53it/s]

Epoch 1335, Loss: 0.0141
Epoch 1336, Loss: 0.0141


Training:  53%|█████▎    | 1337/2500 [04:03<03:30,  5.53it/s]

Epoch 1337, Loss: 0.0141
Epoch 1338, Loss: 0.0141


Training:  54%|█████▎    | 1339/2500 [04:03<03:29,  5.54it/s]

Epoch 1339, Loss: 0.0141
Epoch 1340, Loss: 0.0141


Training:  54%|█████▎    | 1341/2500 [04:03<03:29,  5.53it/s]

Epoch 1341, Loss: 0.0141
Epoch 1342, Loss: 0.0141


Training:  54%|█████▎    | 1343/2500 [04:04<03:29,  5.53it/s]

Epoch 1343, Loss: 0.0141
Epoch 1344, Loss: 0.0141


Training:  54%|█████▍    | 1345/2500 [04:04<03:29,  5.50it/s]

Epoch 1345, Loss: 0.0141
Epoch 1346, Loss: 0.0141


Training:  54%|█████▍    | 1347/2500 [04:04<03:29,  5.51it/s]

Epoch 1347, Loss: 0.0141
Epoch 1348, Loss: 0.0141


Training:  54%|█████▍    | 1349/2500 [04:05<03:28,  5.51it/s]

Epoch 1349, Loss: 0.0141
Epoch 1350, Loss: 0.0141


Training:  54%|█████▍    | 1351/2500 [04:05<03:28,  5.51it/s]

Epoch 1351, Loss: 0.0141
Epoch 1352, Loss: 0.0141


Training:  54%|█████▍    | 1353/2500 [04:06<03:28,  5.51it/s]

Epoch 1353, Loss: 0.0141
Epoch 1354, Loss: 0.0141


Training:  54%|█████▍    | 1355/2500 [04:06<03:27,  5.51it/s]

Epoch 1355, Loss: 0.0141
Epoch 1356, Loss: 0.0141


Training:  54%|█████▍    | 1357/2500 [04:06<03:28,  5.47it/s]

Epoch 1357, Loss: 0.0141
Epoch 1358, Loss: 0.0141


Training:  54%|█████▍    | 1359/2500 [04:07<03:32,  5.38it/s]

Epoch 1359, Loss: 0.0141
Epoch 1360, Loss: 0.0141


Training:  54%|█████▍    | 1361/2500 [04:07<03:30,  5.40it/s]

Epoch 1361, Loss: 0.0141
Epoch 1362, Loss: 0.0141


Training:  55%|█████▍    | 1363/2500 [04:07<03:30,  5.41it/s]

Epoch 1363, Loss: 0.0141
Epoch 1364, Loss: 0.0141


Training:  55%|█████▍    | 1365/2500 [04:08<03:31,  5.37it/s]

Epoch 1365, Loss: 0.0141
Epoch 1366, Loss: 0.0141


Training:  55%|█████▍    | 1367/2500 [04:08<03:33,  5.31it/s]

Epoch 1367, Loss: 0.0141
Epoch 1368, Loss: 0.0141


Training:  55%|█████▍    | 1369/2500 [04:08<03:31,  5.35it/s]

Epoch 1369, Loss: 0.0141
Epoch 1370, Loss: 0.0141


Training:  55%|█████▍    | 1371/2500 [04:09<03:27,  5.45it/s]

Epoch 1371, Loss: 0.0141
Epoch 1372, Loss: 0.0141


Training:  55%|█████▍    | 1373/2500 [04:09<03:26,  5.46it/s]

Epoch 1373, Loss: 0.0141
Epoch 1374, Loss: 0.0141


Training:  55%|█████▌    | 1375/2500 [04:10<03:24,  5.51it/s]

Epoch 1375, Loss: 0.0141
Epoch 1376, Loss: 0.0141


Training:  55%|█████▌    | 1377/2500 [04:10<03:23,  5.53it/s]

Epoch 1377, Loss: 0.0141
Epoch 1378, Loss: 0.0141


Training:  55%|█████▌    | 1379/2500 [04:10<03:22,  5.53it/s]

Epoch 1379, Loss: 0.0141
Epoch 1380, Loss: 0.0141


Training:  55%|█████▌    | 1381/2500 [04:11<03:22,  5.54it/s]

Epoch 1381, Loss: 0.0141
Epoch 1382, Loss: 0.0141


Training:  55%|█████▌    | 1383/2500 [04:11<03:21,  5.53it/s]

Epoch 1383, Loss: 0.0141
Epoch 1384, Loss: 0.0141


Training:  55%|█████▌    | 1385/2500 [04:11<03:21,  5.52it/s]

Epoch 1385, Loss: 0.0141
Epoch 1386, Loss: 0.0141


Training:  55%|█████▌    | 1387/2500 [04:12<03:21,  5.53it/s]

Epoch 1387, Loss: 0.0141
Epoch 1388, Loss: 0.0141


Training:  56%|█████▌    | 1389/2500 [04:12<03:20,  5.54it/s]

Epoch 1389, Loss: 0.0141
Epoch 1390, Loss: 0.0141


Training:  56%|█████▌    | 1391/2500 [04:12<03:20,  5.53it/s]

Epoch 1391, Loss: 0.0141
Epoch 1392, Loss: 0.0141


Training:  56%|█████▌    | 1393/2500 [04:13<03:19,  5.54it/s]

Epoch 1393, Loss: 0.0141
Epoch 1394, Loss: 0.0141


Training:  56%|█████▌    | 1395/2500 [04:13<03:20,  5.52it/s]

Epoch 1395, Loss: 0.0141
Epoch 1396, Loss: 0.0141


Training:  56%|█████▌    | 1397/2500 [04:14<03:20,  5.50it/s]

Epoch 1397, Loss: 0.0141
Epoch 1398, Loss: 0.0141


Training:  56%|█████▌    | 1399/2500 [04:14<03:19,  5.51it/s]

Epoch 1399, Loss: 0.0141
Epoch 1400, Loss: 0.0141


Training:  56%|█████▌    | 1401/2500 [04:14<03:19,  5.51it/s]

Epoch 1401, Loss: 0.0141
Epoch 1402, Loss: 0.0141


Training:  56%|█████▌    | 1403/2500 [04:15<03:18,  5.53it/s]

Epoch 1403, Loss: 0.0141
Epoch 1404, Loss: 0.0141


Training:  56%|█████▌    | 1405/2500 [04:15<03:18,  5.52it/s]

Epoch 1405, Loss: 0.0141
Epoch 1406, Loss: 0.0141


Training:  56%|█████▋    | 1407/2500 [04:15<03:18,  5.52it/s]

Epoch 1407, Loss: 0.0141
Epoch 1408, Loss: 0.0141


Training:  56%|█████▋    | 1409/2500 [04:16<03:18,  5.51it/s]

Epoch 1409, Loss: 0.0141
Epoch 1410, Loss: 0.0141


Training:  56%|█████▋    | 1411/2500 [04:16<03:17,  5.51it/s]

Epoch 1411, Loss: 0.0141
Epoch 1412, Loss: 0.0141


Training:  57%|█████▋    | 1413/2500 [04:16<03:18,  5.48it/s]

Epoch 1413, Loss: 0.0141
Epoch 1414, Loss: 0.0141


Training:  57%|█████▋    | 1415/2500 [04:17<03:16,  5.52it/s]

Epoch 1415, Loss: 0.0141
Epoch 1416, Loss: 0.0141


Training:  57%|█████▋    | 1417/2500 [04:17<03:16,  5.52it/s]

Epoch 1417, Loss: 0.0141
Epoch 1418, Loss: 0.0141


Training:  57%|█████▋    | 1419/2500 [04:18<03:16,  5.49it/s]

Epoch 1419, Loss: 0.0141
Epoch 1420, Loss: 0.0141


Training:  57%|█████▋    | 1421/2500 [04:18<03:15,  5.52it/s]

Epoch 1421, Loss: 0.0141
Epoch 1422, Loss: 0.0141


Training:  57%|█████▋    | 1423/2500 [04:18<03:15,  5.50it/s]

Epoch 1423, Loss: 0.0141
Epoch 1424, Loss: 0.0141


Training:  57%|█████▋    | 1425/2500 [04:19<03:19,  5.38it/s]

Epoch 1425, Loss: 0.0141
Epoch 1426, Loss: 0.0141


Training:  57%|█████▋    | 1427/2500 [04:19<03:21,  5.33it/s]

Epoch 1427, Loss: 0.0141
Epoch 1428, Loss: 0.0141


Training:  57%|█████▋    | 1429/2500 [04:19<03:20,  5.34it/s]

Epoch 1429, Loss: 0.0141
Epoch 1430, Loss: 0.0141


Training:  57%|█████▋    | 1431/2500 [04:20<03:20,  5.33it/s]

Epoch 1431, Loss: 0.0141
Epoch 1432, Loss: 0.0141


Training:  57%|█████▋    | 1433/2500 [04:20<03:21,  5.29it/s]

Epoch 1433, Loss: 0.0141
Epoch 1434, Loss: 0.0141


Training:  57%|█████▋    | 1435/2500 [04:21<03:20,  5.32it/s]

Epoch 1435, Loss: 0.0141
Epoch 1436, Loss: 0.0141


Training:  57%|█████▋    | 1437/2500 [04:21<03:16,  5.42it/s]

Epoch 1437, Loss: 0.0141
Epoch 1438, Loss: 0.0141


Training:  58%|█████▊    | 1439/2500 [04:21<03:13,  5.48it/s]

Epoch 1439, Loss: 0.0141
Epoch 1440, Loss: 0.0141


Training:  58%|█████▊    | 1441/2500 [04:22<03:12,  5.51it/s]

Epoch 1441, Loss: 0.0141
Epoch 1442, Loss: 0.0141


Training:  58%|█████▊    | 1443/2500 [04:22<03:11,  5.52it/s]

Epoch 1443, Loss: 0.0141
Epoch 1444, Loss: 0.0141


Training:  58%|█████▊    | 1445/2500 [04:22<03:11,  5.52it/s]

Epoch 1445, Loss: 0.0141
Epoch 1446, Loss: 0.0141


Training:  58%|█████▊    | 1447/2500 [04:23<03:10,  5.53it/s]

Epoch 1447, Loss: 0.0141
Epoch 1448, Loss: 0.0141


Training:  58%|█████▊    | 1449/2500 [04:23<03:10,  5.52it/s]

Epoch 1449, Loss: 0.0141
Epoch 1450, Loss: 0.0141


Training:  58%|█████▊    | 1451/2500 [04:23<03:09,  5.54it/s]

Epoch 1451, Loss: 0.0141
Epoch 1452, Loss: 0.0141


Training:  58%|█████▊    | 1453/2500 [04:24<03:10,  5.50it/s]

Epoch 1453, Loss: 0.0141
Epoch 1454, Loss: 0.0141


Training:  58%|█████▊    | 1455/2500 [04:24<03:08,  5.53it/s]

Epoch 1455, Loss: 0.0141
Epoch 1456, Loss: 0.0141


Training:  58%|█████▊    | 1457/2500 [04:25<03:08,  5.53it/s]

Epoch 1457, Loss: 0.0141
Epoch 1458, Loss: 0.0141


Training:  58%|█████▊    | 1459/2500 [04:25<03:08,  5.53it/s]

Epoch 1459, Loss: 0.0141
Epoch 1460, Loss: 0.0141


Training:  58%|█████▊    | 1461/2500 [04:25<03:07,  5.53it/s]

Epoch 1461, Loss: 0.0141
Epoch 1462, Loss: 0.0141


Training:  59%|█████▊    | 1463/2500 [04:26<03:07,  5.52it/s]

Epoch 1463, Loss: 0.0141
Epoch 1464, Loss: 0.0141


Training:  59%|█████▊    | 1465/2500 [04:26<03:08,  5.50it/s]

Epoch 1465, Loss: 0.0141
Epoch 1466, Loss: 0.0141


Training:  59%|█████▊    | 1467/2500 [04:26<03:07,  5.51it/s]

Epoch 1467, Loss: 0.0141
Epoch 1468, Loss: 0.0141


Training:  59%|█████▉    | 1469/2500 [04:27<03:06,  5.52it/s]

Epoch 1469, Loss: 0.0141
Epoch 1470, Loss: 0.0141


Training:  59%|█████▉    | 1471/2500 [04:27<03:07,  5.50it/s]

Epoch 1471, Loss: 0.0141
Epoch 1472, Loss: 0.0141


Training:  59%|█████▉    | 1473/2500 [04:27<03:06,  5.52it/s]

Epoch 1473, Loss: 0.0141
Epoch 1474, Loss: 0.0141


Training:  59%|█████▉    | 1475/2500 [04:28<03:05,  5.51it/s]

Epoch 1475, Loss: 0.0141
Epoch 1476, Loss: 0.0141


Training:  59%|█████▉    | 1477/2500 [04:28<03:05,  5.52it/s]

Epoch 1477, Loss: 0.0141
Epoch 1478, Loss: 0.0141


Training:  59%|█████▉    | 1479/2500 [04:29<03:04,  5.53it/s]

Epoch 1479, Loss: 0.0141
Epoch 1480, Loss: 0.0141


Training:  59%|█████▉    | 1481/2500 [04:29<03:04,  5.53it/s]

Epoch 1481, Loss: 0.0141
Epoch 1482, Loss: 0.0141


Training:  59%|█████▉    | 1483/2500 [04:29<03:03,  5.54it/s]

Epoch 1483, Loss: 0.0141
Epoch 1484, Loss: 0.0141


Training:  59%|█████▉    | 1485/2500 [04:30<03:03,  5.53it/s]

Epoch 1485, Loss: 0.0141
Epoch 1486, Loss: 0.0141


Training:  59%|█████▉    | 1487/2500 [04:30<03:03,  5.52it/s]

Epoch 1487, Loss: 0.0141
Epoch 1488, Loss: 0.0141


Training:  60%|█████▉    | 1489/2500 [04:30<03:04,  5.49it/s]

Epoch 1489, Loss: 0.0141
Epoch 1490, Loss: 0.0141


Training:  60%|█████▉    | 1491/2500 [04:31<03:04,  5.47it/s]

Epoch 1491, Loss: 0.0141
Epoch 1492, Loss: 0.0141


Training:  60%|█████▉    | 1493/2500 [04:31<03:06,  5.40it/s]

Epoch 1493, Loss: 0.0141
Epoch 1494, Loss: 0.0141


Training:  60%|█████▉    | 1495/2500 [04:31<03:06,  5.38it/s]

Epoch 1495, Loss: 0.0141
Epoch 1496, Loss: 0.0141


Training:  60%|█████▉    | 1497/2500 [04:32<03:06,  5.38it/s]

Epoch 1497, Loss: 0.0141
Epoch 1498, Loss: 0.0141


Training:  60%|█████▉    | 1499/2500 [04:32<03:08,  5.30it/s]

Epoch 1499, Loss: 0.0141
Epoch 1500, Loss: 0.0141


Training:  60%|██████    | 1501/2500 [04:33<03:16,  5.07it/s]

Epoch 1501, Loss: 0.0141
THRESHOLDING: 9 active coefficients


Training:  60%|██████    | 1502/2500 [04:33<03:11,  5.21it/s]

Epoch 1502, Loss: 0.0141
Epoch 1503, Loss: 0.0141


Training:  60%|██████    | 1504/2500 [04:33<03:05,  5.37it/s]

Epoch 1504, Loss: 0.0141
Epoch 1505, Loss: 0.0141


Training:  60%|██████    | 1506/2500 [04:34<03:03,  5.42it/s]

Epoch 1506, Loss: 0.0141
Epoch 1507, Loss: 0.0141


Training:  60%|██████    | 1508/2500 [04:34<03:01,  5.47it/s]

Epoch 1508, Loss: 0.0141
Epoch 1509, Loss: 0.0141


Training:  60%|██████    | 1510/2500 [04:34<02:59,  5.51it/s]

Epoch 1510, Loss: 0.0141
Epoch 1511, Loss: 0.0141


Training:  60%|██████    | 1512/2500 [04:35<02:59,  5.51it/s]

Epoch 1512, Loss: 0.0141
Epoch 1513, Loss: 0.0141


Training:  61%|██████    | 1514/2500 [04:35<02:58,  5.52it/s]

Epoch 1514, Loss: 0.0141
Epoch 1515, Loss: 0.0141


Training:  61%|██████    | 1516/2500 [04:35<02:58,  5.51it/s]

Epoch 1516, Loss: 0.0141
Epoch 1517, Loss: 0.0141


Training:  61%|██████    | 1518/2500 [04:36<02:58,  5.51it/s]

Epoch 1518, Loss: 0.0141
Epoch 1519, Loss: 0.0141


Training:  61%|██████    | 1520/2500 [04:36<02:57,  5.51it/s]

Epoch 1520, Loss: 0.0141
Epoch 1521, Loss: 0.0141


Training:  61%|██████    | 1522/2500 [04:36<02:58,  5.49it/s]

Epoch 1522, Loss: 0.0141
Epoch 1523, Loss: 0.0141


Training:  61%|██████    | 1524/2500 [04:37<02:56,  5.52it/s]

Epoch 1524, Loss: 0.0141
Epoch 1525, Loss: 0.0141


Training:  61%|██████    | 1526/2500 [04:37<02:56,  5.52it/s]

Epoch 1526, Loss: 0.0141
Epoch 1527, Loss: 0.0141


Training:  61%|██████    | 1528/2500 [04:37<02:56,  5.52it/s]

Epoch 1528, Loss: 0.0141
Epoch 1529, Loss: 0.0141


Training:  61%|██████    | 1530/2500 [04:38<02:55,  5.52it/s]

Epoch 1530, Loss: 0.0141
Epoch 1531, Loss: 0.0141


Training:  61%|██████▏   | 1532/2500 [04:38<02:55,  5.52it/s]

Epoch 1532, Loss: 0.0141
Epoch 1533, Loss: 0.0141


Training:  61%|██████▏   | 1534/2500 [04:39<02:55,  5.51it/s]

Epoch 1534, Loss: 0.0141
Epoch 1535, Loss: 0.0141


Training:  61%|██████▏   | 1536/2500 [04:39<02:54,  5.52it/s]

Epoch 1536, Loss: 0.0141
Epoch 1537, Loss: 0.0141


Training:  62%|██████▏   | 1538/2500 [04:39<02:53,  5.53it/s]

Epoch 1538, Loss: 0.0141
Epoch 1539, Loss: 0.0141


Training:  62%|██████▏   | 1540/2500 [04:40<02:53,  5.53it/s]

Epoch 1540, Loss: 0.0141
Epoch 1541, Loss: 0.0141


Training:  62%|██████▏   | 1542/2500 [04:40<02:53,  5.53it/s]

Epoch 1542, Loss: 0.0141
Epoch 1543, Loss: 0.0141


Training:  62%|██████▏   | 1544/2500 [04:40<02:52,  5.54it/s]

Epoch 1544, Loss: 0.0141
Epoch 1545, Loss: 0.0141


Training:  62%|██████▏   | 1546/2500 [04:41<02:52,  5.53it/s]

Epoch 1546, Loss: 0.0141
Epoch 1547, Loss: 0.0141


Training:  62%|██████▏   | 1548/2500 [04:41<02:51,  5.54it/s]

Epoch 1548, Loss: 0.0141
Epoch 1549, Loss: 0.0141


Training:  62%|██████▏   | 1550/2500 [04:41<02:51,  5.54it/s]

Epoch 1550, Loss: 0.0141
Epoch 1551, Loss: 0.0141


Training:  62%|██████▏   | 1552/2500 [04:42<02:50,  5.55it/s]

Epoch 1552, Loss: 0.0141
Epoch 1553, Loss: 0.0141


Training:  62%|██████▏   | 1554/2500 [04:42<02:50,  5.56it/s]

Epoch 1554, Loss: 0.0141
Epoch 1555, Loss: 0.0141


Training:  62%|██████▏   | 1556/2500 [04:43<02:50,  5.55it/s]

Epoch 1556, Loss: 0.0141
Epoch 1557, Loss: 0.0141


Training:  62%|██████▏   | 1558/2500 [04:43<02:52,  5.47it/s]

Epoch 1558, Loss: 0.0141
Epoch 1559, Loss: 0.0141


Training:  62%|██████▏   | 1560/2500 [04:43<02:52,  5.44it/s]

Epoch 1560, Loss: 0.0141
Epoch 1561, Loss: 0.0141


Training:  62%|██████▏   | 1562/2500 [04:44<02:53,  5.40it/s]

Epoch 1562, Loss: 0.0141
Epoch 1563, Loss: 0.0141


Training:  63%|██████▎   | 1564/2500 [04:44<02:52,  5.42it/s]

Epoch 1564, Loss: 0.0141
Epoch 1565, Loss: 0.0141


Training:  63%|██████▎   | 1566/2500 [04:44<02:52,  5.40it/s]

Epoch 1566, Loss: 0.0141
Epoch 1567, Loss: 0.0141


Training:  63%|██████▎   | 1568/2500 [04:45<02:56,  5.29it/s]

Epoch 1568, Loss: 0.0141
Epoch 1569, Loss: 0.0141


Training:  63%|██████▎   | 1570/2500 [04:45<02:53,  5.37it/s]

Epoch 1570, Loss: 0.0141
Epoch 1571, Loss: 0.0141


Training:  63%|██████▎   | 1572/2500 [04:46<02:49,  5.46it/s]

Epoch 1572, Loss: 0.0141
Epoch 1573, Loss: 0.0141


Training:  63%|██████▎   | 1574/2500 [04:46<02:48,  5.48it/s]

Epoch 1574, Loss: 0.0141
Epoch 1575, Loss: 0.0141


Training:  63%|██████▎   | 1576/2500 [04:46<02:47,  5.51it/s]

Epoch 1576, Loss: 0.0141
Epoch 1577, Loss: 0.0141


Training:  63%|██████▎   | 1578/2500 [04:47<02:46,  5.52it/s]

Epoch 1578, Loss: 0.0141
Epoch 1579, Loss: 0.0141


Training:  63%|██████▎   | 1580/2500 [04:47<02:46,  5.53it/s]

Epoch 1580, Loss: 0.0141
Epoch 1581, Loss: 0.0141


Training:  63%|██████▎   | 1582/2500 [04:47<02:45,  5.54it/s]

Epoch 1582, Loss: 0.0141
Epoch 1583, Loss: 0.0141


Training:  63%|██████▎   | 1584/2500 [04:48<02:44,  5.55it/s]

Epoch 1584, Loss: 0.0141
Epoch 1585, Loss: 0.0141


Training:  63%|██████▎   | 1586/2500 [04:48<02:45,  5.51it/s]

Epoch 1586, Loss: 0.0141
Epoch 1587, Loss: 0.0141


Training:  64%|██████▎   | 1588/2500 [04:48<02:44,  5.54it/s]

Epoch 1588, Loss: 0.0141
Epoch 1589, Loss: 0.0141


Training:  64%|██████▎   | 1590/2500 [04:49<02:44,  5.52it/s]

Epoch 1590, Loss: 0.0141
Epoch 1591, Loss: 0.0141


Training:  64%|██████▎   | 1592/2500 [04:49<02:44,  5.53it/s]

Epoch 1592, Loss: 0.0141
Epoch 1593, Loss: 0.0141


Training:  64%|██████▍   | 1594/2500 [04:49<02:44,  5.51it/s]

Epoch 1594, Loss: 0.0141
Epoch 1595, Loss: 0.0141


Training:  64%|██████▍   | 1596/2500 [04:50<02:43,  5.54it/s]

Epoch 1596, Loss: 0.0141
Epoch 1597, Loss: 0.0141


Training:  64%|██████▍   | 1598/2500 [04:50<02:42,  5.55it/s]

Epoch 1598, Loss: 0.0141
Epoch 1599, Loss: 0.0141


Training:  64%|██████▍   | 1600/2500 [04:51<02:42,  5.55it/s]

Epoch 1600, Loss: 0.0141
Epoch 1601, Loss: 0.0141


Training:  64%|██████▍   | 1602/2500 [04:51<02:42,  5.54it/s]

Epoch 1602, Loss: 0.0141
Epoch 1603, Loss: 0.0141


Training:  64%|██████▍   | 1604/2500 [04:51<02:42,  5.52it/s]

Epoch 1604, Loss: 0.0141
Epoch 1605, Loss: 0.0141


Training:  64%|██████▍   | 1606/2500 [04:52<02:41,  5.53it/s]

Epoch 1606, Loss: 0.0141
Epoch 1607, Loss: 0.0141


Training:  64%|██████▍   | 1608/2500 [04:52<02:41,  5.51it/s]

Epoch 1608, Loss: 0.0141
Epoch 1609, Loss: 0.0141


Training:  64%|██████▍   | 1610/2500 [04:52<02:40,  5.53it/s]

Epoch 1610, Loss: 0.0141
Epoch 1611, Loss: 0.0141


Training:  64%|██████▍   | 1612/2500 [04:53<02:40,  5.54it/s]

Epoch 1612, Loss: 0.0141
Epoch 1613, Loss: 0.0141


Training:  65%|██████▍   | 1614/2500 [04:53<02:40,  5.54it/s]

Epoch 1614, Loss: 0.0141
Epoch 1615, Loss: 0.0141


Training:  65%|██████▍   | 1616/2500 [04:53<02:39,  5.54it/s]

Epoch 1616, Loss: 0.0141
Epoch 1617, Loss: 0.0141


Training:  65%|██████▍   | 1618/2500 [04:54<02:38,  5.56it/s]

Epoch 1618, Loss: 0.0141
Epoch 1619, Loss: 0.0141


Training:  65%|██████▍   | 1620/2500 [04:54<02:38,  5.55it/s]

Epoch 1620, Loss: 0.0141
Epoch 1621, Loss: 0.0141


Training:  65%|██████▍   | 1622/2500 [04:55<02:38,  5.55it/s]

Epoch 1622, Loss: 0.0141
Epoch 1623, Loss: 0.0141


Training:  65%|██████▍   | 1624/2500 [04:55<02:38,  5.54it/s]

Epoch 1624, Loss: 0.0141
Epoch 1625, Loss: 0.0141


Training:  65%|██████▌   | 1626/2500 [04:55<02:40,  5.44it/s]

Epoch 1626, Loss: 0.0141
Epoch 1627, Loss: 0.0141


Training:  65%|██████▌   | 1628/2500 [04:56<02:40,  5.45it/s]

Epoch 1628, Loss: 0.0141
Epoch 1629, Loss: 0.0141


Training:  65%|██████▌   | 1630/2500 [04:56<02:40,  5.44it/s]

Epoch 1630, Loss: 0.0141
Epoch 1631, Loss: 0.0141


Training:  65%|██████▌   | 1632/2500 [04:56<02:38,  5.46it/s]

Epoch 1632, Loss: 0.0141
Epoch 1633, Loss: 0.0141


Training:  65%|██████▌   | 1634/2500 [04:57<02:40,  5.40it/s]

Epoch 1634, Loss: 0.0141
Epoch 1635, Loss: 0.0141


Training:  65%|██████▌   | 1636/2500 [04:57<02:41,  5.35it/s]

Epoch 1636, Loss: 0.0141
Epoch 1637, Loss: 0.0141


Training:  66%|██████▌   | 1638/2500 [04:57<02:38,  5.45it/s]

Epoch 1638, Loss: 0.0141
Epoch 1639, Loss: 0.0141


Training:  66%|██████▌   | 1640/2500 [04:58<02:36,  5.49it/s]

Epoch 1640, Loss: 0.0141
Epoch 1641, Loss: 0.0141


Training:  66%|██████▌   | 1642/2500 [04:58<02:35,  5.50it/s]

Epoch 1642, Loss: 0.0141
Epoch 1643, Loss: 0.0141


Training:  66%|██████▌   | 1644/2500 [04:59<02:34,  5.53it/s]

Epoch 1644, Loss: 0.0141
Epoch 1645, Loss: 0.0141


Training:  66%|██████▌   | 1646/2500 [04:59<02:34,  5.54it/s]

Epoch 1646, Loss: 0.0141
Epoch 1647, Loss: 0.0141


Training:  66%|██████▌   | 1648/2500 [04:59<02:34,  5.52it/s]

Epoch 1648, Loss: 0.0141
Epoch 1649, Loss: 0.0141


Training:  66%|██████▌   | 1650/2500 [05:00<02:33,  5.53it/s]

Epoch 1650, Loss: 0.0141
Epoch 1651, Loss: 0.0141


Training:  66%|██████▌   | 1652/2500 [05:00<02:33,  5.54it/s]

Epoch 1652, Loss: 0.0141
Epoch 1653, Loss: 0.0141


Training:  66%|██████▌   | 1654/2500 [05:00<02:32,  5.54it/s]

Epoch 1654, Loss: 0.0141
Epoch 1655, Loss: 0.0141


Training:  66%|██████▌   | 1656/2500 [05:01<02:31,  5.55it/s]

Epoch 1656, Loss: 0.0141
Epoch 1657, Loss: 0.0141


Training:  66%|██████▋   | 1658/2500 [05:01<02:31,  5.55it/s]

Epoch 1658, Loss: 0.0141
Epoch 1659, Loss: 0.0141


Training:  66%|██████▋   | 1660/2500 [05:01<02:31,  5.55it/s]

Epoch 1660, Loss: 0.0141
Epoch 1661, Loss: 0.0141


Training:  66%|██████▋   | 1662/2500 [05:02<02:31,  5.55it/s]

Epoch 1662, Loss: 0.0141
Epoch 1663, Loss: 0.0141


Training:  67%|██████▋   | 1664/2500 [05:02<02:30,  5.54it/s]

Epoch 1664, Loss: 0.0141
Epoch 1665, Loss: 0.0141


Training:  67%|██████▋   | 1666/2500 [05:03<02:30,  5.53it/s]

Epoch 1666, Loss: 0.0141
Epoch 1667, Loss: 0.0141


Training:  67%|██████▋   | 1668/2500 [05:03<02:30,  5.53it/s]

Epoch 1668, Loss: 0.0141
Epoch 1669, Loss: 0.0141


Training:  67%|██████▋   | 1670/2500 [05:03<02:31,  5.49it/s]

Epoch 1670, Loss: 0.0141
Epoch 1671, Loss: 0.0141


Training:  67%|██████▋   | 1672/2500 [05:04<02:30,  5.52it/s]

Epoch 1672, Loss: 0.0141
Epoch 1673, Loss: 0.0141


Training:  67%|██████▋   | 1674/2500 [05:04<02:29,  5.52it/s]

Epoch 1674, Loss: 0.0141
Epoch 1675, Loss: 0.0141


Training:  67%|██████▋   | 1676/2500 [05:04<02:29,  5.52it/s]

Epoch 1676, Loss: 0.0141
Epoch 1677, Loss: 0.0141


Training:  67%|██████▋   | 1678/2500 [05:05<02:29,  5.51it/s]

Epoch 1678, Loss: 0.0141
Epoch 1679, Loss: 0.0141


Training:  67%|██████▋   | 1680/2500 [05:05<02:28,  5.53it/s]

Epoch 1680, Loss: 0.0141
Epoch 1681, Loss: 0.0141


Training:  67%|██████▋   | 1682/2500 [05:05<02:28,  5.52it/s]

Epoch 1682, Loss: 0.0141
Epoch 1683, Loss: 0.0141


Training:  67%|██████▋   | 1684/2500 [05:06<02:27,  5.53it/s]

Epoch 1684, Loss: 0.0141
Epoch 1685, Loss: 0.0141


Training:  67%|██████▋   | 1686/2500 [05:06<02:27,  5.51it/s]

Epoch 1686, Loss: 0.0141
Epoch 1687, Loss: 0.0141


Training:  68%|██████▊   | 1688/2500 [05:07<02:27,  5.49it/s]

Epoch 1688, Loss: 0.0141
Epoch 1689, Loss: 0.0141


Training:  68%|██████▊   | 1690/2500 [05:07<02:26,  5.52it/s]

Epoch 1690, Loss: 0.0141
Epoch 1691, Loss: 0.0141


Training:  68%|██████▊   | 1692/2500 [05:07<02:26,  5.50it/s]

Epoch 1692, Loss: 0.0141
Epoch 1693, Loss: 0.0141


Training:  68%|██████▊   | 1694/2500 [05:08<02:28,  5.44it/s]

Epoch 1694, Loss: 0.0141
Epoch 1695, Loss: 0.0141


Training:  68%|██████▊   | 1696/2500 [05:08<02:28,  5.42it/s]

Epoch 1696, Loss: 0.0141
Epoch 1697, Loss: 0.0141


Training:  68%|██████▊   | 1698/2500 [05:08<02:27,  5.42it/s]

Epoch 1698, Loss: 0.0141
Epoch 1699, Loss: 0.0141


Training:  68%|██████▊   | 1700/2500 [05:09<02:27,  5.42it/s]

Epoch 1700, Loss: 0.0141
Epoch 1701, Loss: 0.0141


Training:  68%|██████▊   | 1702/2500 [05:09<02:29,  5.35it/s]

Epoch 1702, Loss: 0.0141
Epoch 1703, Loss: 0.0141


Training:  68%|██████▊   | 1704/2500 [05:09<02:27,  5.38it/s]

Epoch 1704, Loss: 0.0141
Epoch 1705, Loss: 0.0141


Training:  68%|██████▊   | 1706/2500 [05:10<02:25,  5.47it/s]

Epoch 1706, Loss: 0.0141
Epoch 1707, Loss: 0.0141


Training:  68%|██████▊   | 1708/2500 [05:10<02:23,  5.51it/s]

Epoch 1708, Loss: 0.0141
Epoch 1709, Loss: 0.0141


Training:  68%|██████▊   | 1710/2500 [05:11<02:22,  5.54it/s]

Epoch 1710, Loss: 0.0141
Epoch 1711, Loss: 0.0141


Training:  68%|██████▊   | 1712/2500 [05:11<02:22,  5.54it/s]

Epoch 1712, Loss: 0.0141
Epoch 1713, Loss: 0.0141


Training:  69%|██████▊   | 1714/2500 [05:11<02:22,  5.51it/s]

Epoch 1714, Loss: 0.0141
Epoch 1715, Loss: 0.0141


Training:  69%|██████▊   | 1716/2500 [05:12<02:21,  5.53it/s]

Epoch 1716, Loss: 0.0141
Epoch 1717, Loss: 0.0141


Training:  69%|██████▊   | 1718/2500 [05:12<02:21,  5.55it/s]

Epoch 1718, Loss: 0.0141
Epoch 1719, Loss: 0.0141


Training:  69%|██████▉   | 1720/2500 [05:12<02:20,  5.54it/s]

Epoch 1720, Loss: 0.0141
Epoch 1721, Loss: 0.0141


Training:  69%|██████▉   | 1722/2500 [05:13<02:21,  5.50it/s]

Epoch 1722, Loss: 0.0141
Epoch 1723, Loss: 0.0141


Training:  69%|██████▉   | 1724/2500 [05:13<02:20,  5.52it/s]

Epoch 1724, Loss: 0.0141
Epoch 1725, Loss: 0.0141


Training:  69%|██████▉   | 1726/2500 [05:13<02:19,  5.55it/s]

Epoch 1726, Loss: 0.0141
Epoch 1727, Loss: 0.0141


Training:  69%|██████▉   | 1728/2500 [05:14<02:19,  5.55it/s]

Epoch 1728, Loss: 0.0141
Epoch 1729, Loss: 0.0141


Training:  69%|██████▉   | 1730/2500 [05:14<02:18,  5.55it/s]

Epoch 1730, Loss: 0.0141
Epoch 1731, Loss: 0.0141


Training:  69%|██████▉   | 1732/2500 [05:15<02:18,  5.56it/s]

Epoch 1732, Loss: 0.0141
Epoch 1733, Loss: 0.0141


Training:  69%|██████▉   | 1734/2500 [05:15<02:18,  5.54it/s]

Epoch 1734, Loss: 0.0141
Epoch 1735, Loss: 0.0141


Training:  69%|██████▉   | 1736/2500 [05:15<02:18,  5.53it/s]

Epoch 1736, Loss: 0.0141
Epoch 1737, Loss: 0.0141


Training:  70%|██████▉   | 1738/2500 [05:16<02:17,  5.54it/s]

Epoch 1738, Loss: 0.0141
Epoch 1739, Loss: 0.0141


Training:  70%|██████▉   | 1740/2500 [05:16<02:17,  5.53it/s]

Epoch 1740, Loss: 0.0141
Epoch 1741, Loss: 0.0141


Training:  70%|██████▉   | 1742/2500 [05:16<02:17,  5.52it/s]

Epoch 1742, Loss: 0.0141
Epoch 1743, Loss: 0.0141


Training:  70%|██████▉   | 1744/2500 [05:17<02:16,  5.53it/s]

Epoch 1744, Loss: 0.0141
Epoch 1745, Loss: 0.0141


Training:  70%|██████▉   | 1746/2500 [05:17<02:16,  5.51it/s]

Epoch 1746, Loss: 0.0141
Epoch 1747, Loss: 0.0141


Training:  70%|██████▉   | 1748/2500 [05:17<02:15,  5.53it/s]

Epoch 1748, Loss: 0.0141
Epoch 1749, Loss: 0.0141


Training:  70%|███████   | 1750/2500 [05:18<02:16,  5.49it/s]

Epoch 1750, Loss: 0.0141
Epoch 1751, Loss: 0.0141


Training:  70%|███████   | 1752/2500 [05:18<02:14,  5.57it/s]

THRESHOLDING: 8 active coefficients
Epoch 1752, Loss: 0.0141


Training:  70%|███████   | 1753/2500 [05:18<02:14,  5.57it/s]

Epoch 1753, Loss: 0.0141
Epoch 1754, Loss: 0.0141


Training:  70%|███████   | 1755/2500 [05:19<02:13,  5.56it/s]

Epoch 1755, Loss: 0.0141
Epoch 1756, Loss: 0.0141


Training:  70%|███████   | 1757/2500 [05:19<02:14,  5.54it/s]

Epoch 1757, Loss: 0.0141
Epoch 1758, Loss: 0.0141


Training:  70%|███████   | 1759/2500 [05:19<02:15,  5.49it/s]

Epoch 1759, Loss: 0.0141
Epoch 1760, Loss: 0.0141


Training:  70%|███████   | 1761/2500 [05:20<02:16,  5.43it/s]

Epoch 1761, Loss: 0.0141
Epoch 1762, Loss: 0.0141


Training:  71%|███████   | 1763/2500 [05:20<02:16,  5.41it/s]

Epoch 1763, Loss: 0.0141
Epoch 1764, Loss: 0.0141


Training:  71%|███████   | 1765/2500 [05:21<02:16,  5.40it/s]

Epoch 1765, Loss: 0.0141
Epoch 1766, Loss: 0.0141


Training:  71%|███████   | 1767/2500 [05:21<02:16,  5.39it/s]

Epoch 1767, Loss: 0.0141
Epoch 1768, Loss: 0.0141


Training:  71%|███████   | 1769/2500 [05:21<02:18,  5.29it/s]

Epoch 1769, Loss: 0.0141
Epoch 1770, Loss: 0.0141


Training:  71%|███████   | 1771/2500 [05:22<02:15,  5.37it/s]

Epoch 1771, Loss: 0.0141
Epoch 1772, Loss: 0.0141


Training:  71%|███████   | 1773/2500 [05:22<02:13,  5.45it/s]

Epoch 1773, Loss: 0.0141
Epoch 1774, Loss: 0.0141


Training:  71%|███████   | 1775/2500 [05:22<02:11,  5.51it/s]

Epoch 1775, Loss: 0.0141
Epoch 1776, Loss: 0.0141


Training:  71%|███████   | 1777/2500 [05:23<02:10,  5.53it/s]

Epoch 1777, Loss: 0.0141
Epoch 1778, Loss: 0.0141


Training:  71%|███████   | 1779/2500 [05:23<02:10,  5.52it/s]

Epoch 1779, Loss: 0.0141
Epoch 1780, Loss: 0.0141


Training:  71%|███████   | 1781/2500 [05:23<02:09,  5.54it/s]

Epoch 1781, Loss: 0.0141
Epoch 1782, Loss: 0.0141


Training:  71%|███████▏  | 1783/2500 [05:24<02:09,  5.54it/s]

Epoch 1783, Loss: 0.0141
Epoch 1784, Loss: 0.0141


Training:  71%|███████▏  | 1785/2500 [05:24<02:09,  5.51it/s]

Epoch 1785, Loss: 0.0141
Epoch 1786, Loss: 0.0141


Training:  71%|███████▏  | 1787/2500 [05:25<02:08,  5.53it/s]

Epoch 1787, Loss: 0.0141
Epoch 1788, Loss: 0.0141


Training:  72%|███████▏  | 1789/2500 [05:25<02:08,  5.54it/s]

Epoch 1789, Loss: 0.0141
Epoch 1790, Loss: 0.0141


Training:  72%|███████▏  | 1791/2500 [05:25<02:08,  5.52it/s]

Epoch 1791, Loss: 0.0141
Epoch 1792, Loss: 0.0141


Training:  72%|███████▏  | 1793/2500 [05:26<02:07,  5.54it/s]

Epoch 1793, Loss: 0.0141
Epoch 1794, Loss: 0.0141


Training:  72%|███████▏  | 1795/2500 [05:26<02:07,  5.55it/s]

Epoch 1795, Loss: 0.0141
Epoch 1796, Loss: 0.0141


Training:  72%|███████▏  | 1797/2500 [05:26<02:07,  5.53it/s]

Epoch 1797, Loss: 0.0141
Epoch 1798, Loss: 0.0141


Training:  72%|███████▏  | 1799/2500 [05:27<02:06,  5.55it/s]

Epoch 1799, Loss: 0.0141
Epoch 1800, Loss: 0.0141


Training:  72%|███████▏  | 1801/2500 [05:27<02:05,  5.55it/s]

Epoch 1801, Loss: 0.0141
Epoch 1802, Loss: 0.0141


Training:  72%|███████▏  | 1803/2500 [05:27<02:05,  5.54it/s]

Epoch 1803, Loss: 0.0141
Epoch 1804, Loss: 0.0141


Training:  72%|███████▏  | 1805/2500 [05:28<02:05,  5.53it/s]

Epoch 1805, Loss: 0.0141
Epoch 1806, Loss: 0.0141


Training:  72%|███████▏  | 1807/2500 [05:28<02:05,  5.52it/s]

Epoch 1807, Loss: 0.0141
Epoch 1808, Loss: 0.0141


Training:  72%|███████▏  | 1809/2500 [05:29<02:06,  5.48it/s]

Epoch 1809, Loss: 0.0141
Epoch 1810, Loss: 0.0141


Training:  72%|███████▏  | 1811/2500 [05:29<02:04,  5.51it/s]

Epoch 1811, Loss: 0.0141
Epoch 1812, Loss: 0.0141


Training:  73%|███████▎  | 1813/2500 [05:29<02:05,  5.49it/s]

Epoch 1813, Loss: 0.0141
Epoch 1814, Loss: 0.0141


Training:  73%|███████▎  | 1815/2500 [05:30<02:04,  5.52it/s]

Epoch 1815, Loss: 0.0141
Epoch 1816, Loss: 0.0141


Training:  73%|███████▎  | 1817/2500 [05:30<02:04,  5.51it/s]

Epoch 1817, Loss: 0.0141
Epoch 1818, Loss: 0.0141


Training:  73%|███████▎  | 1819/2500 [05:30<02:03,  5.51it/s]

Epoch 1819, Loss: 0.0141
Epoch 1820, Loss: 0.0141


Training:  73%|███████▎  | 1821/2500 [05:31<02:02,  5.52it/s]

Epoch 1821, Loss: 0.0141
Epoch 1822, Loss: 0.0141


Training:  73%|███████▎  | 1823/2500 [05:31<02:02,  5.53it/s]

Epoch 1823, Loss: 0.0141
Epoch 1824, Loss: 0.0141


Training:  73%|███████▎  | 1825/2500 [05:31<02:02,  5.52it/s]

Epoch 1825, Loss: 0.0141
Epoch 1826, Loss: 0.0141


Training:  73%|███████▎  | 1827/2500 [05:32<02:04,  5.43it/s]

Epoch 1827, Loss: 0.0141
Epoch 1828, Loss: 0.0141


Training:  73%|███████▎  | 1829/2500 [05:32<02:06,  5.32it/s]

Epoch 1829, Loss: 0.0141
Epoch 1830, Loss: 0.0141


Training:  73%|███████▎  | 1831/2500 [05:33<02:05,  5.32it/s]

Epoch 1831, Loss: 0.0141
Epoch 1832, Loss: 0.0141


Training:  73%|███████▎  | 1833/2500 [05:33<02:04,  5.37it/s]

Epoch 1833, Loss: 0.0141
Epoch 1834, Loss: 0.0141


Training:  73%|███████▎  | 1835/2500 [05:33<02:05,  5.30it/s]

Epoch 1835, Loss: 0.0141
Epoch 1836, Loss: 0.0141


Training:  73%|███████▎  | 1837/2500 [05:34<02:05,  5.29it/s]

Epoch 1837, Loss: 0.0141
Epoch 1838, Loss: 0.0141


Training:  74%|███████▎  | 1839/2500 [05:34<02:03,  5.37it/s]

Epoch 1839, Loss: 0.0141
Epoch 1840, Loss: 0.0141


Training:  74%|███████▎  | 1841/2500 [05:34<02:00,  5.45it/s]

Epoch 1841, Loss: 0.0141
Epoch 1842, Loss: 0.0141


Training:  74%|███████▎  | 1843/2500 [05:35<01:59,  5.48it/s]

Epoch 1843, Loss: 0.0141
Epoch 1844, Loss: 0.0141


Training:  74%|███████▍  | 1845/2500 [05:35<01:59,  5.46it/s]

Epoch 1845, Loss: 0.0141
Epoch 1846, Loss: 0.0141


Training:  74%|███████▍  | 1847/2500 [05:36<01:59,  5.47it/s]

Epoch 1847, Loss: 0.0141
Epoch 1848, Loss: 0.0141


Training:  74%|███████▍  | 1849/2500 [05:36<01:59,  5.45it/s]

Epoch 1849, Loss: 0.0141
Epoch 1850, Loss: 0.0141


Training:  74%|███████▍  | 1851/2500 [05:36<01:58,  5.48it/s]

Epoch 1851, Loss: 0.0141
Epoch 1852, Loss: 0.0141


Training:  74%|███████▍  | 1853/2500 [05:37<01:58,  5.45it/s]

Epoch 1853, Loss: 0.0141
Epoch 1854, Loss: 0.0141


Training:  74%|███████▍  | 1855/2500 [05:37<01:57,  5.48it/s]

Epoch 1855, Loss: 0.0141
Epoch 1856, Loss: 0.0141


Training:  74%|███████▍  | 1857/2500 [05:37<01:57,  5.49it/s]

Epoch 1857, Loss: 0.0141
Epoch 1858, Loss: 0.0141


Training:  74%|███████▍  | 1859/2500 [05:38<01:56,  5.49it/s]

Epoch 1859, Loss: 0.0141
Epoch 1860, Loss: 0.0141


Training:  74%|███████▍  | 1861/2500 [05:38<01:56,  5.50it/s]

Epoch 1861, Loss: 0.0141
Epoch 1862, Loss: 0.0141


Training:  75%|███████▍  | 1863/2500 [05:38<01:55,  5.50it/s]

Epoch 1863, Loss: 0.0141
Epoch 1864, Loss: 0.0141


Training:  75%|███████▍  | 1865/2500 [05:39<01:56,  5.45it/s]

Epoch 1865, Loss: 0.0141
Epoch 1866, Loss: 0.0141


Training:  75%|███████▍  | 1867/2500 [05:39<01:55,  5.50it/s]

Epoch 1867, Loss: 0.0141
Epoch 1868, Loss: 0.0141


Training:  75%|███████▍  | 1869/2500 [05:40<01:54,  5.51it/s]

Epoch 1869, Loss: 0.0141
Epoch 1870, Loss: 0.0141


Training:  75%|███████▍  | 1871/2500 [05:40<01:54,  5.50it/s]

Epoch 1871, Loss: 0.0141
Epoch 1872, Loss: 0.0141


Training:  75%|███████▍  | 1873/2500 [05:40<01:53,  5.52it/s]

Epoch 1873, Loss: 0.0141
Epoch 1874, Loss: 0.0141


Training:  75%|███████▌  | 1875/2500 [05:41<01:53,  5.52it/s]

Epoch 1875, Loss: 0.0141
Epoch 1876, Loss: 0.0141


Training:  75%|███████▌  | 1877/2500 [05:41<01:53,  5.51it/s]

Epoch 1877, Loss: 0.0141
Epoch 1878, Loss: 0.0141


Training:  75%|███████▌  | 1879/2500 [05:41<01:52,  5.52it/s]

Epoch 1879, Loss: 0.0141
Epoch 1880, Loss: 0.0141


Training:  75%|███████▌  | 1881/2500 [05:42<01:52,  5.52it/s]

Epoch 1881, Loss: 0.0141
Epoch 1882, Loss: 0.0141


Training:  75%|███████▌  | 1883/2500 [05:42<01:51,  5.51it/s]

Epoch 1883, Loss: 0.0141
Epoch 1884, Loss: 0.0141


Training:  75%|███████▌  | 1885/2500 [05:42<01:51,  5.50it/s]

Epoch 1885, Loss: 0.0141
Epoch 1886, Loss: 0.0141


Training:  75%|███████▌  | 1887/2500 [05:43<01:52,  5.47it/s]

Epoch 1887, Loss: 0.0141
Epoch 1888, Loss: 0.0141


Training:  76%|███████▌  | 1889/2500 [05:43<01:51,  5.49it/s]

Epoch 1889, Loss: 0.0141
Epoch 1890, Loss: 0.0141


Training:  76%|███████▌  | 1891/2500 [05:44<01:50,  5.50it/s]

Epoch 1891, Loss: 0.0141
Epoch 1892, Loss: 0.0141


Training:  76%|███████▌  | 1893/2500 [05:44<01:50,  5.48it/s]

Epoch 1893, Loss: 0.0141
Epoch 1894, Loss: 0.0141


Training:  76%|███████▌  | 1895/2500 [05:44<01:52,  5.37it/s]

Epoch 1895, Loss: 0.0141
Epoch 1896, Loss: 0.0141


Training:  76%|███████▌  | 1897/2500 [05:45<01:51,  5.39it/s]

Epoch 1897, Loss: 0.0141
Epoch 1898, Loss: 0.0141


Training:  76%|███████▌  | 1899/2500 [05:45<01:54,  5.26it/s]

Epoch 1899, Loss: 0.0141


Training:  76%|███████▌  | 1900/2500 [05:45<01:56,  5.16it/s]

Epoch 1900, Loss: 0.0141
Epoch 1901, Loss: 0.0141


Training:  76%|███████▌  | 1902/2500 [05:46<01:54,  5.21it/s]

Epoch 1902, Loss: 0.0141
Epoch 1903, Loss: 0.0141


Training:  76%|███████▌  | 1904/2500 [05:46<01:54,  5.20it/s]

Epoch 1904, Loss: 0.0141
Epoch 1905, Loss: 0.0141


Training:  76%|███████▌  | 1906/2500 [05:46<01:52,  5.30it/s]

Epoch 1906, Loss: 0.0141
Epoch 1907, Loss: 0.0141


Training:  76%|███████▋  | 1908/2500 [05:47<01:49,  5.41it/s]

Epoch 1908, Loss: 0.0141
Epoch 1909, Loss: 0.0141


Training:  76%|███████▋  | 1910/2500 [05:47<01:48,  5.45it/s]

Epoch 1910, Loss: 0.0141
Epoch 1911, Loss: 0.0141


Training:  76%|███████▋  | 1912/2500 [05:47<01:47,  5.47it/s]

Epoch 1912, Loss: 0.0141
Epoch 1913, Loss: 0.0141


Training:  77%|███████▋  | 1914/2500 [05:48<01:46,  5.48it/s]

Epoch 1914, Loss: 0.0141
Epoch 1915, Loss: 0.0141


Training:  77%|███████▋  | 1916/2500 [05:48<01:46,  5.46it/s]

Epoch 1916, Loss: 0.0141
Epoch 1917, Loss: 0.0141


Training:  77%|███████▋  | 1918/2500 [05:49<01:46,  5.49it/s]

Epoch 1918, Loss: 0.0141
Epoch 1919, Loss: 0.0141


Training:  77%|███████▋  | 1920/2500 [05:49<01:45,  5.50it/s]

Epoch 1920, Loss: 0.0141
Epoch 1921, Loss: 0.0141


Training:  77%|███████▋  | 1922/2500 [05:49<01:45,  5.48it/s]

Epoch 1922, Loss: 0.0141
Epoch 1923, Loss: 0.0141


Training:  77%|███████▋  | 1924/2500 [05:50<01:45,  5.47it/s]

Epoch 1924, Loss: 0.0141
Epoch 1925, Loss: 0.0141


Training:  77%|███████▋  | 1926/2500 [05:50<01:44,  5.50it/s]

Epoch 1926, Loss: 0.0141
Epoch 1927, Loss: 0.0141


Training:  77%|███████▋  | 1928/2500 [05:50<01:43,  5.51it/s]

Epoch 1928, Loss: 0.0141
Epoch 1929, Loss: 0.0141


Training:  77%|███████▋  | 1930/2500 [05:51<01:43,  5.52it/s]

Epoch 1930, Loss: 0.0141
Epoch 1931, Loss: 0.0141


Training:  77%|███████▋  | 1932/2500 [05:51<01:42,  5.52it/s]

Epoch 1932, Loss: 0.0141
Epoch 1933, Loss: 0.0141


Training:  77%|███████▋  | 1934/2500 [05:51<01:42,  5.50it/s]

Epoch 1934, Loss: 0.0141
Epoch 1935, Loss: 0.0141


Training:  77%|███████▋  | 1936/2500 [05:52<01:42,  5.52it/s]

Epoch 1936, Loss: 0.0141
Epoch 1937, Loss: 0.0141


Training:  78%|███████▊  | 1938/2500 [05:52<01:42,  5.48it/s]

Epoch 1938, Loss: 0.0141
Epoch 1939, Loss: 0.0141


Training:  78%|███████▊  | 1940/2500 [05:53<01:42,  5.49it/s]

Epoch 1940, Loss: 0.0141
Epoch 1941, Loss: 0.0141


Training:  78%|███████▊  | 1942/2500 [05:53<01:41,  5.51it/s]

Epoch 1942, Loss: 0.0141
Epoch 1943, Loss: 0.0141


Training:  78%|███████▊  | 1944/2500 [05:53<01:41,  5.49it/s]

Epoch 1944, Loss: 0.0141
Epoch 1945, Loss: 0.0141


Training:  78%|███████▊  | 1946/2500 [05:54<01:40,  5.51it/s]

Epoch 1946, Loss: 0.0141
Epoch 1947, Loss: 0.0141


Training:  78%|███████▊  | 1948/2500 [05:54<01:40,  5.50it/s]

Epoch 1948, Loss: 0.0141
Epoch 1949, Loss: 0.0141


Training:  78%|███████▊  | 1950/2500 [05:54<01:40,  5.45it/s]

Epoch 1950, Loss: 0.0141
Epoch 1951, Loss: 0.0141


Training:  78%|███████▊  | 1952/2500 [05:55<01:40,  5.47it/s]

Epoch 1952, Loss: 0.0141
Epoch 1953, Loss: 0.0141


Training:  78%|███████▊  | 1954/2500 [05:55<01:39,  5.49it/s]

Epoch 1954, Loss: 0.0141
Epoch 1955, Loss: 0.0141


Training:  78%|███████▊  | 1956/2500 [05:55<01:39,  5.48it/s]

Epoch 1956, Loss: 0.0141
Epoch 1957, Loss: 0.0141


Training:  78%|███████▊  | 1958/2500 [05:56<01:38,  5.51it/s]

Epoch 1958, Loss: 0.0141
Epoch 1959, Loss: 0.0141


Training:  78%|███████▊  | 1960/2500 [05:56<01:38,  5.48it/s]

Epoch 1960, Loss: 0.0141
Epoch 1961, Loss: 0.0141


Training:  78%|███████▊  | 1962/2500 [05:57<01:40,  5.34it/s]

Epoch 1962, Loss: 0.0141
Epoch 1963, Loss: 0.0141


Training:  79%|███████▊  | 1964/2500 [05:57<01:41,  5.30it/s]

Epoch 1964, Loss: 0.0141
Epoch 1965, Loss: 0.0141


Training:  79%|███████▊  | 1966/2500 [05:57<01:41,  5.28it/s]

Epoch 1966, Loss: 0.0141
Epoch 1967, Loss: 0.0141


Training:  79%|███████▊  | 1968/2500 [05:58<01:40,  5.30it/s]

Epoch 1968, Loss: 0.0141
Epoch 1969, Loss: 0.0141


Training:  79%|███████▉  | 1970/2500 [05:58<01:40,  5.29it/s]

Epoch 1970, Loss: 0.0141
Epoch 1971, Loss: 0.0141


Training:  79%|███████▉  | 1972/2500 [05:58<01:40,  5.25it/s]

Epoch 1972, Loss: 0.0141
Epoch 1973, Loss: 0.0141


Training:  79%|███████▉  | 1974/2500 [05:59<01:37,  5.38it/s]

Epoch 1974, Loss: 0.0141
Epoch 1975, Loss: 0.0141


Training:  79%|███████▉  | 1976/2500 [05:59<01:36,  5.45it/s]

Epoch 1976, Loss: 0.0141
Epoch 1977, Loss: 0.0141


Training:  79%|███████▉  | 1978/2500 [06:00<01:36,  5.42it/s]

Epoch 1978, Loss: 0.0141
Epoch 1979, Loss: 0.0141


Training:  79%|███████▉  | 1980/2500 [06:00<01:35,  5.47it/s]

Epoch 1980, Loss: 0.0141
Epoch 1981, Loss: 0.0141


Training:  79%|███████▉  | 1982/2500 [06:00<01:34,  5.49it/s]

Epoch 1982, Loss: 0.0141
Epoch 1983, Loss: 0.0141


Training:  79%|███████▉  | 1984/2500 [06:01<01:33,  5.49it/s]

Epoch 1984, Loss: 0.0141
Epoch 1985, Loss: 0.0141


Training:  79%|███████▉  | 1986/2500 [06:01<01:33,  5.48it/s]

Epoch 1986, Loss: 0.0141
Epoch 1987, Loss: 0.0141


Training:  80%|███████▉  | 1988/2500 [06:01<01:32,  5.51it/s]

Epoch 1988, Loss: 0.0141
Epoch 1989, Loss: 0.0141


Training:  80%|███████▉  | 1990/2500 [06:02<01:32,  5.51it/s]

Epoch 1990, Loss: 0.0141
Epoch 1991, Loss: 0.0141


Training:  80%|███████▉  | 1992/2500 [06:02<01:32,  5.51it/s]

Epoch 1992, Loss: 0.0141
Epoch 1993, Loss: 0.0141


Training:  80%|███████▉  | 1994/2500 [06:02<01:31,  5.52it/s]

Epoch 1994, Loss: 0.0141
Epoch 1995, Loss: 0.0141


Training:  80%|███████▉  | 1996/2500 [06:03<01:31,  5.49it/s]

Epoch 1996, Loss: 0.0141
Epoch 1997, Loss: 0.0141


Training:  80%|███████▉  | 1998/2500 [06:03<01:31,  5.50it/s]

Epoch 1998, Loss: 0.0141
Epoch 1999, Loss: 0.0141


Training:  80%|████████  | 2000/2500 [06:04<01:31,  5.48it/s]

Epoch 2000, Loss: 0.0141
Epoch 2001, Loss: 0.0141


Training:  80%|████████  | 2002/2500 [06:04<01:29,  5.56it/s]

THRESHOLDING: 7 active coefficients
Epoch 2002, Loss: 0.0141


Training:  80%|████████  | 2003/2500 [06:04<01:29,  5.54it/s]

Epoch 2003, Loss: 0.0141
Epoch 2004, Loss: 0.0141


Training:  80%|████████  | 2005/2500 [06:04<01:29,  5.54it/s]

Epoch 2005, Loss: 0.0141
Epoch 2006, Loss: 0.0141


Training:  80%|████████  | 2007/2500 [06:05<01:29,  5.53it/s]

Epoch 2007, Loss: 0.0141
Epoch 2008, Loss: 0.0141


Training:  80%|████████  | 2009/2500 [06:05<01:28,  5.52it/s]

Epoch 2009, Loss: 0.0141
Epoch 2010, Loss: 0.0141


Training:  80%|████████  | 2011/2500 [06:06<01:28,  5.52it/s]

Epoch 2011, Loss: 0.0141
Epoch 2012, Loss: 0.0141


Training:  81%|████████  | 2013/2500 [06:06<01:28,  5.50it/s]

Epoch 2013, Loss: 0.0141
Epoch 2014, Loss: 0.0141


Training:  81%|████████  | 2015/2500 [06:06<01:28,  5.49it/s]

Epoch 2015, Loss: 0.0141
Epoch 2016, Loss: 0.0141


Training:  81%|████████  | 2017/2500 [06:07<01:28,  5.46it/s]

Epoch 2017, Loss: 0.0141
Epoch 2018, Loss: 0.0141


Training:  81%|████████  | 2019/2500 [06:07<01:27,  5.49it/s]

Epoch 2019, Loss: 0.0141
Epoch 2020, Loss: 0.0141


Training:  81%|████████  | 2021/2500 [06:07<01:27,  5.50it/s]

Epoch 2021, Loss: 0.0141
Epoch 2022, Loss: 0.0141


Training:  81%|████████  | 2023/2500 [06:08<01:26,  5.49it/s]

Epoch 2023, Loss: 0.0141
Epoch 2024, Loss: 0.0141


Training:  81%|████████  | 2025/2500 [06:08<01:26,  5.50it/s]

Epoch 2025, Loss: 0.0141
Epoch 2026, Loss: 0.0141


Training:  81%|████████  | 2027/2500 [06:08<01:26,  5.48it/s]

Epoch 2027, Loss: 0.0141
Epoch 2028, Loss: 0.0141


Training:  81%|████████  | 2029/2500 [06:09<01:29,  5.27it/s]

Epoch 2029, Loss: 0.0141
Epoch 2030, Loss: 0.0141


Training:  81%|████████  | 2031/2500 [06:09<01:28,  5.33it/s]

Epoch 2031, Loss: 0.0141
Epoch 2032, Loss: 0.0141


Training:  81%|████████▏ | 2033/2500 [06:10<01:28,  5.31it/s]

Epoch 2033, Loss: 0.0141
Epoch 2034, Loss: 0.0141


Training:  81%|████████▏ | 2035/2500 [06:10<01:27,  5.34it/s]

Epoch 2035, Loss: 0.0141
Epoch 2036, Loss: 0.0141


Training:  81%|████████▏ | 2037/2500 [06:10<01:27,  5.27it/s]

Epoch 2037, Loss: 0.0141
Epoch 2038, Loss: 0.0141


Training:  82%|████████▏ | 2039/2500 [06:11<01:26,  5.30it/s]

Epoch 2039, Loss: 0.0141
Epoch 2040, Loss: 0.0141


Training:  82%|████████▏ | 2041/2500 [06:11<01:24,  5.40it/s]

Epoch 2041, Loss: 0.0141
Epoch 2042, Loss: 0.0141


Training:  82%|████████▏ | 2043/2500 [06:11<01:23,  5.46it/s]

Epoch 2043, Loss: 0.0141
Epoch 2044, Loss: 0.0141


Training:  82%|████████▏ | 2045/2500 [06:12<01:22,  5.49it/s]

Epoch 2045, Loss: 0.0141
Epoch 2046, Loss: 0.0141


Training:  82%|████████▏ | 2047/2500 [06:12<01:22,  5.48it/s]

Epoch 2047, Loss: 0.0141
Epoch 2048, Loss: 0.0141


Training:  82%|████████▏ | 2049/2500 [06:13<01:22,  5.50it/s]

Epoch 2049, Loss: 0.0141
Epoch 2050, Loss: 0.0141


Training:  82%|████████▏ | 2051/2500 [06:13<01:21,  5.48it/s]

Epoch 2051, Loss: 0.0141
Epoch 2052, Loss: 0.0141


Training:  82%|████████▏ | 2053/2500 [06:13<01:21,  5.50it/s]

Epoch 2053, Loss: 0.0141
Epoch 2054, Loss: 0.0141


Training:  82%|████████▏ | 2055/2500 [06:14<01:20,  5.52it/s]

Epoch 2055, Loss: 0.0141
Epoch 2056, Loss: 0.0141


Training:  82%|████████▏ | 2057/2500 [06:14<01:20,  5.50it/s]

Epoch 2057, Loss: 0.0141
Epoch 2058, Loss: 0.0141


Training:  82%|████████▏ | 2059/2500 [06:14<01:19,  5.52it/s]

Epoch 2059, Loss: 0.0141
Epoch 2060, Loss: 0.0141


Training:  82%|████████▏ | 2061/2500 [06:15<01:19,  5.52it/s]

Epoch 2061, Loss: 0.0141
Epoch 2062, Loss: 0.0141


Training:  83%|████████▎ | 2063/2500 [06:15<01:19,  5.46it/s]

Epoch 2063, Loss: 0.0141
Epoch 2064, Loss: 0.0141


Training:  83%|████████▎ | 2065/2500 [06:15<01:19,  5.50it/s]

Epoch 2065, Loss: 0.0141
Epoch 2066, Loss: 0.0141


Training:  83%|████████▎ | 2067/2500 [06:16<01:18,  5.52it/s]

Epoch 2067, Loss: 0.0141
Epoch 2068, Loss: 0.0141


Training:  83%|████████▎ | 2069/2500 [06:16<01:18,  5.49it/s]

Epoch 2069, Loss: 0.0141
Epoch 2070, Loss: 0.0141


Training:  83%|████████▎ | 2071/2500 [06:17<01:17,  5.52it/s]

Epoch 2071, Loss: 0.0141
Epoch 2072, Loss: 0.0141


Training:  83%|████████▎ | 2073/2500 [06:17<01:17,  5.53it/s]

Epoch 2073, Loss: 0.0141
Epoch 2074, Loss: 0.0141


Training:  83%|████████▎ | 2075/2500 [06:17<01:17,  5.52it/s]

Epoch 2075, Loss: 0.0141
Epoch 2076, Loss: 0.0141


Training:  83%|████████▎ | 2077/2500 [06:18<01:16,  5.52it/s]

Epoch 2077, Loss: 0.0141
Epoch 2078, Loss: 0.0141


Training:  83%|████████▎ | 2079/2500 [06:18<01:16,  5.52it/s]

Epoch 2079, Loss: 0.0141
Epoch 2080, Loss: 0.0141


Training:  83%|████████▎ | 2081/2500 [06:18<01:15,  5.54it/s]

Epoch 2081, Loss: 0.0141
Epoch 2082, Loss: 0.0141


Training:  83%|████████▎ | 2083/2500 [06:19<01:15,  5.53it/s]

Epoch 2083, Loss: 0.0141
Epoch 2084, Loss: 0.0141


Training:  83%|████████▎ | 2085/2500 [06:19<01:15,  5.52it/s]

Epoch 2085, Loss: 0.0141
Epoch 2086, Loss: 0.0141


Training:  83%|████████▎ | 2087/2500 [06:19<01:15,  5.48it/s]

Epoch 2087, Loss: 0.0141
Epoch 2088, Loss: 0.0141


Training:  84%|████████▎ | 2089/2500 [06:20<01:14,  5.50it/s]

Epoch 2089, Loss: 0.0141
Epoch 2090, Loss: 0.0141


Training:  84%|████████▎ | 2091/2500 [06:20<01:14,  5.49it/s]

Epoch 2091, Loss: 0.0141
Epoch 2092, Loss: 0.0141


Training:  84%|████████▎ | 2093/2500 [06:21<01:13,  5.51it/s]

Epoch 2093, Loss: 0.0141
Epoch 2094, Loss: 0.0141


Training:  84%|████████▍ | 2095/2500 [06:21<01:14,  5.44it/s]

Epoch 2095, Loss: 0.0141
Epoch 2096, Loss: 0.0141


Training:  84%|████████▍ | 2097/2500 [06:21<01:15,  5.34it/s]

Epoch 2097, Loss: 0.0141
Epoch 2098, Loss: 0.0141


Training:  84%|████████▍ | 2099/2500 [06:22<01:14,  5.37it/s]

Epoch 2099, Loss: 0.0141
Epoch 2100, Loss: 0.0141


Training:  84%|████████▍ | 2101/2500 [06:22<01:13,  5.41it/s]

Epoch 2101, Loss: 0.0141
Epoch 2102, Loss: 0.0141


Training:  84%|████████▍ | 2103/2500 [06:22<01:15,  5.28it/s]

Epoch 2103, Loss: 0.0141
Epoch 2104, Loss: 0.0141


Training:  84%|████████▍ | 2105/2500 [06:23<01:14,  5.29it/s]

Epoch 2105, Loss: 0.0141
Epoch 2106, Loss: 0.0141


Training:  84%|████████▍ | 2107/2500 [06:23<01:13,  5.38it/s]

Epoch 2107, Loss: 0.0141
Epoch 2108, Loss: 0.0141


Training:  84%|████████▍ | 2109/2500 [06:24<01:11,  5.44it/s]

Epoch 2109, Loss: 0.0141
Epoch 2110, Loss: 0.0141


Training:  84%|████████▍ | 2111/2500 [06:24<01:10,  5.49it/s]

Epoch 2111, Loss: 0.0141
Epoch 2112, Loss: 0.0141


Training:  85%|████████▍ | 2113/2500 [06:24<01:10,  5.50it/s]

Epoch 2113, Loss: 0.0141
Epoch 2114, Loss: 0.0141


Training:  85%|████████▍ | 2115/2500 [06:25<01:09,  5.52it/s]

Epoch 2115, Loss: 0.0141
Epoch 2116, Loss: 0.0141


Training:  85%|████████▍ | 2117/2500 [06:25<01:09,  5.52it/s]

Epoch 2117, Loss: 0.0141
Epoch 2118, Loss: 0.0141


Training:  85%|████████▍ | 2119/2500 [06:25<01:09,  5.51it/s]

Epoch 2119, Loss: 0.0141
Epoch 2120, Loss: 0.0141


Training:  85%|████████▍ | 2121/2500 [06:26<01:09,  5.49it/s]

Epoch 2121, Loss: 0.0141
Epoch 2122, Loss: 0.0141


Training:  85%|████████▍ | 2123/2500 [06:26<01:08,  5.51it/s]

Epoch 2123, Loss: 0.0141
Epoch 2124, Loss: 0.0141


Training:  85%|████████▌ | 2125/2500 [06:26<01:08,  5.51it/s]

Epoch 2125, Loss: 0.0141
Epoch 2126, Loss: 0.0141


Training:  85%|████████▌ | 2127/2500 [06:27<01:07,  5.54it/s]

Epoch 2127, Loss: 0.0141
Epoch 2128, Loss: 0.0141


Training:  85%|████████▌ | 2129/2500 [06:27<01:06,  5.55it/s]

Epoch 2129, Loss: 0.0141
Epoch 2130, Loss: 0.0141


Training:  85%|████████▌ | 2131/2500 [06:28<01:06,  5.54it/s]

Epoch 2131, Loss: 0.0141
Epoch 2132, Loss: 0.0141


Training:  85%|████████▌ | 2133/2500 [06:28<01:06,  5.53it/s]

Epoch 2133, Loss: 0.0141
Epoch 2134, Loss: 0.0141


Training:  85%|████████▌ | 2135/2500 [06:28<01:05,  5.53it/s]

Epoch 2135, Loss: 0.0141
Epoch 2136, Loss: 0.0141


Training:  85%|████████▌ | 2137/2500 [06:29<01:05,  5.52it/s]

Epoch 2137, Loss: 0.0141
Epoch 2138, Loss: 0.0141


Training:  86%|████████▌ | 2139/2500 [06:29<01:05,  5.52it/s]

Epoch 2139, Loss: 0.0141
Epoch 2140, Loss: 0.0141


Training:  86%|████████▌ | 2141/2500 [06:29<01:04,  5.54it/s]

Epoch 2141, Loss: 0.0141
Epoch 2142, Loss: 0.0141


Training:  86%|████████▌ | 2143/2500 [06:30<01:04,  5.51it/s]

Epoch 2143, Loss: 0.0141
Epoch 2144, Loss: 0.0141


Training:  86%|████████▌ | 2145/2500 [06:30<01:04,  5.53it/s]

Epoch 2145, Loss: 0.0141
Epoch 2146, Loss: 0.0141


Training:  86%|████████▌ | 2147/2500 [06:30<01:03,  5.53it/s]

Epoch 2147, Loss: 0.0141
Epoch 2148, Loss: 0.0141


Training:  86%|████████▌ | 2149/2500 [06:31<01:03,  5.53it/s]

Epoch 2149, Loss: 0.0141
Epoch 2150, Loss: 0.0141


Training:  86%|████████▌ | 2151/2500 [06:31<01:02,  5.54it/s]

Epoch 2151, Loss: 0.0141
Epoch 2152, Loss: 0.0141


Training:  86%|████████▌ | 2153/2500 [06:32<01:02,  5.55it/s]

Epoch 2153, Loss: 0.0141
Epoch 2154, Loss: 0.0141


Training:  86%|████████▌ | 2155/2500 [06:32<01:02,  5.53it/s]

Epoch 2155, Loss: 0.0141
Epoch 2156, Loss: 0.0141


Training:  86%|████████▋ | 2157/2500 [06:32<01:01,  5.55it/s]

Epoch 2157, Loss: 0.0141
Epoch 2158, Loss: 0.0141


Training:  86%|████████▋ | 2159/2500 [06:33<01:01,  5.55it/s]

Epoch 2159, Loss: 0.0141
Epoch 2160, Loss: 0.0141


Training:  86%|████████▋ | 2161/2500 [06:33<01:01,  5.53it/s]

Epoch 2161, Loss: 0.0141
Epoch 2162, Loss: 0.0141


Training:  87%|████████▋ | 2163/2500 [06:33<01:01,  5.44it/s]

Epoch 2163, Loss: 0.0141
Epoch 2164, Loss: 0.0141


Training:  87%|████████▋ | 2165/2500 [06:34<01:01,  5.40it/s]

Epoch 2165, Loss: 0.0141
Epoch 2166, Loss: 0.0141


Training:  87%|████████▋ | 2167/2500 [06:34<01:01,  5.42it/s]

Epoch 2167, Loss: 0.0141
Epoch 2168, Loss: 0.0141


Training:  87%|████████▋ | 2169/2500 [06:34<01:00,  5.44it/s]

Epoch 2169, Loss: 0.0141
Epoch 2170, Loss: 0.0141


Training:  87%|████████▋ | 2171/2500 [06:35<01:02,  5.30it/s]

Epoch 2171, Loss: 0.0141
Epoch 2172, Loss: 0.0141


Training:  87%|████████▋ | 2173/2500 [06:35<01:01,  5.31it/s]

Epoch 2173, Loss: 0.0141
Epoch 2174, Loss: 0.0141


Training:  87%|████████▋ | 2175/2500 [06:36<01:00,  5.41it/s]

Epoch 2175, Loss: 0.0141
Epoch 2176, Loss: 0.0141


Training:  87%|████████▋ | 2177/2500 [06:36<00:59,  5.43it/s]

Epoch 2177, Loss: 0.0141
Epoch 2178, Loss: 0.0141


Training:  87%|████████▋ | 2179/2500 [06:36<00:58,  5.47it/s]

Epoch 2179, Loss: 0.0141
Epoch 2180, Loss: 0.0141


Training:  87%|████████▋ | 2181/2500 [06:37<00:57,  5.51it/s]

Epoch 2181, Loss: 0.0141
Epoch 2182, Loss: 0.0141


Training:  87%|████████▋ | 2183/2500 [06:37<00:57,  5.52it/s]

Epoch 2183, Loss: 0.0141
Epoch 2184, Loss: 0.0141


Training:  87%|████████▋ | 2185/2500 [06:37<00:56,  5.54it/s]

Epoch 2185, Loss: 0.0141
Epoch 2186, Loss: 0.0141


Training:  87%|████████▋ | 2187/2500 [06:38<00:56,  5.54it/s]

Epoch 2187, Loss: 0.0141
Epoch 2188, Loss: 0.0141


Training:  88%|████████▊ | 2189/2500 [06:38<00:56,  5.52it/s]

Epoch 2189, Loss: 0.0141
Epoch 2190, Loss: 0.0141


Training:  88%|████████▊ | 2191/2500 [06:38<00:55,  5.54it/s]

Epoch 2191, Loss: 0.0141
Epoch 2192, Loss: 0.0141


Training:  88%|████████▊ | 2193/2500 [06:39<00:55,  5.55it/s]

Epoch 2193, Loss: 0.0141
Epoch 2194, Loss: 0.0141


Training:  88%|████████▊ | 2195/2500 [06:39<00:55,  5.54it/s]

Epoch 2195, Loss: 0.0141
Epoch 2196, Loss: 0.0141


Training:  88%|████████▊ | 2197/2500 [06:40<00:54,  5.54it/s]

Epoch 2197, Loss: 0.0141
Epoch 2198, Loss: 0.0141


Training:  88%|████████▊ | 2199/2500 [06:40<00:54,  5.52it/s]

Epoch 2199, Loss: 0.0141
Epoch 2200, Loss: 0.0141


Training:  88%|████████▊ | 2201/2500 [06:40<00:54,  5.51it/s]

Epoch 2201, Loss: 0.0141
Epoch 2202, Loss: 0.0141


Training:  88%|████████▊ | 2203/2500 [06:41<00:53,  5.52it/s]

Epoch 2203, Loss: 0.0141
Epoch 2204, Loss: 0.0141


Training:  88%|████████▊ | 2205/2500 [06:41<00:53,  5.52it/s]

Epoch 2205, Loss: 0.0141
Epoch 2206, Loss: 0.0141


Training:  88%|████████▊ | 2207/2500 [06:41<00:52,  5.53it/s]

Epoch 2207, Loss: 0.0141
Epoch 2208, Loss: 0.0141


Training:  88%|████████▊ | 2209/2500 [06:42<00:52,  5.54it/s]

Epoch 2209, Loss: 0.0141
Epoch 2210, Loss: 0.0141


Training:  88%|████████▊ | 2211/2500 [06:42<00:52,  5.55it/s]

Epoch 2211, Loss: 0.0141
Epoch 2212, Loss: 0.0141


Training:  89%|████████▊ | 2213/2500 [06:42<00:51,  5.55it/s]

Epoch 2213, Loss: 0.0141
Epoch 2214, Loss: 0.0141


Training:  89%|████████▊ | 2215/2500 [06:43<00:51,  5.54it/s]

Epoch 2215, Loss: 0.0141
Epoch 2216, Loss: 0.0141


Training:  89%|████████▊ | 2217/2500 [06:43<00:51,  5.52it/s]

Epoch 2217, Loss: 0.0141
Epoch 2218, Loss: 0.0141


Training:  89%|████████▉ | 2219/2500 [06:44<00:50,  5.53it/s]

Epoch 2219, Loss: 0.0141
Epoch 2220, Loss: 0.0141


Training:  89%|████████▉ | 2221/2500 [06:44<00:50,  5.55it/s]

Epoch 2221, Loss: 0.0141
Epoch 2222, Loss: 0.0141


Training:  89%|████████▉ | 2223/2500 [06:44<00:50,  5.54it/s]

Epoch 2223, Loss: 0.0141
Epoch 2224, Loss: 0.0141


Training:  89%|████████▉ | 2225/2500 [06:45<00:49,  5.55it/s]

Epoch 2225, Loss: 0.0141
Epoch 2226, Loss: 0.0141


Training:  89%|████████▉ | 2227/2500 [06:45<00:49,  5.54it/s]

Epoch 2227, Loss: 0.0141
Epoch 2228, Loss: 0.0141


Training:  89%|████████▉ | 2229/2500 [06:45<00:49,  5.49it/s]

Epoch 2229, Loss: 0.0141
Epoch 2230, Loss: 0.0141


Training:  89%|████████▉ | 2231/2500 [06:46<00:49,  5.43it/s]

Epoch 2231, Loss: 0.0141
Epoch 2232, Loss: 0.0141


Training:  89%|████████▉ | 2233/2500 [06:46<00:49,  5.45it/s]

Epoch 2233, Loss: 0.0141


Training:  89%|████████▉ | 2234/2500 [06:46<00:50,  5.27it/s]

Epoch 2234, Loss: 0.0141
Epoch 2235, Loss: 0.0141


Training:  89%|████████▉ | 2236/2500 [06:47<00:49,  5.34it/s]

Epoch 2236, Loss: 0.0141
Epoch 2237, Loss: 0.0141


Training:  90%|████████▉ | 2238/2500 [06:47<00:49,  5.33it/s]

Epoch 2238, Loss: 0.0141
Epoch 2239, Loss: 0.0141


Training:  90%|████████▉ | 2240/2500 [06:47<00:49,  5.24it/s]

Epoch 2240, Loss: 0.0141
Epoch 2241, Loss: 0.0141


Training:  90%|████████▉ | 2242/2500 [06:48<00:47,  5.40it/s]

Epoch 2242, Loss: 0.0141
Epoch 2243, Loss: 0.0141


Training:  90%|████████▉ | 2244/2500 [06:48<00:46,  5.46it/s]

Epoch 2244, Loss: 0.0141
Epoch 2245, Loss: 0.0141


Training:  90%|████████▉ | 2246/2500 [06:48<00:46,  5.50it/s]

Epoch 2246, Loss: 0.0141
Epoch 2247, Loss: 0.0141


Training:  90%|████████▉ | 2248/2500 [06:49<00:45,  5.52it/s]

Epoch 2248, Loss: 0.0141
Epoch 2249, Loss: 0.0141


Training:  90%|█████████ | 2250/2500 [06:49<00:45,  5.53it/s]

Epoch 2250, Loss: 0.0141
Epoch 2251, Loss: 0.0141


Training:  90%|█████████ | 2252/2500 [06:50<00:44,  5.57it/s]

THRESHOLDING: 6 active coefficients
Epoch 2252, Loss: 0.0141


Training:  90%|█████████ | 2253/2500 [06:50<00:44,  5.55it/s]

Epoch 2253, Loss: 0.0141
Epoch 2254, Loss: 0.0141


Training:  90%|█████████ | 2255/2500 [06:50<00:44,  5.54it/s]

Epoch 2255, Loss: 0.0141
Epoch 2256, Loss: 0.0141


Training:  90%|█████████ | 2257/2500 [06:50<00:43,  5.53it/s]

Epoch 2257, Loss: 0.0141
Epoch 2258, Loss: 0.0141


Training:  90%|█████████ | 2259/2500 [06:51<00:43,  5.53it/s]

Epoch 2259, Loss: 0.0141
Epoch 2260, Loss: 0.0141


Training:  90%|█████████ | 2261/2500 [06:51<00:43,  5.54it/s]

Epoch 2261, Loss: 0.0141
Epoch 2262, Loss: 0.0141


Training:  91%|█████████ | 2263/2500 [06:52<00:42,  5.55it/s]

Epoch 2263, Loss: 0.0141
Epoch 2264, Loss: 0.0141


Training:  91%|█████████ | 2265/2500 [06:52<00:42,  5.55it/s]

Epoch 2265, Loss: 0.0141
Epoch 2266, Loss: 0.0141


Training:  91%|█████████ | 2267/2500 [06:52<00:41,  5.56it/s]

Epoch 2267, Loss: 0.0141
Epoch 2268, Loss: 0.0141


Training:  91%|█████████ | 2269/2500 [06:53<00:41,  5.56it/s]

Epoch 2269, Loss: 0.0141
Epoch 2270, Loss: 0.0141


Training:  91%|█████████ | 2271/2500 [06:53<00:41,  5.55it/s]

Epoch 2271, Loss: 0.0141
Epoch 2272, Loss: 0.0141


Training:  91%|█████████ | 2273/2500 [06:53<00:40,  5.54it/s]

Epoch 2273, Loss: 0.0141
Epoch 2274, Loss: 0.0141


Training:  91%|█████████ | 2275/2500 [06:54<00:40,  5.53it/s]

Epoch 2275, Loss: 0.0141
Epoch 2276, Loss: 0.0141


Training:  91%|█████████ | 2277/2500 [06:54<00:40,  5.53it/s]

Epoch 2277, Loss: 0.0141
Epoch 2278, Loss: 0.0141


Training:  91%|█████████ | 2279/2500 [06:54<00:40,  5.52it/s]

Epoch 2279, Loss: 0.0141
Epoch 2280, Loss: 0.0141


Training:  91%|█████████ | 2281/2500 [06:55<00:39,  5.54it/s]

Epoch 2281, Loss: 0.0141
Epoch 2282, Loss: 0.0141


Training:  91%|█████████▏| 2283/2500 [06:55<00:39,  5.55it/s]

Epoch 2283, Loss: 0.0141
Epoch 2284, Loss: 0.0141


Training:  91%|█████████▏| 2285/2500 [06:56<00:38,  5.55it/s]

Epoch 2285, Loss: 0.0141
Epoch 2286, Loss: 0.0141


Training:  91%|█████████▏| 2287/2500 [06:56<00:38,  5.56it/s]

Epoch 2287, Loss: 0.0141
Epoch 2288, Loss: 0.0141


Training:  92%|█████████▏| 2289/2500 [06:56<00:37,  5.57it/s]

Epoch 2289, Loss: 0.0141
Epoch 2290, Loss: 0.0141


Training:  92%|█████████▏| 2291/2500 [06:57<00:37,  5.55it/s]

Epoch 2291, Loss: 0.0141
Epoch 2292, Loss: 0.0141


Training:  92%|█████████▏| 2293/2500 [06:57<00:37,  5.55it/s]

Epoch 2293, Loss: 0.0141
Epoch 2294, Loss: 0.0141


Training:  92%|█████████▏| 2295/2500 [06:57<00:37,  5.54it/s]

Epoch 2295, Loss: 0.0141
Epoch 2296, Loss: 0.0141


Training:  92%|█████████▏| 2297/2500 [06:58<00:37,  5.46it/s]

Epoch 2297, Loss: 0.0141
Epoch 2298, Loss: 0.0141


Training:  92%|█████████▏| 2299/2500 [06:58<00:37,  5.42it/s]

Epoch 2299, Loss: 0.0141
Epoch 2300, Loss: 0.0141


Training:  92%|█████████▏| 2301/2500 [06:58<00:36,  5.38it/s]

Epoch 2301, Loss: 0.0141
Epoch 2302, Loss: 0.0141


Training:  92%|█████████▏| 2303/2500 [06:59<00:37,  5.31it/s]

Epoch 2303, Loss: 0.0141
Epoch 2304, Loss: 0.0141


Training:  92%|█████████▏| 2305/2500 [06:59<00:37,  5.23it/s]

Epoch 2305, Loss: 0.0141
Epoch 2306, Loss: 0.0141


Training:  92%|█████████▏| 2307/2500 [07:00<00:36,  5.23it/s]

Epoch 2307, Loss: 0.0141
Epoch 2308, Loss: 0.0141


Training:  92%|█████████▏| 2309/2500 [07:00<00:35,  5.38it/s]

Epoch 2309, Loss: 0.0141
Epoch 2310, Loss: 0.0141


Training:  92%|█████████▏| 2311/2500 [07:00<00:34,  5.47it/s]

Epoch 2311, Loss: 0.0141
Epoch 2312, Loss: 0.0141


Training:  93%|█████████▎| 2313/2500 [07:01<00:34,  5.48it/s]

Epoch 2313, Loss: 0.0141
Epoch 2314, Loss: 0.0141


Training:  93%|█████████▎| 2315/2500 [07:01<00:33,  5.50it/s]

Epoch 2315, Loss: 0.0141
Epoch 2316, Loss: 0.0141


Training:  93%|█████████▎| 2317/2500 [07:01<00:33,  5.52it/s]

Epoch 2317, Loss: 0.0141
Epoch 2318, Loss: 0.0141


Training:  93%|█████████▎| 2319/2500 [07:02<00:32,  5.53it/s]

Epoch 2319, Loss: 0.0141
Epoch 2320, Loss: 0.0141


Training:  93%|█████████▎| 2321/2500 [07:02<00:32,  5.53it/s]

Epoch 2321, Loss: 0.0141
Epoch 2322, Loss: 0.0141


Training:  93%|█████████▎| 2323/2500 [07:02<00:31,  5.53it/s]

Epoch 2323, Loss: 0.0141
Epoch 2324, Loss: 0.0141


Training:  93%|█████████▎| 2325/2500 [07:03<00:31,  5.53it/s]

Epoch 2325, Loss: 0.0141
Epoch 2326, Loss: 0.0141


Training:  93%|█████████▎| 2327/2500 [07:03<00:31,  5.53it/s]

Epoch 2327, Loss: 0.0141
Epoch 2328, Loss: 0.0141


Training:  93%|█████████▎| 2329/2500 [07:04<00:30,  5.55it/s]

Epoch 2329, Loss: 0.0141
Epoch 2330, Loss: 0.0141


Training:  93%|█████████▎| 2331/2500 [07:04<00:30,  5.51it/s]

Epoch 2331, Loss: 0.0141
Epoch 2332, Loss: 0.0141


Training:  93%|█████████▎| 2333/2500 [07:04<00:30,  5.52it/s]

Epoch 2333, Loss: 0.0141
Epoch 2334, Loss: 0.0141


Training:  93%|█████████▎| 2335/2500 [07:05<00:29,  5.54it/s]

Epoch 2335, Loss: 0.0141
Epoch 2336, Loss: 0.0141


Training:  93%|█████████▎| 2337/2500 [07:05<00:29,  5.53it/s]

Epoch 2337, Loss: 0.0141
Epoch 2338, Loss: 0.0141


Training:  94%|█████████▎| 2339/2500 [07:05<00:29,  5.55it/s]

Epoch 2339, Loss: 0.0141
Epoch 2340, Loss: 0.0141


Training:  94%|█████████▎| 2341/2500 [07:06<00:28,  5.55it/s]

Epoch 2341, Loss: 0.0141
Epoch 2342, Loss: 0.0141


Training:  94%|█████████▎| 2343/2500 [07:06<00:28,  5.55it/s]

Epoch 2343, Loss: 0.0141
Epoch 2344, Loss: 0.0141


Training:  94%|█████████▍| 2345/2500 [07:06<00:27,  5.55it/s]

Epoch 2345, Loss: 0.0141
Epoch 2346, Loss: 0.0141


Training:  94%|█████████▍| 2347/2500 [07:07<00:27,  5.55it/s]

Epoch 2347, Loss: 0.0141
Epoch 2348, Loss: 0.0141


Training:  94%|█████████▍| 2349/2500 [07:07<00:27,  5.54it/s]

Epoch 2349, Loss: 0.0141
Epoch 2350, Loss: 0.0141


Training:  94%|█████████▍| 2351/2500 [07:08<00:26,  5.55it/s]

Epoch 2351, Loss: 0.0141
Epoch 2352, Loss: 0.0141


Training:  94%|█████████▍| 2353/2500 [07:08<00:26,  5.53it/s]

Epoch 2353, Loss: 0.0141
Epoch 2354, Loss: 0.0141


Training:  94%|█████████▍| 2355/2500 [07:08<00:26,  5.54it/s]

Epoch 2355, Loss: 0.0141
Epoch 2356, Loss: 0.0141


Training:  94%|█████████▍| 2357/2500 [07:09<00:25,  5.55it/s]

Epoch 2357, Loss: 0.0141
Epoch 2358, Loss: 0.0141


Training:  94%|█████████▍| 2359/2500 [07:09<00:25,  5.55it/s]

Epoch 2359, Loss: 0.0141
Epoch 2360, Loss: 0.0141


Training:  94%|█████████▍| 2361/2500 [07:09<00:25,  5.53it/s]

Epoch 2361, Loss: 0.0141
Epoch 2362, Loss: 0.0141


Training:  95%|█████████▍| 2363/2500 [07:10<00:24,  5.50it/s]

Epoch 2363, Loss: 0.0141
Epoch 2364, Loss: 0.0141


Training:  95%|█████████▍| 2365/2500 [07:10<00:24,  5.44it/s]

Epoch 2365, Loss: 0.0141
Epoch 2366, Loss: 0.0141


Training:  95%|█████████▍| 2367/2500 [07:10<00:24,  5.41it/s]

Epoch 2367, Loss: 0.0141
Epoch 2368, Loss: 0.0141


Training:  95%|█████████▍| 2369/2500 [07:11<00:24,  5.36it/s]

Epoch 2369, Loss: 0.0141
Epoch 2370, Loss: 0.0141


Training:  95%|█████████▍| 2371/2500 [07:11<00:24,  5.18it/s]

Epoch 2371, Loss: 0.0141
Epoch 2372, Loss: 0.0141


Training:  95%|█████████▍| 2373/2500 [07:12<00:24,  5.20it/s]

Epoch 2373, Loss: 0.0141
Epoch 2374, Loss: 0.0141


Training:  95%|█████████▌| 2375/2500 [07:12<00:23,  5.34it/s]

Epoch 2375, Loss: 0.0141
Epoch 2376, Loss: 0.0141


Training:  95%|█████████▌| 2377/2500 [07:12<00:22,  5.45it/s]

Epoch 2377, Loss: 0.0141
Epoch 2378, Loss: 0.0141


Training:  95%|█████████▌| 2379/2500 [07:13<00:21,  5.50it/s]

Epoch 2379, Loss: 0.0141
Epoch 2380, Loss: 0.0141


Training:  95%|█████████▌| 2381/2500 [07:13<00:21,  5.53it/s]

Epoch 2381, Loss: 0.0141
Epoch 2382, Loss: 0.0141


Training:  95%|█████████▌| 2383/2500 [07:13<00:21,  5.52it/s]

Epoch 2383, Loss: 0.0141
Epoch 2384, Loss: 0.0141


Training:  95%|█████████▌| 2385/2500 [07:14<00:20,  5.54it/s]

Epoch 2385, Loss: 0.0141
Epoch 2386, Loss: 0.0141


Training:  95%|█████████▌| 2387/2500 [07:14<00:20,  5.51it/s]

Epoch 2387, Loss: 0.0141
Epoch 2388, Loss: 0.0141


Training:  96%|█████████▌| 2389/2500 [07:15<00:20,  5.52it/s]

Epoch 2389, Loss: 0.0141
Epoch 2390, Loss: 0.0141


Training:  96%|█████████▌| 2391/2500 [07:15<00:19,  5.53it/s]

Epoch 2391, Loss: 0.0141
Epoch 2392, Loss: 0.0141


Training:  96%|█████████▌| 2393/2500 [07:15<00:19,  5.52it/s]

Epoch 2393, Loss: 0.0141
Epoch 2394, Loss: 0.0141


Training:  96%|█████████▌| 2395/2500 [07:16<00:18,  5.53it/s]

Epoch 2395, Loss: 0.0141
Epoch 2396, Loss: 0.0141


Training:  96%|█████████▌| 2397/2500 [07:16<00:18,  5.52it/s]

Epoch 2397, Loss: 0.0141
Epoch 2398, Loss: 0.0141


Training:  96%|█████████▌| 2399/2500 [07:16<00:18,  5.52it/s]

Epoch 2399, Loss: 0.0141
Epoch 2400, Loss: 0.0141


Training:  96%|█████████▌| 2401/2500 [07:17<00:17,  5.54it/s]

Epoch 2401, Loss: 0.0141
Epoch 2402, Loss: 0.0141


Training:  96%|█████████▌| 2403/2500 [07:17<00:17,  5.53it/s]

Epoch 2403, Loss: 0.0141
Epoch 2404, Loss: 0.0141


Training:  96%|█████████▌| 2405/2500 [07:17<00:17,  5.51it/s]

Epoch 2405, Loss: 0.0141
Epoch 2406, Loss: 0.0141


Training:  96%|█████████▋| 2407/2500 [07:18<00:16,  5.53it/s]

Epoch 2407, Loss: 0.0141
Epoch 2408, Loss: 0.0141


Training:  96%|█████████▋| 2409/2500 [07:18<00:16,  5.53it/s]

Epoch 2409, Loss: 0.0141
Epoch 2410, Loss: 0.0141


Training:  96%|█████████▋| 2411/2500 [07:18<00:16,  5.52it/s]

Epoch 2411, Loss: 0.0141
Epoch 2412, Loss: 0.0141


Training:  97%|█████████▋| 2413/2500 [07:19<00:15,  5.54it/s]

Epoch 2413, Loss: 0.0141
Epoch 2414, Loss: 0.0141


Training:  97%|█████████▋| 2415/2500 [07:19<00:15,  5.56it/s]

Epoch 2415, Loss: 0.0141
Epoch 2416, Loss: 0.0141


Training:  97%|█████████▋| 2417/2500 [07:20<00:14,  5.56it/s]

Epoch 2417, Loss: 0.0141
Epoch 2418, Loss: 0.0141


Training:  97%|█████████▋| 2419/2500 [07:20<00:14,  5.50it/s]

Epoch 2419, Loss: 0.0141
Epoch 2420, Loss: 0.0141


Training:  97%|█████████▋| 2421/2500 [07:20<00:14,  5.52it/s]

Epoch 2421, Loss: 0.0141
Epoch 2422, Loss: 0.0141


Training:  97%|█████████▋| 2423/2500 [07:21<00:13,  5.54it/s]

Epoch 2423, Loss: 0.0141
Epoch 2424, Loss: 0.0141


Training:  97%|█████████▋| 2425/2500 [07:21<00:13,  5.54it/s]

Epoch 2425, Loss: 0.0141
Epoch 2426, Loss: 0.0141


Training:  97%|█████████▋| 2427/2500 [07:21<00:13,  5.54it/s]

Epoch 2427, Loss: 0.0141
Epoch 2428, Loss: 0.0141


Training:  97%|█████████▋| 2429/2500 [07:22<00:12,  5.53it/s]

Epoch 2429, Loss: 0.0141
Epoch 2430, Loss: 0.0141


Training:  97%|█████████▋| 2431/2500 [07:22<00:12,  5.46it/s]

Epoch 2431, Loss: 0.0141
Epoch 2432, Loss: 0.0141


Training:  97%|█████████▋| 2433/2500 [07:22<00:12,  5.44it/s]

Epoch 2433, Loss: 0.0141
Epoch 2434, Loss: 0.0141


Training:  97%|█████████▋| 2435/2500 [07:23<00:12,  5.37it/s]

Epoch 2435, Loss: 0.0141
Epoch 2436, Loss: 0.0141


Training:  97%|█████████▋| 2437/2500 [07:23<00:11,  5.43it/s]

Epoch 2437, Loss: 0.0141
Epoch 2438, Loss: 0.0141


Training:  98%|█████████▊| 2439/2500 [07:24<00:11,  5.41it/s]

Epoch 2439, Loss: 0.0141
Epoch 2440, Loss: 0.0141


Training:  98%|█████████▊| 2441/2500 [07:24<00:10,  5.37it/s]

Epoch 2441, Loss: 0.0141
Epoch 2442, Loss: 0.0141


Training:  98%|█████████▊| 2443/2500 [07:24<00:10,  5.44it/s]

Epoch 2443, Loss: 0.0141
Epoch 2444, Loss: 0.0141


Training:  98%|█████████▊| 2445/2500 [07:25<00:09,  5.50it/s]

Epoch 2445, Loss: 0.0141
Epoch 2446, Loss: 0.0141


Training:  98%|█████████▊| 2447/2500 [07:25<00:09,  5.52it/s]

Epoch 2447, Loss: 0.0141
Epoch 2448, Loss: 0.0141


Training:  98%|█████████▊| 2449/2500 [07:25<00:09,  5.55it/s]

Epoch 2449, Loss: 0.0141
Epoch 2450, Loss: 0.0141


Training:  98%|█████████▊| 2451/2500 [07:26<00:08,  5.54it/s]

Epoch 2451, Loss: 0.0141
Epoch 2452, Loss: 0.0141


Training:  98%|█████████▊| 2453/2500 [07:26<00:08,  5.54it/s]

Epoch 2453, Loss: 0.0141
Epoch 2454, Loss: 0.0141


Training:  98%|█████████▊| 2455/2500 [07:26<00:08,  5.53it/s]

Epoch 2455, Loss: 0.0141
Epoch 2456, Loss: 0.0141


Training:  98%|█████████▊| 2457/2500 [07:27<00:07,  5.51it/s]

Epoch 2457, Loss: 0.0141
Epoch 2458, Loss: 0.0141


Training:  98%|█████████▊| 2459/2500 [07:27<00:07,  5.52it/s]

Epoch 2459, Loss: 0.0141
Epoch 2460, Loss: 0.0141


Training:  98%|█████████▊| 2461/2500 [07:28<00:07,  5.54it/s]

Epoch 2461, Loss: 0.0141
Epoch 2462, Loss: 0.0141


Training:  99%|█████████▊| 2463/2500 [07:28<00:06,  5.53it/s]

Epoch 2463, Loss: 0.0141
Epoch 2464, Loss: 0.0141


Training:  99%|█████████▊| 2465/2500 [07:28<00:06,  5.54it/s]

Epoch 2465, Loss: 0.0141
Epoch 2466, Loss: 0.0141


Training:  99%|█████████▊| 2467/2500 [07:29<00:05,  5.55it/s]

Epoch 2467, Loss: 0.0141
Epoch 2468, Loss: 0.0141


Training:  99%|█████████▉| 2469/2500 [07:29<00:05,  5.52it/s]

Epoch 2469, Loss: 0.0141
Epoch 2470, Loss: 0.0141


Training:  99%|█████████▉| 2471/2500 [07:29<00:05,  5.54it/s]

Epoch 2471, Loss: 0.0141
Epoch 2472, Loss: 0.0141


Training:  99%|█████████▉| 2473/2500 [07:30<00:04,  5.53it/s]

Epoch 2473, Loss: 0.0141
Epoch 2474, Loss: 0.0141


Training:  99%|█████████▉| 2475/2500 [07:30<00:04,  5.53it/s]

Epoch 2475, Loss: 0.0141
Epoch 2476, Loss: 0.0141


Training:  99%|█████████▉| 2477/2500 [07:30<00:04,  5.52it/s]

Epoch 2477, Loss: 0.0141
Epoch 2478, Loss: 0.0141


Training:  99%|█████████▉| 2479/2500 [07:31<00:03,  5.54it/s]

Epoch 2479, Loss: 0.0141
Epoch 2480, Loss: 0.0141


Training:  99%|█████████▉| 2481/2500 [07:31<00:03,  5.55it/s]

Epoch 2481, Loss: 0.0141
Epoch 2482, Loss: 0.0141


Training:  99%|█████████▉| 2483/2500 [07:32<00:03,  5.55it/s]

Epoch 2483, Loss: 0.0141
Epoch 2484, Loss: 0.0141


Training:  99%|█████████▉| 2485/2500 [07:32<00:02,  5.54it/s]

Epoch 2485, Loss: 0.0141
Epoch 2486, Loss: 0.0141


Training:  99%|█████████▉| 2487/2500 [07:32<00:02,  5.53it/s]

Epoch 2487, Loss: 0.0141
Epoch 2488, Loss: 0.0141


Training: 100%|█████████▉| 2489/2500 [07:33<00:01,  5.55it/s]

Epoch 2489, Loss: 0.0141
Epoch 2490, Loss: 0.0141


Training: 100%|█████████▉| 2491/2500 [07:33<00:01,  5.50it/s]

Epoch 2491, Loss: 0.0141
Epoch 2492, Loss: 0.0141


Training: 100%|█████████▉| 2493/2500 [07:33<00:01,  5.52it/s]

Epoch 2493, Loss: 0.0141
Epoch 2494, Loss: 0.0141


Training: 100%|█████████▉| 2495/2500 [07:34<00:00,  5.54it/s]

Epoch 2495, Loss: 0.0141
Epoch 2496, Loss: 0.0141


Training: 100%|█████████▉| 2497/2500 [07:34<00:00,  5.50it/s]

Epoch 2497, Loss: 0.0141
Epoch 2498, Loss: 0.0141


Training: 100%|█████████▉| 2499/2500 [07:34<00:00,  5.35it/s]

Epoch 2499, Loss: 0.0141
Epoch 2500, Loss: 0.0141


Training: 100%|██████████| 2500/2500 [07:35<00:00,  5.49it/s]
Refinement:   0%|          | 2/500 [00:00<00:29, 17.11it/s]

Epoch 1, Loss: 0.0112
Epoch 2, Loss: 0.0110
Epoch 3, Loss: 0.0108
Epoch 4, Loss: 0.0107
Epoch 5, Loss: 0.0106


Refinement:   2%|▏         | 8/500 [00:00<00:25, 19.60it/s]

Epoch 6, Loss: 0.0105
Epoch 7, Loss: 0.0104
Epoch 8, Loss: 0.0104
Epoch 9, Loss: 0.0104
Epoch 10, Loss: 0.0104


Refinement:   3%|▎         | 14/500 [00:00<00:24, 19.53it/s]

Epoch 11, Loss: 0.0104
Epoch 12, Loss: 0.0104
Epoch 13, Loss: 0.0104
Epoch 14, Loss: 0.0104
Epoch 15, Loss: 0.0104


Refinement:   3%|▎         | 17/500 [00:00<00:24, 19.90it/s]

Epoch 16, Loss: 0.0104
Epoch 17, Loss: 0.0104
Epoch 18, Loss: 0.0104
Epoch 19, Loss: 0.0104
Epoch 20, Loss: 0.0103


Refinement:   5%|▍         | 24/500 [00:01<00:24, 19.56it/s]

Epoch 21, Loss: 0.0103
Epoch 22, Loss: 0.0103
Epoch 23, Loss: 0.0103
Epoch 24, Loss: 0.0103


Refinement:   6%|▌         | 28/500 [00:01<00:25, 18.64it/s]

Epoch 25, Loss: 0.0103
Epoch 26, Loss: 0.0103
Epoch 27, Loss: 0.0103
Epoch 28, Loss: 0.0103


Refinement:   6%|▋         | 32/500 [00:01<00:25, 18.65it/s]

Epoch 29, Loss: 0.0103
Epoch 30, Loss: 0.0103
Epoch 31, Loss: 0.0103
Epoch 32, Loss: 0.0103


Refinement:   7%|▋         | 34/500 [00:01<00:25, 18.59it/s]

Epoch 33, Loss: 0.0103
Epoch 34, Loss: 0.0103
Epoch 35, Loss: 0.0103
Epoch 36, Loss: 0.0103
Epoch 37, Loss: 0.0103


Refinement:   8%|▊         | 40/500 [00:02<00:23, 19.97it/s]

Epoch 38, Loss: 0.0103
Epoch 39, Loss: 0.0103
Epoch 40, Loss: 0.0103
Epoch 41, Loss: 0.0103
Epoch 42, Loss: 0.0103


Refinement:   9%|▉         | 46/500 [00:02<00:21, 20.64it/s]

Epoch 43, Loss: 0.0103
Epoch 44, Loss: 0.0103
Epoch 45, Loss: 0.0103
Epoch 46, Loss: 0.0103
Epoch 47, Loss: 0.0103


Refinement:  10%|▉         | 49/500 [00:02<00:21, 20.66it/s]

Epoch 48, Loss: 0.0103
Epoch 49, Loss: 0.0103
Epoch 50, Loss: 0.0103
Epoch 51, Loss: 0.0103
Epoch 52, Loss: 0.0103


Refinement:  11%|█         | 55/500 [00:02<00:21, 20.96it/s]

Epoch 53, Loss: 0.0103
Epoch 54, Loss: 0.0103
Epoch 55, Loss: 0.0103
Epoch 56, Loss: 0.0103
Epoch 57, Loss: 0.0103


Refinement:  12%|█▏        | 61/500 [00:03<00:20, 21.02it/s]

Epoch 58, Loss: 0.0103
Epoch 59, Loss: 0.0103
Epoch 60, Loss: 0.0103
Epoch 61, Loss: 0.0103
Epoch 62, Loss: 0.0103


Refinement:  13%|█▎        | 64/500 [00:03<00:20, 20.97it/s]

Epoch 63, Loss: 0.0103
Epoch 64, Loss: 0.0103
Epoch 65, Loss: 0.0103
Epoch 66, Loss: 0.0103
Epoch 67, Loss: 0.0103


Refinement:  14%|█▍        | 70/500 [00:03<00:20, 20.77it/s]

Epoch 68, Loss: 0.0103
Epoch 69, Loss: 0.0103
Epoch 70, Loss: 0.0103
Epoch 71, Loss: 0.0103
Epoch 72, Loss: 0.0103


Refinement:  15%|█▌        | 76/500 [00:03<00:20, 21.01it/s]

Epoch 73, Loss: 0.0103
Epoch 74, Loss: 0.0103
Epoch 75, Loss: 0.0103
Epoch 76, Loss: 0.0103
Epoch 77, Loss: 0.0103


Refinement:  16%|█▌        | 79/500 [00:03<00:20, 20.69it/s]

Epoch 78, Loss: 0.0103
Epoch 79, Loss: 0.0103
Epoch 80, Loss: 0.0103
Epoch 81, Loss: 0.0103
Epoch 82, Loss: 0.0103


Refinement:  17%|█▋        | 85/500 [00:04<00:19, 20.93it/s]

Epoch 83, Loss: 0.0103
Epoch 84, Loss: 0.0103
Epoch 85, Loss: 0.0103
Epoch 86, Loss: 0.0103
Epoch 87, Loss: 0.0103


Refinement:  18%|█▊        | 91/500 [00:04<00:19, 20.77it/s]

Epoch 88, Loss: 0.0103
Epoch 89, Loss: 0.0103
Epoch 90, Loss: 0.0103
Epoch 91, Loss: 0.0103
Epoch 92, Loss: 0.0103


Refinement:  19%|█▉        | 94/500 [00:04<00:19, 20.76it/s]

Epoch 93, Loss: 0.0103
Epoch 94, Loss: 0.0103
Epoch 95, Loss: 0.0103
Epoch 96, Loss: 0.0103
Epoch 97, Loss: 0.0103


Refinement:  20%|██        | 100/500 [00:04<00:19, 21.01it/s]

Epoch 98, Loss: 0.0103
Epoch 99, Loss: 0.0103
Epoch 100, Loss: 0.0103
Epoch 101, Loss: 0.0103
Epoch 102, Loss: 0.0103


Refinement:  21%|██        | 106/500 [00:05<00:18, 21.14it/s]

Epoch 103, Loss: 0.0103
Epoch 104, Loss: 0.0103
Epoch 105, Loss: 0.0103
Epoch 106, Loss: 0.0103
Epoch 107, Loss: 0.0103


Refinement:  22%|██▏       | 109/500 [00:05<00:18, 21.01it/s]

Epoch 108, Loss: 0.0103
Epoch 109, Loss: 0.0103
Epoch 110, Loss: 0.0103
Epoch 111, Loss: 0.0103
Epoch 112, Loss: 0.0103


Refinement:  23%|██▎       | 115/500 [00:05<00:18, 20.53it/s]

Epoch 113, Loss: 0.0103
Epoch 114, Loss: 0.0103
Epoch 115, Loss: 0.0103
Epoch 116, Loss: 0.0103
Epoch 117, Loss: 0.0103


Refinement:  24%|██▍       | 121/500 [00:05<00:18, 21.00it/s]

Epoch 118, Loss: 0.0103
Epoch 119, Loss: 0.0103
Epoch 120, Loss: 0.0103
Epoch 121, Loss: 0.0103
Epoch 122, Loss: 0.0103


Refinement:  25%|██▍       | 124/500 [00:06<00:17, 21.02it/s]

Epoch 123, Loss: 0.0103
Epoch 124, Loss: 0.0103
Epoch 125, Loss: 0.0103
Epoch 126, Loss: 0.0103
Epoch 127, Loss: 0.0103


Refinement:  26%|██▌       | 130/500 [00:06<00:17, 21.16it/s]

Epoch 128, Loss: 0.0103
Epoch 129, Loss: 0.0103
Epoch 130, Loss: 0.0103
Epoch 131, Loss: 0.0103
Epoch 132, Loss: 0.0103


Refinement:  27%|██▋       | 136/500 [00:06<00:17, 20.95it/s]

Epoch 133, Loss: 0.0103
Epoch 134, Loss: 0.0103
Epoch 135, Loss: 0.0103
Epoch 136, Loss: 0.0103
Epoch 137, Loss: 0.0103


Refinement:  28%|██▊       | 139/500 [00:06<00:17, 20.94it/s]

Epoch 138, Loss: 0.0103
Epoch 139, Loss: 0.0103
Epoch 140, Loss: 0.0103
Epoch 141, Loss: 0.0103
Epoch 142, Loss: 0.0103


Refinement:  29%|██▉       | 145/500 [00:07<00:16, 21.06it/s]

Epoch 143, Loss: 0.0103
Epoch 144, Loss: 0.0103
Epoch 145, Loss: 0.0103
Epoch 146, Loss: 0.0103
Epoch 147, Loss: 0.0103


Refinement:  30%|███       | 151/500 [00:07<00:16, 20.76it/s]

Epoch 148, Loss: 0.0103
Epoch 149, Loss: 0.0103
Epoch 150, Loss: 0.0103
Epoch 151, Loss: 0.0103
Epoch 152, Loss: 0.0103


Refinement:  31%|███       | 154/500 [00:07<00:16, 20.37it/s]

Epoch 153, Loss: 0.0103
Epoch 154, Loss: 0.0103
Epoch 155, Loss: 0.0103
Epoch 156, Loss: 0.0103


Refinement:  32%|███▏      | 160/500 [00:07<00:17, 19.97it/s]

Epoch 157, Loss: 0.0103
Epoch 158, Loss: 0.0103
Epoch 159, Loss: 0.0103
Epoch 160, Loss: 0.0103


Refinement:  33%|███▎      | 164/500 [00:08<00:17, 19.52it/s]

Epoch 161, Loss: 0.0103
Epoch 162, Loss: 0.0103
Epoch 163, Loss: 0.0103
Epoch 164, Loss: 0.0103


Refinement:  34%|███▎      | 168/500 [00:08<00:17, 19.23it/s]

Epoch 165, Loss: 0.0103
Epoch 166, Loss: 0.0103
Epoch 167, Loss: 0.0103
Epoch 168, Loss: 0.0103


Refinement:  34%|███▍      | 172/500 [00:08<00:17, 19.00it/s]

Epoch 169, Loss: 0.0103
Epoch 170, Loss: 0.0103
Epoch 171, Loss: 0.0103
Epoch 172, Loss: 0.0103


Refinement:  35%|███▍      | 174/500 [00:08<00:16, 19.23it/s]

Epoch 173, Loss: 0.0103
Epoch 174, Loss: 0.0103
Epoch 175, Loss: 0.0103
Epoch 176, Loss: 0.0103
Epoch 177, Loss: 0.0103


Refinement:  36%|███▌      | 180/500 [00:08<00:16, 19.97it/s]

Epoch 178, Loss: 0.0103
Epoch 179, Loss: 0.0103
Epoch 180, Loss: 0.0103
Epoch 181, Loss: 0.0103
Epoch 182, Loss: 0.0103


Refinement:  37%|███▋      | 184/500 [00:09<00:16, 19.26it/s]

Epoch 183, Loss: 0.0103
Epoch 184, Loss: 0.0103
Epoch 185, Loss: 0.0103
Epoch 186, Loss: 0.0103


Refinement:  38%|███▊      | 189/500 [00:09<00:16, 19.28it/s]

Epoch 187, Loss: 0.0103
Epoch 188, Loss: 0.0103
Epoch 189, Loss: 0.0103
Epoch 190, Loss: 0.0103


Refinement:  39%|███▊      | 193/500 [00:09<00:16, 18.93it/s]

Epoch 191, Loss: 0.0103
Epoch 192, Loss: 0.0103
Epoch 193, Loss: 0.0103
Epoch 194, Loss: 0.0103


Refinement:  39%|███▉      | 197/500 [00:09<00:16, 18.92it/s]

Epoch 195, Loss: 0.0103
Epoch 196, Loss: 0.0103
Epoch 197, Loss: 0.0103
Epoch 198, Loss: 0.0103


Refinement:  40%|████      | 200/500 [00:09<00:15, 19.78it/s]

Epoch 199, Loss: 0.0103
Epoch 200, Loss: 0.0103
Epoch 201, Loss: 0.0103
Epoch 202, Loss: 0.0103
Epoch 203, Loss: 0.0103


Refinement:  41%|████      | 206/500 [00:10<00:14, 20.62it/s]

Epoch 204, Loss: 0.0103
Epoch 205, Loss: 0.0103
Epoch 206, Loss: 0.0103
Epoch 207, Loss: 0.0103
Epoch 208, Loss: 0.0103


Refinement:  42%|████▏     | 212/500 [00:10<00:13, 21.04it/s]

Epoch 209, Loss: 0.0103
Epoch 210, Loss: 0.0103
Epoch 211, Loss: 0.0103
Epoch 212, Loss: 0.0103
Epoch 213, Loss: 0.0103


Refinement:  43%|████▎     | 215/500 [00:10<00:13, 20.89it/s]

Epoch 214, Loss: 0.0103
Epoch 215, Loss: 0.0103
Epoch 216, Loss: 0.0103
Epoch 217, Loss: 0.0103
Epoch 218, Loss: 0.0103


Refinement:  44%|████▍     | 221/500 [00:10<00:13, 20.77it/s]

Epoch 219, Loss: 0.0103
Epoch 220, Loss: 0.0103
Epoch 221, Loss: 0.0103
Epoch 222, Loss: 0.0103
Epoch 223, Loss: 0.0103


Refinement:  45%|████▌     | 227/500 [00:11<00:13, 20.98it/s]

Epoch 224, Loss: 0.0103
Epoch 225, Loss: 0.0103
Epoch 226, Loss: 0.0103
Epoch 227, Loss: 0.0103
Epoch 228, Loss: 0.0103


Refinement:  46%|████▌     | 230/500 [00:11<00:12, 21.00it/s]

Epoch 229, Loss: 0.0103
Epoch 230, Loss: 0.0103
Epoch 231, Loss: 0.0103
Epoch 232, Loss: 0.0103
Epoch 233, Loss: 0.0103


Refinement:  47%|████▋     | 236/500 [00:11<00:12, 21.02it/s]

Epoch 234, Loss: 0.0103
Epoch 235, Loss: 0.0103
Epoch 236, Loss: 0.0103
Epoch 237, Loss: 0.0103
Epoch 238, Loss: 0.0103


Refinement:  48%|████▊     | 242/500 [00:11<00:12, 20.56it/s]

Epoch 239, Loss: 0.0103
Epoch 240, Loss: 0.0103
Epoch 241, Loss: 0.0103
Epoch 242, Loss: 0.0103


Refinement:  49%|████▉     | 245/500 [00:12<00:12, 20.33it/s]

Epoch 243, Loss: 0.0103
Epoch 244, Loss: 0.0103
Epoch 245, Loss: 0.0103
Epoch 246, Loss: 0.0103


Refinement:  50%|█████     | 250/500 [00:12<00:12, 19.75it/s]

Epoch 247, Loss: 0.0103
Epoch 248, Loss: 0.0103
Epoch 249, Loss: 0.0103
Epoch 250, Loss: 0.0103


Refinement:  51%|█████     | 254/500 [00:12<00:12, 19.12it/s]

Epoch 251, Loss: 0.0103
Epoch 252, Loss: 0.0103
Epoch 253, Loss: 0.0103
Epoch 254, Loss: 0.0103


Refinement:  51%|█████     | 256/500 [00:12<00:12, 18.98it/s]

Epoch 255, Loss: 0.0103
Epoch 256, Loss: 0.0103
Epoch 257, Loss: 0.0103
Epoch 258, Loss: 0.0103
Epoch 259, Loss: 0.0103


Refinement:  52%|█████▏    | 261/500 [00:12<00:12, 19.19it/s]

Epoch 260, Loss: 0.0103
Epoch 261, Loss: 0.0103
Epoch 262, Loss: 0.0103
Epoch 263, Loss: 0.0103


Refinement:  53%|█████▎    | 266/500 [00:13<00:12, 19.42it/s]

Epoch 264, Loss: 0.0103
Epoch 265, Loss: 0.0103
Epoch 266, Loss: 0.0103
Epoch 267, Loss: 0.0103


Refinement:  54%|█████▍    | 270/500 [00:13<00:12, 19.05it/s]

Epoch 268, Loss: 0.0103
Epoch 269, Loss: 0.0103
Epoch 270, Loss: 0.0103
Epoch 271, Loss: 0.0103


Refinement:  55%|█████▍    | 274/500 [00:13<00:11, 18.93it/s]

Epoch 272, Loss: 0.0103
Epoch 273, Loss: 0.0103
Epoch 274, Loss: 0.0103
Epoch 275, Loss: 0.0103


Refinement:  56%|█████▌    | 278/500 [00:13<00:11, 18.67it/s]

Epoch 276, Loss: 0.0103
Epoch 277, Loss: 0.0103
Epoch 278, Loss: 0.0103
Epoch 279, Loss: 0.0103


Refinement:  56%|█████▋    | 282/500 [00:14<00:11, 18.37it/s]

Epoch 280, Loss: 0.0103
Epoch 281, Loss: 0.0103
Epoch 282, Loss: 0.0103
Epoch 283, Loss: 0.0103


Refinement:  57%|█████▋    | 286/500 [00:14<00:11, 18.46it/s]

Epoch 284, Loss: 0.0103
Epoch 285, Loss: 0.0103
Epoch 286, Loss: 0.0103
Epoch 287, Loss: 0.0103


Refinement:  58%|█████▊    | 289/500 [00:14<00:10, 19.40it/s]

Epoch 288, Loss: 0.0103
Epoch 289, Loss: 0.0103
Epoch 290, Loss: 0.0103
Epoch 291, Loss: 0.0103
Epoch 292, Loss: 0.0103


Refinement:  59%|█████▉    | 295/500 [00:14<00:10, 20.39it/s]

Epoch 293, Loss: 0.0103
Epoch 294, Loss: 0.0103
Epoch 295, Loss: 0.0103
Epoch 296, Loss: 0.0103
Epoch 297, Loss: 0.0103


Refinement:  60%|██████    | 301/500 [00:14<00:09, 20.85it/s]

Epoch 298, Loss: 0.0103
Epoch 299, Loss: 0.0103
Epoch 300, Loss: 0.0103
Epoch 301, Loss: 0.0103
Epoch 302, Loss: 0.0103


Refinement:  61%|██████    | 304/500 [00:15<00:09, 20.75it/s]

Epoch 303, Loss: 0.0103
Epoch 304, Loss: 0.0103
Epoch 305, Loss: 0.0103
Epoch 306, Loss: 0.0103
Epoch 307, Loss: 0.0103


Refinement:  62%|██████▏   | 310/500 [00:15<00:09, 20.74it/s]

Epoch 308, Loss: 0.0103
Epoch 309, Loss: 0.0103
Epoch 310, Loss: 0.0103
Epoch 311, Loss: 0.0103
Epoch 312, Loss: 0.0103


Refinement:  63%|██████▎   | 316/500 [00:15<00:08, 20.90it/s]

Epoch 313, Loss: 0.0103
Epoch 314, Loss: 0.0103
Epoch 315, Loss: 0.0103
Epoch 316, Loss: 0.0103
Epoch 317, Loss: 0.0103


Refinement:  64%|██████▍   | 319/500 [00:15<00:08, 20.96it/s]

Epoch 318, Loss: 0.0103
Epoch 319, Loss: 0.0103
Epoch 320, Loss: 0.0103
Epoch 321, Loss: 0.0103
Epoch 322, Loss: 0.0103


Refinement:  65%|██████▌   | 325/500 [00:16<00:08, 20.71it/s]

Epoch 323, Loss: 0.0103
Epoch 324, Loss: 0.0103
Epoch 325, Loss: 0.0103
Epoch 326, Loss: 0.0103
Epoch 327, Loss: 0.0103


Refinement:  66%|██████▌   | 331/500 [00:16<00:08, 20.99it/s]

Epoch 328, Loss: 0.0103
Epoch 329, Loss: 0.0103
Epoch 330, Loss: 0.0103
Epoch 331, Loss: 0.0103
Epoch 332, Loss: 0.0103


Refinement:  67%|██████▋   | 334/500 [00:16<00:07, 20.79it/s]

Epoch 333, Loss: 0.0103
Epoch 334, Loss: 0.0103
Epoch 335, Loss: 0.0103
Epoch 336, Loss: 0.0103
Epoch 337, Loss: 0.0103


Refinement:  68%|██████▊   | 340/500 [00:16<00:07, 20.95it/s]

Epoch 338, Loss: 0.0103
Epoch 339, Loss: 0.0103
Epoch 340, Loss: 0.0103
Epoch 341, Loss: 0.0103
Epoch 342, Loss: 0.0103


Refinement:  69%|██████▉   | 346/500 [00:17<00:07, 20.82it/s]

Epoch 343, Loss: 0.0103
Epoch 344, Loss: 0.0103
Epoch 345, Loss: 0.0103
Epoch 346, Loss: 0.0103
Epoch 347, Loss: 0.0103


Refinement:  70%|██████▉   | 349/500 [00:17<00:07, 20.90it/s]

Epoch 348, Loss: 0.0103
Epoch 349, Loss: 0.0103
Epoch 350, Loss: 0.0103
Epoch 351, Loss: 0.0103
Epoch 352, Loss: 0.0103


Refinement:  71%|███████   | 355/500 [00:17<00:06, 21.02it/s]

Epoch 353, Loss: 0.0103
Epoch 354, Loss: 0.0103
Epoch 355, Loss: 0.0103
Epoch 356, Loss: 0.0103
Epoch 357, Loss: 0.0103


Refinement:  72%|███████▏  | 361/500 [00:17<00:06, 21.11it/s]

Epoch 358, Loss: 0.0103
Epoch 359, Loss: 0.0103
Epoch 360, Loss: 0.0103
Epoch 361, Loss: 0.0103
Epoch 362, Loss: 0.0103


Refinement:  73%|███████▎  | 364/500 [00:17<00:06, 21.10it/s]

Epoch 363, Loss: 0.0103
Epoch 364, Loss: 0.0103
Epoch 365, Loss: 0.0103
Epoch 366, Loss: 0.0103
Epoch 367, Loss: 0.0103


Refinement:  74%|███████▍  | 370/500 [00:18<00:06, 21.09it/s]

Epoch 368, Loss: 0.0103
Epoch 369, Loss: 0.0103
Epoch 370, Loss: 0.0103
Epoch 371, Loss: 0.0103
Epoch 372, Loss: 0.0103


Refinement:  75%|███████▌  | 376/500 [00:18<00:05, 21.04it/s]

Epoch 373, Loss: 0.0103
Epoch 374, Loss: 0.0103
Epoch 375, Loss: 0.0103
Epoch 376, Loss: 0.0103
Epoch 377, Loss: 0.0103


Refinement:  76%|███████▌  | 379/500 [00:18<00:05, 21.08it/s]

Epoch 378, Loss: 0.0103
Epoch 379, Loss: 0.0103
Epoch 380, Loss: 0.0103
Epoch 381, Loss: 0.0103
Epoch 382, Loss: 0.0103


Refinement:  77%|███████▋  | 385/500 [00:18<00:05, 21.17it/s]

Epoch 383, Loss: 0.0103
Epoch 384, Loss: 0.0103
Epoch 385, Loss: 0.0103
Epoch 386, Loss: 0.0103
Epoch 387, Loss: 0.0103


Refinement:  78%|███████▊  | 391/500 [00:19<00:05, 20.94it/s]

Epoch 388, Loss: 0.0103
Epoch 389, Loss: 0.0103
Epoch 390, Loss: 0.0103
Epoch 391, Loss: 0.0103
Epoch 392, Loss: 0.0103


Refinement:  79%|███████▉  | 394/500 [00:19<00:05, 20.98it/s]

Epoch 393, Loss: 0.0103
Epoch 394, Loss: 0.0103
Epoch 395, Loss: 0.0103
Epoch 396, Loss: 0.0103
Epoch 397, Loss: 0.0103


Refinement:  80%|████████  | 400/500 [00:19<00:04, 21.10it/s]

Epoch 398, Loss: 0.0103
Epoch 399, Loss: 0.0103
Epoch 400, Loss: 0.0103
Epoch 401, Loss: 0.0103
Epoch 402, Loss: 0.0103


Refinement:  81%|████████  | 406/500 [00:19<00:04, 21.21it/s]

Epoch 403, Loss: 0.0103
Epoch 404, Loss: 0.0103
Epoch 405, Loss: 0.0103
Epoch 406, Loss: 0.0103
Epoch 407, Loss: 0.0103


Refinement:  82%|████████▏ | 409/500 [00:20<00:04, 21.10it/s]

Epoch 408, Loss: 0.0103
Epoch 409, Loss: 0.0103
Epoch 410, Loss: 0.0103
Epoch 411, Loss: 0.0103
Epoch 412, Loss: 0.0103


Refinement:  83%|████████▎ | 415/500 [00:20<00:04, 20.96it/s]

Epoch 413, Loss: 0.0103
Epoch 414, Loss: 0.0103
Epoch 415, Loss: 0.0103
Epoch 416, Loss: 0.0103
Epoch 417, Loss: 0.0103


Refinement:  84%|████████▍ | 421/500 [00:20<00:03, 21.08it/s]

Epoch 418, Loss: 0.0103
Epoch 419, Loss: 0.0103
Epoch 420, Loss: 0.0103
Epoch 421, Loss: 0.0103
Epoch 422, Loss: 0.0103


Refinement:  85%|████████▍ | 424/500 [00:20<00:03, 21.08it/s]

Epoch 423, Loss: 0.0103
Epoch 424, Loss: 0.0103
Epoch 425, Loss: 0.0103
Epoch 426, Loss: 0.0103
Epoch 427, Loss: 0.0103


Refinement:  86%|████████▌ | 430/500 [00:21<00:03, 21.28it/s]

Epoch 428, Loss: 0.0103
Epoch 429, Loss: 0.0103
Epoch 430, Loss: 0.0103
Epoch 431, Loss: 0.0103
Epoch 432, Loss: 0.0103


Refinement:  87%|████████▋ | 436/500 [00:21<00:03, 21.12it/s]

Epoch 433, Loss: 0.0103
Epoch 434, Loss: 0.0103
Epoch 435, Loss: 0.0103
Epoch 436, Loss: 0.0103
Epoch 437, Loss: 0.0103


Refinement:  88%|████████▊ | 439/500 [00:21<00:02, 21.07it/s]

Epoch 438, Loss: 0.0103
Epoch 439, Loss: 0.0103
Epoch 440, Loss: 0.0103
Epoch 441, Loss: 0.0103
Epoch 442, Loss: 0.0103


Refinement:  89%|████████▉ | 445/500 [00:21<00:02, 21.25it/s]

Epoch 443, Loss: 0.0103
Epoch 444, Loss: 0.0103
Epoch 445, Loss: 0.0103
Epoch 446, Loss: 0.0103
Epoch 447, Loss: 0.0103


Refinement:  90%|█████████ | 451/500 [00:22<00:02, 21.27it/s]

Epoch 448, Loss: 0.0103
Epoch 449, Loss: 0.0103
Epoch 450, Loss: 0.0103
Epoch 451, Loss: 0.0103
Epoch 452, Loss: 0.0103


Refinement:  91%|█████████ | 454/500 [00:22<00:02, 20.93it/s]

Epoch 453, Loss: 0.0103
Epoch 454, Loss: 0.0103
Epoch 455, Loss: 0.0103
Epoch 456, Loss: 0.0103
Epoch 457, Loss: 0.0103


Refinement:  92%|█████████▏| 460/500 [00:22<00:01, 21.18it/s]

Epoch 458, Loss: 0.0103
Epoch 459, Loss: 0.0103
Epoch 460, Loss: 0.0103
Epoch 461, Loss: 0.0103
Epoch 462, Loss: 0.0103


Refinement:  93%|█████████▎| 466/500 [00:22<00:01, 21.28it/s]

Epoch 463, Loss: 0.0103
Epoch 464, Loss: 0.0103
Epoch 465, Loss: 0.0103
Epoch 466, Loss: 0.0103
Epoch 467, Loss: 0.0103


Refinement:  94%|█████████▍| 469/500 [00:22<00:01, 21.31it/s]

Epoch 468, Loss: 0.0103
Epoch 469, Loss: 0.0103
Epoch 470, Loss: 0.0103
Epoch 471, Loss: 0.0103
Epoch 472, Loss: 0.0103


Refinement:  95%|█████████▌| 475/500 [00:23<00:01, 21.15it/s]

Epoch 473, Loss: 0.0103
Epoch 474, Loss: 0.0103
Epoch 475, Loss: 0.0103
Epoch 476, Loss: 0.0103
Epoch 477, Loss: 0.0103


Refinement:  96%|█████████▌| 481/500 [00:23<00:00, 21.03it/s]

Epoch 478, Loss: 0.0103
Epoch 479, Loss: 0.0103
Epoch 480, Loss: 0.0103
Epoch 481, Loss: 0.0103
Epoch 482, Loss: 0.0103


Refinement:  97%|█████████▋| 484/500 [00:23<00:00, 20.95it/s]

Epoch 483, Loss: 0.0103
Epoch 484, Loss: 0.0103
Epoch 485, Loss: 0.0103
Epoch 486, Loss: 0.0103
Epoch 487, Loss: 0.0103


Refinement:  98%|█████████▊| 490/500 [00:23<00:00, 21.08it/s]

Epoch 488, Loss: 0.0103
Epoch 489, Loss: 0.0103
Epoch 490, Loss: 0.0103
Epoch 491, Loss: 0.0103
Epoch 492, Loss: 0.0103


Refinement:  99%|█████████▉| 496/500 [00:24<00:00, 20.89it/s]

Epoch 493, Loss: 0.0103
Epoch 494, Loss: 0.0103
Epoch 495, Loss: 0.0103
Epoch 496, Loss: 0.0103
Epoch 497, Loss: 0.0103


Refinement: 100%|██████████| 500/500 [00:24<00:00, 20.44it/s]

Epoch 498, Loss: 0.0103
Epoch 499, Loss: 0.0103
Epoch 500, Loss: 0.0103





In [16]:
# Multiply the SINDy coefficient tensor by its coefficient mask and show the
# result; zero entries of the product print as signed zeros (e.g. -0.0000).
masked_coefficients = sindy.sindy_coefficients * sindy.coefficient_mask
print(masked_coefficients)

tensor([[-0.0000],
        [-0.0000],
        [-0.1998],
        [ 0.0000],
        [-0.2506],
        [-0.0000],
        [ 0.3379],
        [-0.1766],
        [-0.0000],
        [-0.3826],
        [-0.3841],
        [ 0.0000]], device='cuda:0', dtype=torch.float64,
       grad_fn=<MulBackward0>)
