In [None]:
from typing import Optional
import math
import torch.nn as nn


class Generator(nn.Module):
    """Single-layer generator: a square linear map followed by a sigmoid.

    Args:
        input_length: Width of the input vectors; the output has the same width.
    """

    def __init__(self, input_length: int):
        super().__init__()
        width = int(input_length)
        self.dense_layer = nn.Linear(width, width)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        """Apply the linear layer, then squash every element with a sigmoid."""
        projected = self.dense_layer(x)
        return self.activation(projected)


class Discriminator(nn.Module):
    """Single-layer discriminator: a linear map to one value followed by a sigmoid.

    Args:
        input_length: Width of the input vectors.
    """

    def __init__(self, input_length: int):
        super().__init__()
        width = int(input_length)
        self.dense = nn.Linear(width, 1)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        """Return one sigmoid score per input row (last dimension has size 1)."""
        score = self.dense(x)
        return self.activation(score)


class DCGenerator(nn.Module):
    """Convolutional generator that grows an 8x8 seed feature map into a square image.

    A linear layer maps the input vector to ``num_base_filters`` feature maps of
    size 8x8.  Each following stage doubles the side length (nearest-neighbour
    upsampling + 3x3 convolution) until ``img_cols`` is reached, and a final 3x3
    convolution with ``tanh`` produces ``n_channels`` output channels.

    With the defaults (``img_cols=32``, ``num_base_filters=None``) the layer stack
    is 128 -> 128 -> 64 -> ``n_channels`` channels.

    Args:
        input_length: Size of each input vector.
        n_channels: Number of channels of the generated image.
        num_base_filters: Channels of the 8x8 seed feature map.  ``None`` selects
            ``32 * 2 ** number_of_upsampling_stages``.
        img_cols: Side length of the generated image; must be a power of two >= 8.

    Raises:
        ValueError: If ``img_cols`` is not a power of two greater than or equal to 8.
    """

    # Side length of the feature map produced by the initial linear layer.
    _SEED_SIZE = 8
    # Index in ``self.layers`` of the first layer that needs a 4-D input.
    _FIRST_SPATIAL_LAYER = 2

    def __init__(
        self,
        input_length: int,
        n_channels: int,
        num_base_filters: Optional[int] = None,
        img_cols: int = 32,
    ):
        super(DCGenerator, self).__init__()

        img_cols = int(img_cols)
        if img_cols < self._SEED_SIZE or img_cols & (img_cols - 1):
            raise ValueError("img_cols must be a power of two greater than or equal to 8")

        # Number of x2 upsampling stages needed to go from 8 to img_cols,
        # i.e. log2(img_cols) - 3, computed exactly with integer arithmetic.
        number_of_layers = (img_cols.bit_length() - 1) - 3

        if num_base_filters is None:
            num_base_filters = 32 * 2 ** number_of_layers

        self.img_cols = img_cols
        self.num_base_filters = int(num_base_filters)

        # Create the list to hold all sequential layers
        self.layers_list = []

        # Initial projection of the input vector to the flattened seed feature map
        seed_area = self._SEED_SIZE * self._SEED_SIZE
        self.layers_list.append(nn.Linear(input_length, self.num_base_filters * seed_area))
        self.layers_list.append(nn.ReLU())
        self.layers_list.append(nn.BatchNorm2d(self.num_base_filters))

        # Upsampling stages: the first keeps the channel count, later ones halve it.
        in_channels = self.num_base_filters
        for stage in range(number_of_layers):
            out_channels = in_channels if stage == 0 else max(in_channels // 2, 1)
            self.layers_list.append(nn.Upsample(scale_factor=2))
            self.layers_list.append(nn.Conv2d(in_channels, out_channels, 3, stride=1, padding=1))
            # NOTE(review): the second positional argument of BatchNorm2d is
            # ``eps``; 0.8 is kept from the original code but may have been
            # meant as ``momentum`` - confirm.
            self.layers_list.append(nn.BatchNorm2d(out_channels, eps=0.8))
            self.layers_list.append(nn.LeakyReLU(0.2, inplace=True))
            in_channels = out_channels

        # Output head
        self.layers_list.append(nn.Conv2d(in_channels, n_channels, 3, stride=1, padding=1))
        self.layers_list.append(nn.Tanh())

        self.layers = nn.ModuleList(self.layers_list)

    def forward(self, x):
        """Generate a ``(batch, n_channels, img_cols, img_cols)`` tensor from ``(batch, input_length)``."""
        for i, layer in enumerate(self.layers):
            if i == self._FIRST_SPATIAL_LAYER:
                # The linear layer emits flat vectors; the convolutional part
                # needs (batch, channels, height, width).
                x = x.view(x.size(0), self.num_base_filters, self._SEED_SIZE, self._SEED_SIZE)
            x = layer(x)
        return x


class DCDiscriminator(nn.Module):
    """Convolutional discriminator producing one sigmoid score per image.

    Four 3x3 convolutions with stride 2 shrink the image, the result is
    flattened and a linear layer followed by a sigmoid yields the score.

    Args:
        image_size: Side length of the (square) input images.
        input_channels: Number of channels of the input images.

    Raises:
        ValueError: If ``image_size`` is smaller than 1.
    """

    def __init__(self, image_size: int, input_channels: int):
        super(DCDiscriminator, self).__init__()

        feature_size = int(image_size)
        if feature_size < 1:
            raise ValueError("image_size must be a positive integer")

        # A 3x3 convolution with stride 2 and padding 1 maps a side of n
        # pixels to (n - 1) // 2 + 1 pixels; there are four of them below.
        for _ in range(4):
            feature_size = (feature_size - 1) // 2 + 1

        self.model = nn.Sequential(
            nn.Conv2d(input_channels, 32, 3, stride=2, padding=1),
            nn.ELU(),
            nn.Dropout2d(0.2),
            nn.Conv2d(32, 64, 3, stride=2, padding=1),
            nn.ELU(),
            # NOTE(review): the second positional argument of BatchNorm2d is
            # ``eps``; 0.8 is kept from the original code but may have been
            # meant as ``momentum`` - confirm.
            nn.BatchNorm2d(64, eps=0.8),
            nn.Conv2d(64, 128, 3, stride=2, padding=1),
            nn.ELU(),
            nn.BatchNorm2d(128, eps=0.8),
            nn.Conv2d(128, 256, 3, stride=2, padding=1),
            nn.ELU(),
            # The linear layer needs (batch, features), not (batch, C, H, W).
            nn.Flatten(),
            nn.Linear(256 * feature_size * feature_size, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Score a batch of images; returns a ``(batch, 1)`` tensor of values in [0, 1]."""
        return self.model(x)

In [None]:
from typing import Tuple
import math

import torch
import torch.nn as nn

# from models import Discriminator, Generator
# from utils import generate_even_data, convert_float_matrix_to_int_list


def train(
    max_int: int = 128,
    batch_size: int = 16,
    training_steps: int = 500,
    learning_rate: float = 0.001,
    print_output_every_n_steps: int = 10,
) -> Tuple[nn.Module, nn.Module]:
    """Trains the even GAN
    Args:
        max_int: The maximum integer our dataset goes to.  It is used to set the size of the binary
            lists
        batch_size: The number of examples in a training batch
        training_steps: The number of steps to train on.
        learning_rate: The learning rate for the generator and discriminator
        print_output_every_n_steps: The number of training steps before we print generated output.
            A value of 0 disables printing.
    Returns:
        generator: The trained generator model
        discriminator: The trained discriminator model

    NOTE(review): this function instantiates the module-level ``Generator`` and
    ``Discriminator`` with ``input_length``.  Later cells of this notebook
    redefine both names as convolutional models, so this must run while the
    linear versions are the ones in scope.
    """
    input_length = int(math.log(max_int, 2))

    # Models
    generator = Generator(input_length)
    discriminator = Discriminator(input_length)

    # Optimizers
    generator_optimizer = torch.optim.Adam(generator.parameters(), lr=learning_rate)
    discriminator_optimizer = torch.optim.Adam(
        discriminator.parameters(), lr=learning_rate
    )

    # loss
    loss = nn.BCELoss()

    # BCELoss requires targets with exactly the shape of the predictions.  The
    # discriminator is called on (batch_size, input_length) inputs and ends in
    # a one-unit linear layer, so every target tensor is (batch_size, 1).
    fake_labels = torch.zeros(batch_size, 1)

    for i in range(training_steps):
        # zero the gradients on each iteration
        generator_optimizer.zero_grad()

        # Create noisy input for generator
        # Need float type instead of int
        noise = torch.randint(0, 2, size=(batch_size, input_length)).float()
        generated_data = generator(noise)

        # Generate examples of even real data
        true_labels, true_data = generate_even_data(max_int, batch_size=batch_size)
        true_labels = torch.tensor(true_labels).float().unsqueeze(1)
        true_data = torch.tensor(true_data).float()

        # Train the generator
        # We invert the labels here and don't train the discriminator because we want the generator
        # to make things the discriminator classifies as true.
        generator_discriminator_out = discriminator(generated_data)
        generator_loss = loss(generator_discriminator_out, true_labels)
        generator_loss.backward()
        generator_optimizer.step()

        # Train the discriminator on the true/generated data.  Gradients that
        # the generator step left on the discriminator are cleared first.
        discriminator_optimizer.zero_grad()
        true_discriminator_out = discriminator(true_data)
        true_discriminator_loss = loss(true_discriminator_out, true_labels)

        # detach() keeps this backward pass from reaching the generator
        generator_discriminator_out = discriminator(generated_data.detach())
        generator_discriminator_loss = loss(generator_discriminator_out, fake_labels)
        discriminator_loss = (
            true_discriminator_loss + generator_discriminator_loss
        ) / 2
        discriminator_loss.backward()
        discriminator_optimizer.step()
        if print_output_every_n_steps and i % print_output_every_n_steps == 0:
            print(convert_float_matrix_to_int_list(generated_data))

    return generator, discriminator


# Run one training session with the default hyper-parameters when this code is
# executed as the main module.  The returned (generator, discriminator) pair
# is not kept.
if __name__ == "__main__":
    train()

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
ana
[[0, 1, 1, 1, 1, 1, 0], [0, 0, 1, 0, 0, 1, 0], [0, 1, 0, 0, 1, 0, 0], [0, 1, 1, 0, 1, 1, 0], [1, 0, 1, 0, 0, 0, 0], [0, 1, 1, 1, 1, 1, 0], [0, 1, 1, 1, 1, 1, 0], [1, 0, 1, 1, 1, 1, 0], [0, 1, 0, 1, 1, 0, 0], [1, 1, 1, 0, 0, 0, 0], [0, 1, 1, 0, 0, 1, 0], [1, 1, 1, 0, 0, 1, 0], [0, 1, 0, 0, 1, 0, 0], [1, 1, 0, 0, 0, 0, 0], [0, 1, 1, 0, 0, 1, 0], [0, 0, 1, 0, 0, 1, 0]]
bibi
            Time         V1         V2        V3        V4        V5  \
0            0.0  -1.359807  -0.072781  2.536347  1.378155 -0.338321   
1            0.0   1.191857   0.266151  0.166480  0.448154  0.060018   
2            1.0  -1.358354  -1.340163  1.773209  0.379780 -0.503198   
3            1.0  -0.966272  -0.185226  1.792993 -0.863291 -0.010309   
4            2.0  -1.158233   0.877737  1.548718  0.403034 -0.407193   
...          ...        ...        ...       ...       ...       ...   
284802  172786.0 -11.881118  10.071785 -9.834783 -2.066656 -5.364473 

ValueError: ignored

In [None]:
import math
from typing import List
import unittest

from ddt import data, ddt, unpack
import torch

from utils import convert_float_matrix_to_int_list
from train import train


@ddt
class TrainTest(unittest.TestCase):
    """Trains the GAN and checks that the generator then emits only even integers."""

    @data(
        (128, 16, 500, 0.001, "Test reasonable parameters"),
        (256, 16, 500, 0.001, "Test reasonable parameters"),
    )
    @unpack
    def test_train(
        self,
        max_int: int,
        batch_size: int,
        training_steps: int,
        learning_rate: float,
        test_description: str,
    ):
        """Train with the given settings, sample one batch and assert every value is even."""
        # The huge print interval keeps training silent during the test.
        trained_generator, _ = train(
            max_int=max_int,
            batch_size=batch_size,
            training_steps=training_steps,
            learning_rate=learning_rate,
            print_output_every_n_steps=1000000,
        )

        bit_width = int(math.log(max_int, 2))
        noise = torch.randint(0, 2, size=(batch_size, bit_width)).float()
        decoded = convert_float_matrix_to_int_list(trained_generator(noise))

        for value in decoded:
            self.assertEqual(value % 2, 0, test_description)


# Run the test cases when executed as the main module.  The explicit argv
# stops unittest from parsing the host process's command line (a notebook
# kernel is started with arguments unittest does not understand), and
# exit=False prevents it from calling sys.exit() when the run finishes.
if __name__ == "__main__":
    unittest.main(argv=["first-arg-is-ignored"], exit=False)

ModuleNotFoundError: ignored

In [None]:
from typing import List, Tuple
import numpy as np
import math


def create_binary_list_from_int(number: int) -> List[int]:
    """Creates a list of the binary representation of a non-negative integer
    Args:
        number: A non-negative ``int`` (exactly ``int``; ``bool``, ``float`` and other
            types are rejected)
    Returns:
        The binary digits of ``number``, most significant first, e.g. ``6 -> [1, 1, 0]``
        and ``0 -> [0]``.
    Raises:
        ValueError: If ``number`` is not an ``int`` or is negative.
    """
    # The type is checked first: comparing a non-numeric value with 0 would
    # raise TypeError before the intended ValueError could be reported.
    if type(number) is not int or number < 0:
        raise ValueError("Only Positive integers are allowed")

    # bin() yields e.g. "0b110"; drop the "0b" prefix and convert each digit.
    return [int(digit) for digit in bin(number)[2:]]


def generate_even_data(
    max_int: int, batch_size: int = 16
) -> Tuple[List[int], List[List[int]]]:
    """Samples one batch of even integers encoded as zero-padded binary lists
    Args:
        max_int: Upper limit of the sampled values; it also determines the width the binary
            lists are padded to (``int(log2(max_int))``)
        batch_size: The number of examples in the batch.
    Returns:
        A Tuple with the labels and the input data.
        labels: A list of ``batch_size`` ones (every sample is even).
        data: A list of ``batch_size`` binary lists, most significant digit first.
    """
    bit_width = int(math.log(max_int, 2))

    # Draw from the lower half of the range; doubling below makes every value even.
    halves = np.random.randint(0, int(max_int / 2), batch_size)

    data = []
    for half in halves:
        bits = create_binary_list_from_int(int(half * 2))
        # Left-pad with zeros up to the common width.
        data.append([0] * (bit_width - len(bits)) + bits)

    labels = [1] * batch_size
    return labels, data


def convert_float_matrix_to_int_list(
    float_matrix: np.array, threshold: float = 0.5
) -> List[int]:
    """Thresholds each row of a matrix into binary digits and decodes it as an integer
    Args:
        float_matrix: A 2-D array-like supporting ``>=`` and row iteration; each row is read as
            one binary number, most significant digit first
        threshold: Entries greater than or equal to this value become 1, all others 0.
    Returns:
        A list with one integer per row.
    """
    integers = []
    for row in float_matrix >= threshold:
        bit_string = "".join(str(int(flag)) for flag in row)
        integers.append(int(bit_string, 2))
    return integers

In [None]:
# Mount Google Drive (Colab-only API) so the dataset archives stored there can be read.
from google.colab import drive
drive.mount('/content/gdrive')
# Extract both archives; no target directory is given to unzip.
# NOTE(review): the archive paths are relative while the mount point is
# absolute - presumably the working directory is /content; confirm.
!unzip gdrive/My\ Drive/ieee-fraud-detection.zip
!unzip gdrive/My\ Drive/creditcard.zip

Mounted at /content/gdrive
Archive:  gdrive/My Drive/ieee-fraud-detection.zip
  inflating: sample_submission.csv   
  inflating: test_identity.csv       
  inflating: test_transaction.csv    
  inflating: train_identity.csv      
  inflating: train_transaction.csv   
Archive:  gdrive/My Drive/creditcard.zip
  inflating: creditcard.csv          


In [None]:
import numpy as np 
import pandas as pd 
from matplotlib import pyplot as plt 
import seaborn as sns
# Load the transactions, then split the "Class" column (target) off from the
# remaining columns (features).
data = pd.read_csv("creditcard.csv")
y = data["Class"]
X = data.drop("Class", axis=1)

In [None]:
# import the required packages
import torch
import argparse
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from torchvision.utils import save_image
from torchvision.utils import make_grid
from torch.utils.tensorboard import SummaryWriter

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import csv

In [None]:
batch_size = 16
# Side length the convolutional generator/discriminator in this notebook are
# written for; MNIST digits are 28x28 and are upscaled to this size.
IMAGE_SIZE = 64

train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    # Map pixel values from [0, 1] to [-1, 1].
    transforms.Normalize((0.5,), (0.5,))])

# Download into a relative data directory instead of an absolute path at the
# filesystem root.
train_dataset = datasets.MNIST(root='./data', download=True, transform=train_transform)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# custom weights initialization called on gen and disc model
def weights_init(m):
    """Re-initialise ``m`` in place based on its class name.

    Classes whose name contains "Conv" get weights drawn from N(0, 0.02).
    Otherwise, classes whose name contains "BatchNorm" get weights drawn from
    N(1, 0.02) and a zero bias.  Every other module is left untouched.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        torch.nn.init.normal_(m.weight, 0.0, 0.02)
        return
    if 'BatchNorm' in layer_kind:
        torch.nn.init.normal_(m.weight, 1.0, 0.02)
        torch.nn.init.zeros_(m.bias)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /creditcard.csv/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting /creditcard.csv/MNIST/raw/train-images-idx3-ubyte.gz to /creditcard.csv/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /creditcard.csv/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting /creditcard.csv/MNIST/raw/train-labels-idx1-ubyte.gz to /creditcard.csv/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /creditcard.csv/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting /creditcard.csv/MNIST/raw/t10k-images-idx3-ubyte.gz to /creditcard.csv/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /creditcard.csv/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting /creditcard.csv/MNIST/raw/t10k-labels-idx1-ubyte.gz to /creditcard.csv/MNIST/raw



In [None]:
# Shared configuration for the convolutional GAN defined in the following cells.
image_shape = (1, 64, 64)  # presumably (channels, height, width) - TODO confirm
image_dim = int(np.prod(image_shape))  # product of the three entries of image_shape
latent_dim = 100  # number of input channels of the generator's first layer
device = 'cpu'  # models and batches are moved to this device
num_epochs = 10    # the training loop runs epochs 1..num_epochs
# Generator Model Class Definition
class Generator(nn.Module):
    """DCGAN generator: turns a ``(batch, latent, 1, 1)`` noise tensor into 64x64 images.

    The first transposed convolution expands the 1x1 input to 4x4; each of the
    four following ones (kernel 4, stride 2, padding 1) doubles the side
    length: 4 -> 8 -> 16 -> 32 -> 64.  The output passes through ``tanh``.

    Args:
        latent_size: Channels of the noise tensor.  ``None`` uses the
            module-level ``latent_dim``.
        out_channels: Channels of the generated image.  ``None`` uses the
            module-level ``image_shape[0]`` so the output matches the
            configured image shape.
        feature_maps: Base number of feature maps; the hidden layers use
            8x, 4x, 2x and 1x this value.
    """

    def __init__(self, latent_size=None, out_channels=None, feature_maps=64):
        super(Generator, self).__init__()
        if latent_size is None:
            latent_size = latent_dim
        if out_channels is None:
            out_channels = image_shape[0]

        self.main = nn.Sequential(
            # Block 1: input is Z x 1 x 1, output is (feature_maps * 8) x 4 x 4
            nn.ConvTranspose2d(latent_size, feature_maps * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(feature_maps * 8),
            nn.ReLU(True),
            # Block 2: input is (feature_maps * 8) x 4 x 4
            nn.ConvTranspose2d(feature_maps * 8, feature_maps * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_maps * 4),
            nn.ReLU(True),
            # Block 3: input is (feature_maps * 4) x 8 x 8
            nn.ConvTranspose2d(feature_maps * 4, feature_maps * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_maps * 2),
            nn.ReLU(True),
            # Block 4: input is (feature_maps * 2) x 16 x 16
            nn.ConvTranspose2d(feature_maps * 2, feature_maps, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_maps),
            nn.ReLU(True),
            # Block 5: input is (feature_maps) x 32 x 32
            nn.ConvTranspose2d(feature_maps, out_channels, 4, 2, 1, bias=False),
            nn.Tanh()
            # Output: (out_channels) x 64 x 64
        )

    def forward(self, input):
        """Generate a batch of images with values in [-1, 1]."""
        output = self.main(input)
        return output

In [None]:
# Discriminator Model Class Definition
class Discriminator(nn.Module):
    """DCGAN discriminator: scores 64x64 images with one probability each.

    Four convolutions (kernel 4, stride 2, padding 1) halve the side length:
    64 -> 32 -> 16 -> 8 -> 4.  A final 4x4 convolution reduces the 4x4 map to
    a single value, which is passed through a sigmoid and flattened to
    ``(batch, 1)``.  Inputs must therefore be 64x64.

    Args:
        in_channels: Channels of the input images.
        feature_maps: Base number of feature maps; the hidden layers use
            1x, 2x, 4x and 8x this value.
    """

    def __init__(self, in_channels=1, feature_maps=64):
        super(Discriminator, self).__init__()
        self.main = nn.Sequential(
            # Block 1: input is (in_channels) x 64 x 64
            nn.Conv2d(in_channels, feature_maps, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # Block 2: input is (feature_maps) x 32 x 32
            nn.Conv2d(feature_maps, feature_maps * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_maps * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # Block 3: input is (feature_maps * 2) x 16 x 16
            nn.Conv2d(feature_maps * 2, feature_maps * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_maps * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # Block 4: input is (feature_maps * 4) x 8 x 8
            nn.Conv2d(feature_maps * 4, feature_maps * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_maps * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # Block 5: input is (feature_maps * 8) x 4 x 4
            nn.Conv2d(feature_maps * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid(),
            nn.Flatten()
            # Output: 1
        )

    def forward(self, input):
        """Return a ``(batch, 1)`` tensor of probabilities in [0, 1]."""
        output = self.main(input)
        return output


In [None]:
# Instantiate both networks on the configured device and apply the custom
# weight initialisation to every sub-module.
generator = Generator().to(device)
generator.apply(weights_init)
discriminator = Discriminator().to(device)
discriminator.apply(weights_init)

# The discriminator ends in a sigmoid, so its outputs are probabilities.
# Binary cross-entropy is the loss defined on probabilities; CrossEntropyLoss
# expects unnormalised class scores and integer class indices instead.
adversarial_loss = nn.BCELoss()


def _targets_like(output, label):
    """Return ``label`` as a tensor with the dtype, device and shape of ``output``.

    ``label`` holds one value per sample.  It is reshaped to
    ``(batch, 1, ..., 1)`` and broadcast, so every element of a sample's
    output is compared against that sample's label.
    """
    per_sample = label.to(device=output.device, dtype=output.dtype)
    per_sample = per_sample.reshape(-1, *([1] * (output.dim() - 1)))
    return per_sample.expand_as(output)


def generator_loss(fake_output, label):
    """Binary cross-entropy between the discriminator's scores for generated images and ``label``."""
    gen_loss = adversarial_loss(fake_output, _targets_like(fake_output, label))
    return gen_loss


def discriminator_loss(output, label):
    """Binary cross-entropy between the discriminator's scores and ``label``."""
    disc_loss = adversarial_loss(output, _targets_like(output, label))
    return disc_loss


learning_rate = 0.0002
G_optimizer = optim.Adam(generator.parameters(), lr=learning_rate, betas=(0.5, 0.999))
D_optimizer = optim.Adam(discriminator.parameters(), lr=learning_rate, betas=(0.5, 0.999))



In [None]:
# Adversarial training: for every batch the discriminator is updated on real
# and generated images, then the generator is updated through the
# discriminator's verdict on the same generated images.
for epoch in range(1, num_epochs + 1):
    # Per-batch loss values of this epoch, stored as plain floats so the
    # autograd graphs of finished batches are not kept alive.
    D_loss_list, G_loss_list = [], []

    for index, (real_images, _) in enumerate(train_loader):
        real_images = real_images.to(device)
        n_samples = real_images.size(0)

        # Per-sample targets: 1 for real images, 0 for generated ones.
        real_target = torch.ones(n_samples, dtype=torch.long, device=device)
        fake_target = torch.zeros(n_samples, dtype=torch.long, device=device)

        # ---- Train D on real images ----
        D_optimizer.zero_grad()
        output = discriminator(real_images)
        D_real_loss = discriminator_loss(output, real_target)
        D_real_loss.backward()

        # ---- Train D on generated images ----
        noise_vector = torch.randn(n_samples, latent_dim, 1, 1, device=device)
        generated_image = generator(noise_vector)
        # detach() keeps this backward pass from reaching the generator.
        output = discriminator(generated_image.detach())
        D_fake_loss = discriminator_loss(output, fake_target)
        D_fake_loss.backward()

        D_total_loss = D_real_loss + D_fake_loss
        D_loss_list.append(D_total_loss.item())

        D_optimizer.step()

        # ---- Train G on D's output ----
        # The generator is rewarded when D labels its images as real.
        G_optimizer.zero_grad()
        gen_output = discriminator(generated_image)
        G_loss = generator_loss(gen_output, real_target)
        G_loss_list.append(G_loss.item())

        G_loss.backward()
        G_optimizer.step()


RuntimeError: ignored

In [None]:
# Train G on D's output
# NOTE(review): the statements below are indented like the body of a loop and
# use per-batch names (generated_image, real_target).  Run as a cell of their
# own they raise IndentationError; they need to live inside the per-batch
# training loop.
      G_optimizer.zero_grad()  # clear the generator's accumulated gradients
      gen_output = discriminator(generated_image)  # no detach: gradients must reach the generator
      G_loss = generator_loss(gen_output, real_target)  # scored against the "real" targets
      G_loss_list.append(G_loss)

      G_loss.backward()
      G_optimizer.step()



In [None]:
import torch, torch.nn as nn, torch.nn.functional as F
from torch.autograd import Variable
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch.utils.data as data_utils

# Load the credit-card dataset and build scaled feature / label arrays.
# NOTE(review): X and y are also assigned (as pandas objects) by an earlier
# cell of this notebook; this cell rebinds them to NumPy arrays.
df = pd.read_csv('creditcard.csv')
df.head(1) # give us a sneak preview of the dataset (not rendered: it is not the last expression of the cell)

X = df.iloc[:, :-1].values # extracting features: every column except the last
y = df.iloc[:, -1].values # extracting labels: the last column

# Fit the scaler on all rows and replace X with the transformed values.
sc = StandardScaler()
X = sc.fit_transform(X)