# Imports

In [20]:
import numpy as np
import pandas as pd
import random as rand


from keras.utils import to_categorical
from keras.layers import Dense, Activation, Conv1D, Dropout
from keras.models import Sequential

# Data pre-processing

Due to the large size of dataset, many columns that are not needed and some rows containing null values have already been dropped

In [23]:
data = pd.read_csv("dataset.csv")

data = data.drop(['class', 'epoch_cal'], axis=1)
data = data.dropna()
data['neo'] = data['neo'].replace(('Y','N'), (1,0))
data['pha'] = data['pha'].replace(('Y','N'), (1,0))

data = data.astype('float32')
data /= 255

print(data.shape)

(131124, 16)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['neo'] = data['neo'].replace(('Y','N'), (1,0))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pha'] = data['pha'].replace(('Y','N'), (1,0))


# Generator model
16 outputs as there are 16 columns in the dataset

In [24]:
def make_generator():
  model = Sequential()
  
  model.add(Dense(40, activation='relu', input_shape=INPUT_SHAPE))
  model.add(Dense(1, activation='sigmoid'))

  return model

In [25]:
def generate_fakes():
  neo = rand.choice([1,0]) 
  pha = rand.choice([1,0])
  H = rand.uniform(0,10000)
  diameter = rand.uniform(0,100000)
  albedo = rand.uniform(0,1)
  e = rand.uniform(0, 0.002)
  a = rand.uniform(0,1)
  q = rand.uniform(0,1)
  i = rand.uniform(0,1)
  om = rand.uniform(0,1)
  w = rand.uniform(0,2)
  ad = rand.uniform(0,0.05)
  n = rand.uniform(0,0.01)
  tp_cal = rand.uniform(0,100000)
  per = rand.uniform(0,10)
  moid = rand.uniform(0,0.1)

  X = np.array([[neo, pha, H, diameter, albedo, e, a, q, i, om, w, ad, n, tp_cal, per, moid]])
  return X

# Discriminator model
This is a binary CNN classifier, to determine whether the input is 'real' or not

In [26]:
def make_discriminator():
  model = Sequential()

  model.add(Conv1D(64, kernel_size=5, padding='same', activation='relu', input_shape=INPUT_SHAPE))

  model.add(Dense(1, activation='sigmoid'))

  model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
  return model

Generate random, fake samples for training discriminator

# Putting the GAN together

In [27]:
def make_gan(generator, discriminator):
  discriminator.trainable = False
  model = Sequential()
  model.add(generator)
  model.add(discriminator)
  model.compile(loss='binary_crossentropy', optimizer=OPTIMIZER)
  return model

Training parameters


In [33]:
INPUT_SHAPE = (16,1)
OPTIMIZER = 'adam'
LOSS = 'binary_crossentropy'
METRICS = 'accuracy'

BATCH_SIZE = 128
N_EPOCHS = 2048 # this is the most epochs that can be done with this dataset due to how much data it has
VERBOSE = 1
VALIDATION_SPLIT = 0.2

In [32]:
def training(generator, discriminator, gan, batch_size, n_epochs):
  half_batch = int(batch_size/2)

  print(data.shape)
  current_row = 0
  for i in range(n_epochs): #10000 epochs

    print("Epoch : " + str(i+1))

    x_real = data[current_row:half_batch + current_row]  # fix so its not 'head' and is seeing new data each time
    y_real = np.ones((half_batch, 16))

    current_row = current_row + half_batch
    print(current_row)

    x_fake = np.array([[]])

    for i in range(half_batch):
      current_fake = generate_fakes()
      x_fake = np.append(x_fake, current_fake)

    y_fake = np.zeros((half_batch, 16))
    x_fake = x_fake.reshape(half_batch,16)

    print(discriminator.train_on_batch(x_real, y_real))
    print(discriminator.train_on_batch(x_fake, y_fake))

    x_gan = np.array([[]])

    for i in range(batch_size):
      current_gan = generate_fakes()
      x_gan = np.append(x_gan, current_gan)

    y_gan = np.ones((batch_size,16))
    x_gan = x_gan.reshape(batch_size,16)

    gan.train_on_batch(x_gan, y_gan)
  
  # now training is complete, generate a value to use
  value_real = np.zeros((1,16))
  while value_real.all() == 0:
    x_using = generate_fakes()

    value = generator.predict(x_using)
    #check value fools discriminator
    value_real = discriminator.predict(value)
  
  print(value)

  return value


In [34]:
generator = make_generator()
discriminator = make_discriminator()
gan = make_gan(generator, discriminator)
gan.summary()

value_to_use = training(generator, discriminator, gan, BATCH_SIZE, N_EPOCHS)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[4.930305480957031, 0.66015625]
[0.1255708932876587, 0.9384765625]
Epoch : 804
51456
[4.920645713806152, 0.6650390625]
[0.1359073668718338, 0.93359375]
Epoch : 805
51520
[4.928830146789551, 0.66015625]
[0.10708406567573547, 0.955078125]
Epoch : 806
51584
[4.924449920654297, 0.66015625]
[0.11513456702232361, 0.9443359375]
Epoch : 807
51648
[4.935268402099609, 0.65234375]
[0.12263339757919312, 0.9375]
Epoch : 808
51712
[4.920173645019531, 0.6640625]
[0.11894598603248596, 0.943359375]
Epoch : 809
51776
[4.922563552856445, 0.6611328125]
[0.11423011124134064, 0.953125]
Epoch : 810
51840
[4.928131103515625, 0.654296875]
[0.12897466123104095, 0.9423828125]
Epoch : 811
51904
[4.9439849853515625, 0.64453125]
[0.09446646273136139, 0.962890625]
Epoch : 812
51968
[4.940761566162109, 0.6416015625]
[0.11275926232337952, 0.9462890625]
Epoch : 813
52032
[4.934743404388428, 0.65234375]
[0.12485478073358536, 0.9423828125]
Epoch : 814
52096