# Imports

In [1]:
import numpy as np
import pandas as pd
import random as rand


from keras.utils import to_categorical
from keras.layers import Dense, Activation, Conv1D, Dropout
from keras.models import Sequential

# Data pre-processing

Due to the large size of dataset, many columns that are not needed and some rows containing null values have already been dropped

In [2]:
data = pd.read_csv("dataset.csv")

data = data.drop(['class', 'epoch_cal'], axis=1)
data = data.dropna()
data['neo'] = data['neo'].replace(('Y','N'), (1,0))
data['pha'] = data['pha'].replace(('Y','N'), (1,0))

data = data.astype('float32')
data /= 255

print(data.shape)

(131124, 16)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['neo'] = data['neo'].replace(('Y','N'), (1,0))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pha'] = data['pha'].replace(('Y','N'), (1,0))


# Generator model
16 outputs as there are 16 columns in the dataset

In [3]:
def make_generator():
  model = Sequential()
  
  model.add(Dense(40, activation='relu', input_shape=INPUT_SHAPE))
  model.add(Dense(1, activation='sigmoid'))

  return model

In [4]:
def generate_fakes():
  neo = rand.choice([1,0]) 
  pha = rand.choice([1,0])
  H = rand.uniform(0,10000)
  diameter = rand.uniform(0,100000)
  albedo = rand.uniform(0,1)
  e = rand.uniform(0, 0.002)
  a = rand.uniform(0,1)
  q = rand.uniform(0,1)
  i = rand.uniform(0,1)
  om = rand.uniform(0,1)
  w = rand.uniform(0,2)
  ad = rand.uniform(0,0.05)
  n = rand.uniform(0,0.01)
  tp_cal = rand.uniform(0,100000)
  per = rand.uniform(0,10)
  moid = rand.uniform(0,0.1)

  X = np.array([[neo, pha, H, diameter, albedo, e, a, q, i, om, w, ad, n, tp_cal, per, moid]])
  return X

# Discriminator model
This is a binary CNN classifier, to determine whether the input is 'real' or not

In [5]:
def make_discriminator():
  model = Sequential()

  model.add(Conv1D(64, kernel_size=5, padding='same', activation='relu', input_shape=INPUT_SHAPE))

  model.add(Dense(1, activation='sigmoid'))

  model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
  return model

Generate random, fake samples for training discriminator

# Putting the GAN together

In [6]:
def make_gan(generator, discriminator):
  discriminator.trainable = False
  model = Sequential()
  model.add(generator)
  model.add(discriminator)
  model.compile(loss='binary_crossentropy', optimizer=OPTIMIZER)
  return model

Training parameters


In [7]:
INPUT_SHAPE = (16,1)
OPTIMIZER = 'adam'
LOSS = 'binary_crossentropy'
METRICS = 'accuracy'

BATCH_SIZE = 128
N_EPOCHS = 10000
VERBOSE = 1
VALIDATION_SPLIT = 0.2

In [10]:
def training(generator, discriminator, gan, batch_size, n_epochs):
  half_batch = int(batch_size/2)
  for i in range(n_epochs): #10000 epochs

    print("Epoch = " + str(i+1))

    x_real = data.head(half_batch)  # fix so its not 'head' and is seeing new data each time
    y_real = np.ones((half_batch, 16))

    x_fake = np.array([[]])
    for i in range(half_batch):
      
      current_fake = generate_fakes()
      x_fake = np.append(x_fake, current_fake)

    y_fake = np.zeros((half_batch, 16))

    x_fake = x_fake.reshape(half_batch,16)

    discriminator.train_on_batch(x_real, y_real)
    discriminator.train_on_batch(x_fake, y_fake)

    x_gan = np.array([[]])

    for i in range(batch_size):
      current_gan = generate_fakes()
      x_gan = np.append(x_gan, current_gan)

    y_gan = np.ones((batch_size,16))
    x_gan = x_gan.reshape(batch_size,16)

    gan.train_on_batch(x_gan, y_gan)
  
  # now training is complete, generate a value to use
  value_real = np.zeros((1,16))
  while value_real.all() == 0:
    x_using = generate_fakes()


    value = generator.predict(x_using)
    #check value fools discriminator
    value_real = discriminator.predict(value)
  
  print(value)

  return value


In [11]:
generator = make_generator()
discriminator = make_discriminator()
gan = make_gan(generator, discriminator)
gan.summary()

value_to_use = training(generator, discriminator, gan, BATCH_SIZE, N_EPOCHS)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch = 5019
Epoch = 5020
Epoch = 5021
Epoch = 5022
Epoch = 5023
Epoch = 5024
Epoch = 5025
Epoch = 5026
Epoch = 5027
Epoch = 5028
Epoch = 5029
Epoch = 5030
Epoch = 5031
Epoch = 5032
Epoch = 5033
Epoch = 5034
Epoch = 5035
Epoch = 5036
Epoch = 5037
Epoch = 5038
Epoch = 5039
Epoch = 5040
Epoch = 5041
Epoch = 5042
Epoch = 5043
Epoch = 5044
Epoch = 5045
Epoch = 5046
Epoch = 5047
Epoch = 5048
Epoch = 5049
Epoch = 5050
Epoch = 5051
Epoch = 5052
Epoch = 5053
Epoch = 5054
Epoch = 5055
Epoch = 5056
Epoch = 5057
Epoch = 5058
Epoch = 5059
Epoch = 5060
Epoch = 5061
Epoch = 5062
Epoch = 5063
Epoch = 5064
Epoch = 5065
Epoch = 5066
Epoch = 5067
Epoch = 5068
Epoch = 5069
Epoch = 5070
Epoch = 5071
Epoch = 5072
Epoch = 5073
Epoch = 5074
Epoch = 5075
Epoch = 5076
Epoch = 5077
Epoch = 5078
Epoch = 5079
Epoch = 5080
Epoch = 5081
Epoch = 5082
Epoch = 5083
Epoch = 5084
Epoch = 5085
Epoch = 5086
Epoch = 5087
Epoch = 5088
Epoch = 5089
Epoch = 5090