# Imports

In [2]:
import numpy as np
import pandas as pd
import random as rand


from keras.utils import to_categorical
from keras.layers import Dense, Activation, Conv1D, Conv2D, Dropout
from keras.models import Sequential

# Data pre-processing

Due to the large size of dataset, many columns that are not needed and some rows containing null values have already been dropped

In [17]:
data = pd.read_csv("dataset.csv")

data = data.drop(['class', 'epoch_cal'], axis=1)
data = data.dropna()
data['neo'] = data['neo'].replace(('Y','N'), (1,0))
data['pha'] = data['pha'].replace(('Y','N'), (1,0))

data = data.astype('float32')
data /= 255

print(data.shape)

(131124, 16)


Training parameters


In [18]:
INPUT_SHAPE = (16,1)
OPTIMIZER = 'adam'
LOSS = 'binary_crossentropy'
METRICS = 'accuracy'

BATCH_SIZE = 128
N_EPOCHS = 20
VERBOSE = 1
VALIDATION_SPLIT = 0.2

# Generator model
16 outputs as there are 16 columns in the dataset

In [21]:
def make_generator():
  model = Sequential()
  
  model.add(Dense(40, activation='relu', input_shape=INPUT_SHAPE))
  model.add(Dense(16, activation='sigmoid'))

  return model

In [9]:
def generate_fakes():
  neo = rand.choice([1,0]) 
  pha = rand.choice([1,0])
  H = rand.uniform(0,10000)
  diameter = rand.uniform(0,100000)
  albedo = rand.uniform(0,1)
  e = rand.uniform(0, 0.002)
  a = rand.uniform(0,1)
  q = rand.uniform(0,1)
  i = rand.uniform(0,1)
  om = rand.uniform(0,1)
  w = rand.uniform(0,2)
  ad = rand.uniform(0,0.05)
  n = rand.uniform(0,0.01)
  tp_cal = rand.uniform(0,100000)
  per = rand.uniform(0,10)
  moid = rand.uniform(0,0.1)

  X = np.array([[neo, pha, H, diameter, albedo, e, a, q, i, om, w, ad, n, tp_cal, per, moid]])
  return X

# Discriminator model
This is a binary CNN classifier, to determine whether the input is 'real' or not

In [6]:
def make_discriminator():
  model = Sequential()

  model.add(Conv1D(64, kernel_size=5, padding='same', activation='relu', input_shape=INPUT_SHAPE))

  model.add(Dense(1, activation='sigmoid'))

  return model

In [7]:
model = make_discriminator()

model.compile(loss=LOSS, optimizer=OPTIMIZER, metrics=METRICS)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 16, 64)            384       
                                                                 
 dense (Dense)               (None, 16, 1)             65        
                                                                 
Total params: 449
Trainable params: 449
Non-trainable params: 0
_________________________________________________________________


In [8]:
x_real = data.head(65000)
y_real = np.ones((65000))

Generate random, fake samples for training discriminator

In [14]:
x_fake = np.array([[]])
for i in range(65000):
  # fix below line - needs to add as a new row, not each value from the function as its own separate row
  current_fake = generate_fakes()
  x_fake = np.append(x_fake, current_fake)

y_fake = np.zeros(65000)

x_fake = x_fake.reshape(65000,16)
print(x_fake.shape)
x = pd.DataFrame(x_fake)
print(x.shape, x_real.shape)


x = np.append(x,x_real)
y = pd.DataFrame(pd.Series(y_real))
y = pd.concat([y, pd.Series(y_fake)])

print(x.shape, y.shape)

x = np.reshape(x, (130000,16))

print(x.shape, y.shape)


(65000, 16)
(65000, 16) (65000, 16)
(2080000,) (130000, 1)
(130000, 16) (130000, 1)


In [15]:
print(x.shape, y.shape)

history = model.fit(x, y, epochs=N_EPOCHS, batch_size=BATCH_SIZE, verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

(130000, 16) (130000, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


# Putting the GAN together

In [19]:
def make_gan(generator, discriminator):
  discriminator.trainable = False
  model = Sequential()
  model.add(generator)
  model.add(discriminator)
  model.compile(loss='binary_crossentropy', optimizer=OPTIMIZER)
  return model

In [23]:
generator = make_generator()
discriminator = make_discriminator()
gan = make_gan(generator, discriminator)
gan.summary()

ValueError: ignored