# Imports

In [70]:
import numpy as np
import pandas as pd


from keras.utils import to_categorical
from keras.layers import Dense, Activation, Conv1D, Dropout
from keras.models import Sequential

# Data pre-processing

Due to the large size of dataset, many columns that are not needed and some rows containing null values have already been dropped

In [71]:
data = pd.read_csv("dataset.csv")

data = data.drop(['class'], axis=1)
data = data.dropna()
data['neo'] = data['neo'].replace(('Y','N'), (1,0))
data['pha'] = data['pha'].replace(('Y','N'), (1,0))

#num_classes = data['class'].nunique()
#classes = to_categorical(data['class'], num_classes)

data = data.astype('float32')
data /= 255

print(data.shape)

print(data.info)

(131124, 17)
<bound method DataFrame.info of         neo  pha         H  diameter    albedo     epoch_cal         e  \
0       0.0  0.0  0.013333  3.683922  0.000353  79178.148438  0.000298   
1       0.0  0.0  0.016471  2.137255  0.000396  79217.773438  0.000902   
2       0.0  0.0  0.020902  0.967043  0.000839  79217.773438  0.001008   
3       0.0  0.0  0.011765  2.060392  0.001658  79178.148438  0.000348   
4       0.0  0.0  0.027059  0.418427  0.001075  79217.773438  0.000749   
...     ...  ...       ...       ...       ...           ...       ...   
573686  0.0  0.0  0.061176  0.017612  0.000094  79217.773438  0.000717   
573687  0.0  0.0  0.060000  0.018020  0.000361  79217.773438  0.000289   
573688  0.0  0.0  0.060392  0.016024  0.000455  79217.773438  0.000326   
573689  0.0  0.0  0.064314  0.012820  0.000408  79217.773438  0.000827   
573690  0.0  0.0  0.065882  0.008361  0.000243  79217.773438  0.000899   

               a         q         i        om         w        ad

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['neo'] = data['neo'].replace(('Y','N'), (1,0))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['pha'] = data['pha'].replace(('Y','N'), (1,0))


In [20]:
data

Unnamed: 0,neo,pha,H,diameter,albedo,epoch_cal,e,a,q,i,om,w,ad,n,tp_cal,per,moid
0,0,0,3.40,939.400,0.0900,20190427,0.076009,2.769165,2.558684,10.594067,80.305531,73.597695,2.979647,0.213885,20180430.25,1683.145703,1.59478
1,0,0,4.20,545.000,0.1010,20200531,0.229972,2.773841,2.135935,34.832932,173.024741,310.202392,3.411748,0.213345,20180721.46,1687.410992,1.23429
2,0,0,5.33,246.596,0.2140,20200531,0.256936,2.668285,1.982706,12.991043,169.851483,248.066193,3.353865,0.226129,20181123.29,1592.013769,1.03429
3,0,0,3.00,525.400,0.4228,20190427,0.088721,2.361418,2.151909,7.141771,103.810804,150.728541,2.570926,0.271609,20180509.06,1325.432763,1.13948
4,0,0,6.90,106.699,0.2740,20200531,0.190913,2.574037,2.082619,5.367427,141.571026,358.648418,3.065455,0.238661,20200317.22,1508.414421,1.09575
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
573686,0,0,15.60,4.491,0.0240,20200531,0.182925,2.937086,2.399819,27.965560,171.988705,161.151266,3.474352,0.195807,20180122.67,1838.541007,1.40243
573687,0,0,15.30,4.595,0.0920,20200531,0.073698,3.176190,2.942112,23.193462,43.051531,207.110303,3.410268,0.174118,20220911.64,2067.560001,1.94299
573688,0,0,15.40,4.086,0.1160,20200531,0.083151,3.229042,2.960545,16.428329,63.037073,72.835544,3.497539,0.169861,20210320.91,2119.380839,2.02639
573689,0,0,16.40,3.269,0.1040,20200531,0.210836,3.170190,2.501800,13.479527,80.175164,75.253005,3.838581,0.174613,20210522.88,2061.704725,1.55350


Training parameters


In [72]:
INPUT_SHAPE = (17,1)
OPTIMIZER = 'adam'
LOSS = 'binary_crossentropy'
METRICS = 'accuracy'

BATCH_SIZE = 128
N_EPOCHS = 20
VERBOSE = 1

# Generator model
18 outputs as there are 18 columns in the dataset

In [40]:
def make_generator(latent_dim):
  model = Sequential()
  
  model.add(Dense(40, activation='relu', input_dim=latent_dim))
  model.add(Dense(18, activation='sigmoid'))

  return model

# Discriminator model
This is a binary CNN classifier, to determine whether the input is 'real' or not

In [75]:
def make_discriminator():
  model = Sequential()

  model.add(Conv1D(64, kernel_size=5, padding='same', activation='relu', input_shape=(17,1)))

  model.add(Dense(1, activation='sigmoid'))

  return model

In [80]:
model = make_discriminator()

model.compile(loss=LOSS, optimizer=OPTIMIZER, metrics=METRICS)
model.summary()

Model: "sequential_21"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_21 (Conv1D)          (None, 17, 64)            384       
                                                                 
 dense_12 (Dense)            (None, 17, 1)             65        
                                                                 
Total params: 449
Trainable params: 449
Non-trainable params: 0
_________________________________________________________________


In [81]:
history = model.fit(data, epochs=N_EPOCHS, verbose=VERBOSE)

Epoch 1/20


ValueError: ignored

# Putting the GAN together

In [None]:
def make_gan(generator, discriminator):
  model = Sequential()
  model.add(generator)
  model.add(discriminator)
  model.compile(loss='binary_crossentropy', optimizer=OPTIMIZER)
  return model

In [None]:
generator = make_generator()
discriminator = make_discriminator()
gan = make_gan(generator, discriminator)