In [109]:
import pandas as pd
# Load the KOI table: lines starting with '#' are treated as comments and
# malformed rows are skipped; then keep only rows with no missing value in
# any column.
# NOTE(review): the NaN filter looks at every column, including ones that are
# discarded in a later cell — confirm that this is intended.
df = (
    pd.read_csv('KOI.csv', comment='#', on_bad_lines='skip')
    .dropna()
)
df

Unnamed: 0,kepid,kepoi_name,kepler_name,koi_disposition,koi_pdisposition,koi_score,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,...,koi_insol,koi_model_snr,koi_tce_plnt_num,koi_tce_delivname,koi_steff,koi_slogg,koi_srad,ra,dec,koi_kepmag
0,10797460,K00752.01,Kepler-227 b,CONFIRMED,CANDIDATE,1.000,0,0,0,0,...,93.59,35.8,1.0,q1_q17_dr25_tce,5455.0,4.467,0.927,291.93423,48.141651,15.347
1,10797460,K00752.02,Kepler-227 c,CONFIRMED,CANDIDATE,0.969,0,0,0,0,...,9.11,25.8,2.0,q1_q17_dr25_tce,5455.0,4.467,0.927,291.93423,48.141651,15.347
4,10854555,K00755.01,Kepler-664 b,CONFIRMED,CANDIDATE,1.000,0,0,0,0,...,926.16,40.9,1.0,q1_q17_dr25_tce,6031.0,4.438,1.046,288.75488,48.226200,15.509
5,10872983,K00756.01,Kepler-228 d,CONFIRMED,CANDIDATE,1.000,0,0,0,0,...,114.81,66.5,1.0,q1_q17_dr25_tce,6046.0,4.486,0.972,296.28613,48.224670,15.714
6,10872983,K00756.02,Kepler-228 c,CONFIRMED,CANDIDATE,1.000,0,0,0,0,...,427.65,40.2,2.0,q1_q17_dr25_tce,6046.0,4.486,0.972,296.28613,48.224670,15.714
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8665,10843431,K07378.01,Kepler-1999 b,CONFIRMED,CANDIDATE,0.994,0,0,0,0,...,26.17,15.6,1.0,q1_q17_dr25_tce,4406.0,4.643,0.628,282.37042,48.259289,16.324
8817,7350067,K06863.01,Kepler-1646 b,CONFIRMED,CANDIDATE,1.000,0,0,0,0,...,4.20,15.0,1.0,q1_q17_dr25_tce,3236.0,5.097,0.193,287.52640,42.912899,15.737
9014,8311864,K07016.01,Kepler-452 b,CONFIRMED,CANDIDATE,0.771,0,0,0,0,...,0.56,12.3,1.0,q1_q17_dr25_tce,5579.0,4.580,0.798,296.00369,44.277561,13.426
9083,7935997,K05447.02,Kepler-1629 b,CONFIRMED,CANDIDATE,0.000,0,0,0,0,...,323.21,14.2,1.0,q1_q17_dr25_tce,5713.0,4.541,0.893,280.20660,43.774288,12.750


In [110]:
# (rows, columns) left after the rows containing NaN were removed
df.shape

(2727, 27)

In [111]:
# Columns the analysis treats as unrelated to the prediction task; they are
# removed before the features/target split.
unrelated_features = [
    'kepid',
    'kepoi_name',
    'kepler_name',
    'koi_disposition',
    'koi_insol',
    'koi_slogg',
    'koi_fpflag_ec',
    'koi_teq',
    'koi_tce_delivname',
]
df = df.drop(columns=unrelated_features)

# Display what is left so the remaining column set is visible in the output.
kept_columns = df.columns.tolist()
kept_columns

['koi_pdisposition',
 'koi_score',
 'koi_fpflag_nt',
 'koi_fpflag_ss',
 'koi_fpflag_co',
 'koi_period',
 'koi_time0bk',
 'koi_impact',
 'koi_duration',
 'koi_depth',
 'koi_prad',
 'koi_model_snr',
 'koi_tce_plnt_num',
 'koi_steff',
 'koi_srad',
 'ra',
 'dec',
 'koi_kepmag']

In [112]:
from sklearn import preprocessing
# Fit the encoder on the disposition labels, then overwrite the column with
# the corresponding integer class codes. The fitted encoder remains available
# as `label_encoder`.
label_encoder = preprocessing.LabelEncoder().fit(df['koi_pdisposition'])
df['koi_pdisposition'] = label_encoder.transform(df['koi_pdisposition'])

In [113]:
# Split the frame into two separate DataFrames:
#   df_target   - single-column frame with the encoded disposition the model
#                 must predict (candidate vs. false positive)
#   df_features - every remaining column, i.e. the characteristics the model
#                 learns to associate with those labels
df_target = df[['koi_pdisposition']]
df_features = df.drop(columns=['koi_pdisposition'])

In [114]:
from sklearn.model_selection import train_test_split
import torch
import numpy as np

In [115]:
# Hold out 20% of the rows as test data; the fixed random_state makes the
# shuffle (and therefore the split) repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    df_features,
    df_target,
    test_size=0.2,
    random_state=0,
)

In [116]:
# Feature frames -> float32 tensors (one row per sample).
x_train, x_test = (
    torch.from_numpy(frame.values).float() for frame in (x_train, x_test)
)

# Target frames hold a single column, so their values are (N, 1); flatten
# them to 1-D tensors of length N.
y_train, y_test = (
    torch.from_numpy(frame.values).view(-1) for frame in (y_train, y_test)
)

In [117]:
import torch.nn as nn
import torch.nn.functional as F

In [118]:
# Model building starts here.
# Inspect the tensor shapes: the second dimension of x_train is the number of
# features per sample, which fixes the input size of the neural network.
x_train.shape, y_train.shape

(torch.Size([2181, 17]), torch.Size([2181]))

In [119]:
# Network dimensions.
# input_size  - 17 features per sample (must match x_train.shape[1])
# output_size - 2 classes: binary output, yes or no
input_size, output_size = 17, 2

# Hidden layer widths, determined through some trial and error.
hid1_size, hid2_size = 64, 32

In [120]:
class Net(nn.Module):
    """Fully connected classifier with two sigmoid-activated hidden layers.

    Layout: input -> fc1 -> sigmoid -> fc2 -> sigmoid -> fc3 -> log-softmax.

    Args:
        input_size: number of features per sample.
        hid1_size: width of the first hidden layer.
        hid2_size: width of the second hidden layer.
        output_size: number of output classes.
    """

    def __init__(self, input_size, hid1_size, hid2_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hid1_size)
        self.fc2 = nn.Linear(hid1_size, hid2_size)
        self.fc3 = nn.Linear(hid2_size, output_size)

    def forward(self, x):
        """Compute per-class log-probabilities for ``x``.

        Args:
            x: float tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor with ``output_size`` entries along the last dimension,
            normalised with log-softmax over that dimension.
        """
        # torch.sigmoid is used instead of F.sigmoid, which older PyTorch
        # releases flag as deprecated; the computed values are identical.
        x = torch.sigmoid(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return F.log_softmax(self.fc3(x), dim=-1)

In [121]:
# Seed PyTorch's RNG before building the network so the randomly initialised
# layer weights are identical on every Restart & Run All (0 mirrors the
# random_state used for the train/test split).
torch.manual_seed(0)
model = Net(input_size, hid1_size, hid2_size, output_size)