In [83]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

pd.set_option('display.max_rows', 500)


In [84]:
# Define model path if you want to load a model
use_model = True
model_path = "data/model_86.pt"

# Define if you want to use CPU or GPU
use_cpu = False
gpu_available = torch.cuda.is_available()


if gpu_available and not use_cpu:
	device = torch.device("cuda:0")
	print(f"GPU Support Enabled, Using: {torch.cuda.get_device_name(0)}")  # Prints the name of the GPU
else:
	device = torch.device("cpu")
	print("No GPU available, using CPU")

GPU Support Enabled, Using: NVIDIA GeForce RTX 2070 SUPER


In [85]:
df = pd.read_csv('data/final_data.csv')

In [86]:
df.head()

Unnamed: 0,status,orbital_period,semi_major_axis,planet_radius,transit_duration,insolation_flux,equilibrium_temperature,kepler_magnitude,stellar_radius,stellar_effective_temperature,stellar_surface_gravity,stellar_mass,stellar_metallicity
0,1,0.571336,0.01312,1.82,1.79,4293.0,2063.0,11.958,0.95,5496.0,4.42,0.92,0.06
1,1,2.180535,0.03558,16.197,2.9278,1860.0,1828.0,11.468,1.26,6360.0,4.34,1.26,0.137
2,1,3.5951,0.047,1.95,4.33,1037.0,1616.0,11.364,1.71,5430.0,3.99,1.05,0.2
3,1,1.673902,0.0422,3.45,1.6056,990.0,1094.0,10.408,1.19,6120.0,4.36,1.18,0.14
4,1,0.658524,0.01306,3.3,0.33336,879.5,1515.0,11.386,0.7,4285.0,4.58,0.69,-0.12


In [87]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7386 entries, 0 to 7385
Data columns (total 13 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   status                         7386 non-null   int64  
 1   orbital_period                 7386 non-null   float64
 2   semi_major_axis                7386 non-null   float64
 3   planet_radius                  7386 non-null   float64
 4   transit_duration               7386 non-null   float64
 5   insolation_flux                7386 non-null   float64
 6   equilibrium_temperature        7386 non-null   float64
 7   kepler_magnitude               7386 non-null   float64
 8   stellar_radius                 7386 non-null   float64
 9   stellar_effective_temperature  7386 non-null   float64
 10  stellar_surface_gravity        7386 non-null   float64
 11  stellar_mass                   7386 non-null   float64
 12  stellar_metallicity            7386 non-null   f

In [88]:
df.describe()

Unnamed: 0,status,orbital_period,semi_major_axis,planet_radius,transit_duration,insolation_flux,equilibrium_temperature,kepler_magnitude,stellar_radius,stellar_effective_temperature,stellar_surface_gravity,stellar_mass,stellar_metallicity
count,7386.0,7386.0,7386.0,7386.0,7386.0,7386.0,7386.0,7386.0,7386.0,7386.0,7386.0,7386.0,7386.0
mean,0.38248,49.648224,0.18903,102.287896,5.7085,9209.798,1151.313837,14.236909,1.720254,5707.695911,4.313374,1.021002,-0.126083
std,0.486026,117.54678,0.29365,3166.460197,6.833667,177797.5,895.02044,1.413719,5.900127,822.079435,0.433957,0.351371,0.282097
min,0.0,0.241843,0.0059,0.08,0.1046,0.02,92.0,6.966,0.116,2661.0,0.047,0.094,-2.5
25%,0.0,2.237506,0.0334,1.51,2.486083,27.1425,582.0,13.405,0.827,5301.5,4.22,0.84,-0.26
50%,0.0,7.739694,0.0751,2.62,3.84232,178.38,932.0,14.495,0.997,5767.0,4.438,0.972,-0.1
75%,1.0,27.392805,0.1761,23.8025,6.238627,1122.505,1476.0,15.3035,1.34275,6113.0,4.544,1.099,0.07
max,1.0,3650.0,4.5,200346.0,138.54,10947550.0,14667.0,20.003,229.908,15896.0,5.283,3.686,0.56


In [89]:
# Nomalize data
df = (df - df.min()) / (df.max() - df.min())
df.describe()

Unnamed: 0,status,orbital_period,semi_major_axis,planet_radius,transit_duration,insolation_flux,equilibrium_temperature,kepler_magnitude,stellar_radius,stellar_effective_temperature,stellar_surface_gravity,stellar_mass,stellar_metallicity
count,7386.0,7386.0,7386.0,7386.0,7386.0,7386.0,7386.0,7386.0,7386.0,7386.0,7386.0,7386.0,7386.0
mean,0.38248,0.013537,0.040749,0.00051,0.04048,0.000841,0.07268,0.557713,0.006981,0.2302,0.814815,0.258074,0.77579
std,0.486026,0.032207,0.065341,0.015805,0.049364,0.016241,0.061408,0.108439,0.025676,0.062114,0.08288,0.09782,0.092189
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.000547,0.006119,7e-06,0.017203,2e-06,0.033619,0.493902,0.003094,0.199509,0.796982,0.207684,0.732026
50%,0.0,0.002054,0.015398,1.3e-05,0.027,1.6e-05,0.057633,0.57751,0.003834,0.234681,0.838617,0.244432,0.784314
75%,1.0,0.007439,0.037872,0.000118,0.04431,0.000103,0.094957,0.639526,0.005339,0.260824,0.858862,0.279788,0.839869
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [90]:
# Split into train and test
train_df, test_df = train_test_split(df, test_size=0.2)

In [91]:
# Prep training data
x_train = train_df.drop('status', axis=1)
y_train = train_df['status']

# Convert to numpy arrays
x_train = np.array(x_train,dtype=np.float32)
y_train = np.array(y_train,dtype=np.float32).reshape(-1,1)

# Convert to tensors
X_train = torch.tensor(x_train, dtype=torch.float32)
Y_train = torch.tensor(y_train, dtype=torch.float32)

print(f"Input dimensions: {x_train.shape[1]}")
print(f"Output dimensions: {y_train.shape[1]}")

Input dimensions: 12
Output dimensions: 1


In [92]:
# Prep testing data
x_test = test_df.drop('status', axis=1)
y_test = test_df['status']

# Convert to numpy arrays
x_test = np.array(x_test,dtype=np.float32)
y_test = np.array(y_test,dtype=np.float32).reshape(-1,1)

# Convert to tensors
X_test = torch.tensor(x_test, dtype=torch.float32)
Y_test = torch.tensor(y_test, dtype=torch.float32)

In [93]:
# Creating the model
models = []
hidden_nodes = np.arange(12,50,1)
for h_n in hidden_nodes:
    print(f"Creating model with {h_n} hidden nodes")
    models.append(
        nn.Sequential(
            nn.Linear(12, h_n),
            nn.ReLU(),
            nn.Linear(h_n, 1),
            nn.ReLU(),
            nn.Linear(1, 1),
            nn.Sigmoid()
        )
    )
    models.append(
        nn.Sequential(
            nn.Linear(12, h_n),
            nn.ReLU(),
            nn.Linear(h_n, h_n),
            nn.ReLU(),
            nn.Linear(h_n, 1),
            nn.ReLU(),
            nn.Linear(1, 1),
            nn.Sigmoid()
        )
    )
    models.append(
        nn.Sequential(
            nn.Linear(12, h_n),
            nn.ReLU(),
            nn.Linear(h_n, h_n),
            nn.ReLU(),
            nn.Linear(h_n, h_n),
            nn.ReLU(),
            nn.Linear(h_n, 1),
            nn.ReLU(),
            nn.Linear(1, 1),
            nn.Sigmoid()
        )
    )


Creating model with 12 hidden nodes
Creating model with 13 hidden nodes
Creating model with 14 hidden nodes
Creating model with 15 hidden nodes
Creating model with 16 hidden nodes
Creating model with 17 hidden nodes
Creating model with 18 hidden nodes
Creating model with 19 hidden nodes
Creating model with 20 hidden nodes
Creating model with 21 hidden nodes
Creating model with 22 hidden nodes
Creating model with 23 hidden nodes
Creating model with 24 hidden nodes
Creating model with 25 hidden nodes
Creating model with 26 hidden nodes
Creating model with 27 hidden nodes
Creating model with 28 hidden nodes
Creating model with 29 hidden nodes
Creating model with 30 hidden nodes
Creating model with 31 hidden nodes
Creating model with 32 hidden nodes
Creating model with 33 hidden nodes
Creating model with 34 hidden nodes
Creating model with 35 hidden nodes
Creating model with 36 hidden nodes
Creating model with 37 hidden nodes
Creating model with 38 hidden nodes
Creating model with 39 hidde

In [94]:
# Define loss function
loss_fn = nn.BCELoss()  # binary cross entropy

# Define optimizer
optimizers = []
for model in models:
	optimizers.append(optim.Adam(model.parameters(), lr=0.001))

In [95]:
# Transfer model to GPU if available
if gpu_available and not use_cpu:
	print(f"Transferring model to GPU: {torch.cuda.get_device_name(0)}")
	for model in models:
		model = model.cuda()
		loss_fn = loss_fn.cuda()

Transferring model to GPU: NVIDIA GeForce RTX 2070 SUPER


In [96]:
# Define training parameters
n_epochs = 100
batch_size = 10

# Train the models
for epoch in range(n_epochs):
    for i in range(0, len(X_train), batch_size):
        # Get batch
        X_batch = X_train[i:i+batch_size]
        Y_batch = Y_train[i:i+batch_size]

        # Transfer to GPU if available
        if gpu_available and not use_cpu:
            X_batch = X_batch.cuda()
            Y_batch = Y_batch.cuda()

        # Feedforward
        for model, optimizer in zip(models, optimizers):
            Y_pred = model(X_batch)

            # Calculate loss
            loss = loss_fn(Y_pred, Y_batch)

            # Backpropagate error and update weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Print loss after each epoch
    print(f'Finished epoch {epoch}, latest loss {loss}')

Finished epoch 0, latest loss 0.7241796255111694
Finished epoch 1, latest loss 0.72336745262146
Finished epoch 2, latest loss 0.7230351567268372
Finished epoch 3, latest loss 0.722898542881012
Finished epoch 4, latest loss 0.7228422164916992
Finished epoch 5, latest loss 0.722818911075592
Finished epoch 6, latest loss 0.7228093147277832
Finished epoch 7, latest loss 0.7228052616119385
Finished epoch 8, latest loss 0.7228035926818848
Finished epoch 9, latest loss 0.7228029370307922
Finished epoch 10, latest loss 0.7228026390075684
Finished epoch 11, latest loss 0.7228025197982788
Finished epoch 12, latest loss 0.722802460193634
Finished epoch 13, latest loss 0.7228024005889893
Finished epoch 14, latest loss 0.7228024005889893
Finished epoch 15, latest loss 0.7228024005889893
Finished epoch 16, latest loss 0.7228024005889893
Finished epoch 17, latest loss 0.7228024005889893
Finished epoch 18, latest loss 0.7228023409843445
Finished epoch 19, latest loss 0.7228023409843445
Finished epoch 

In [97]:
# Make predictions
X_test = X_test.to(device)
with torch.no_grad():
    # Run the model on the test data and transfer the result to CPU
    Y_pred = []
    for model in models:
        Y_pred.append(model(X_test).to("cpu"))

In [118]:
# Calculate the accuracy
best_accuracy = 0.0
for y_pred, model in zip(Y_pred, models):
	accuracy = (y_pred.round() == Y_test).float().mean()

	# Save best model
	if accuracy > best_accuracy:
		best_accuracy = accuracy
		best_model = model

	# Print accuracy
	print(f"Accuracy {accuracy}")

print(f"Best accuracy {best_accuracy}")
print(f"Best model {best_model}")

Accuracy 0.8525033593177795
Accuracy 0.6089310050010681
Accuracy 0.6089310050010681
Accuracy 0.8619756698608398
Accuracy 0.6089310050010681
Accuracy 0.6089310050010681
Accuracy 0.8538565635681152
Accuracy 0.8619756698608398
Accuracy 0.8646820187568665
Accuracy 0.6089310050010681
Accuracy 0.8592692613601685
Accuracy 0.6089310050010681
Accuracy 0.6089310050010681
Accuracy 0.6089310050010681
Accuracy 0.8612990379333496
Accuracy 0.8640053868293762
Accuracy 0.8612990379333496
Accuracy 0.8558863401412964
Accuracy 0.8673883676528931
Accuracy 0.6089310050010681
Accuracy 0.8606224656105042
Accuracy 0.6089310050010681
Accuracy 0.6089310050010681
Accuracy 0.6089310050010681
Accuracy 0.858592689037323
Accuracy 0.8640053868293762
Accuracy 0.6089310050010681
Accuracy 0.6089310050010681
Accuracy 0.8707712888717651
Accuracy 0.6089310050010681
Accuracy 0.8667117953300476
Accuracy 0.6089310050010681
Accuracy 0.6089310050010681
Accuracy 0.8680649399757385
Accuracy 0.8599458932876587
Accuracy 0.6089310050

In [99]:
torch.save(best_model, "data/model_87_2hidden_31_nodes.pt")

Accuracy 0.6136671304702759 Accuracy 0.8464140892028809
Accuracy 0.6136671304702759 Accuracy 0.8531799912452698
Accuracy 0.8430311083793640 Accuracy 0.8281461596488953
Accuracy 0.6136671304702759 Accuracy 0.8396481871604919
Accuracy 0.6136671304702759 Accuracy 0.8450608849525452
Accuracy 0.8443843126296997 Accuracy 0.6028416752815247
Accuracy 0.6136671304702759 Accuracy 0.6028416752815247
Accuracy 0.6136671304702759 Accuracy 0.6028416752815247
Accuracy 0.6136671304702759 Accuracy 0.6028416752815247
Accuracy 0.6136671304702759 Accuracy 0.6028416752815247
Accuracy 0.8511502146720886 Accuracy 0.6028416752815247
Accuracy 0.8477672338485718 Accuracy 0.8579161167144775

In [117]:
best_accuracy

tensor(0.8735)