In [1]:
import pandas as pd
import numpy
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import  classification_report
from sklearn.metrics import r2_score
import numpy as np
import chime
import tqdm

In [2]:
# Define the neural network model for classification
class Classifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

    def train(self, train_loader, criterion, optimizer, num_epochs):
        for epoch in range(num_epochs):
            for inputs, labels in train_loader:
                optimizer.zero_grad()
                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
    
    def predict(self, inputs):
        with torch.no_grad():
            outputs = self(inputs)
            _, predicted = torch.max(outputs, 1)
        return predicted

In [3]:
# define neural network model for regression
class Regression(nn.Module):

  def __init__(self):
    super().__init__()
    self.layers = nn.Sequential(
      nn.Linear(4, 64),
      nn.ReLU(),
      nn.Linear(64, 32),
      nn.ReLU(),
      nn.Linear(32, 1)
    )


  def forward(self, x):
    '''
      Forward pass
    '''
    return self.layers(x)

In [3]:
# Load atmospheric CRN metric data

abiotic_flux=pd.read_csv('Archean Earth flux network metrics, no life.csv') # abiotic case
biotic_flux=pd.read_csv('Archean Earth flux network metrics, with life.csv') # biotic case
abiotic_steady_state=pd.read_csv('Archean Earth steady state network metrics, no life.csv') # weird abiotic case
anomalous_high_flux=pd.read_csv('Archean Earth agnostic high flux network metrics, no life.csv') # second weird abiotic case

exo_combined=pd.concat([abiotic_flux,biotic_flux,abiotic_steady_state,anomalous_high_flux])
exo_data=exo_combined[['Mean degree','Average shortest path length','CH4 abundance']]
exo_target=exo_combined['Has life?']

In [4]:
# Define classifier input values
input_size = exo_data.shape[1]
hidden_size = 100
num_classes=2 # because, hey, a planet either has life, or it doesn't!
learning_rate = 0.01
batch_size = 32
num_epochs = 100

X_train, X_test, y_train, y_test = train_test_split(exo_data,exo_target, test_size=0.2, random_state=23) 


In [62]:
# Convert the data into PyTorch tensors, create datasets, and then tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) 

In [6]:
# Train Model

model = Classifier(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
model.train(train_loader, criterion, optimizer, num_epochs)
chime.success()


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


In [8]:
# Test model

X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_pred = model.predict(X_test_tensor)
y_pred_np = np.array(y_pred)

correct = sum(y_test == y_pred_np)
accuracy = correct / len(y_test)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred_np))

Accuracy: 0.875
              precision    recall  f1-score   support

           0       0.93      0.88      0.90      1093
           1       0.78      0.86      0.82       531

    accuracy                           0.88      1624
   macro avg       0.85      0.87      0.86      1624
weighted avg       0.88      0.88      0.88      1624



In [10]:
# Not too shabby!
# Now let's try with spectral data


input_size = exo_data.shape[1]
batch_size = 10
model=Regression()
n_epoch=100

# loss function and optimizer
loss_function =nn.MSELoss()
  # mean square error
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# input data
exo_data=pd.read_csv('exo_data.csv')
exo_spectra=exo_data[['CH4 abundance','Mean degree','Average shortest path length','Clustering coefficient']]
exo_target=exo_data['CFOS']

#split and format data

X_train, X_test, y_train, y_test = train_test_split(exo_spectra,exo_target, test_size=0.2, random_state=23) 


X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor= torch.tensor(y_test.values,dtype=torch.float32)

# load data
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) 




In [9]:
# training loop
for epoch in range(n_epochs):
    
    
    # Set current loss value
    current_loss = 0.0
    # Iterate over the DataLoader for training data
    for i, data in enumerate(train_loader, 0):
      
      # Get and prepare inputs
      inputs, targets = data
      inputs, targets = inputs.float(), targets.float()
      targets = targets.reshape((targets.shape[0], 1))
        # Zero the gradients
      optimizer.zero_grad()
      # Perform forward pass
      outputs = model(inputs)
      
      # Compute loss
      loss = loss_function(outputs, targets)
      
      # Perform backward pass
      loss.backward()
      
      # Perform optimization
      optimizer.step()
      


NameError: name 'n_epochs' is not defined

In [127]:
# Test model
y_pred = model(X_test_tensor)
model_score=r2_score(y_test_tensor.detach().numpy(),y_pred.detach().numpy())
print(model_score)


-1.398872909891502e+18


In [13]:
# Huh. Looks like we're going to need to find a better metric for spectral information
# Let's try spectral variance

# input data
exo_data=pd.read_csv('exo_data.csv')
exo_spectra=exo_data[['CH4 abundance','Mean degree','Average shortest path length','Clustering coefficient']]
exo_target=exo_data['Spectral variance']

n_epochs=100
input_size = exo_data.shape[1]
batch_size = 10
model=Regression()

# loss function and optimizer
loss_function =nn.MSELoss()
  # mean square error
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)


#split and format data

X_train, X_test, y_train, y_test = train_test_split(exo_spectra,exo_target, test_size=0.2, random_state=23) 


X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor= torch.tensor(y_test.values,dtype=torch.float32)

# load data
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) 


In [14]:
# training loop
for epoch in range(n_epochs):
    
    
    # Set current loss value
    current_loss = 0.0
    # Iterate over the DataLoader for training data
    for i, data in enumerate(train_loader, 0):
      
      # Get and prepare inputs
      inputs, targets = data
      inputs, targets = inputs.float(), targets.float()
      targets = targets.reshape((targets.shape[0], 1))
        # Zero the gradients
      optimizer.zero_grad()
      # Perform forward pass
      outputs = model(inputs)
      
      # Compute loss
      loss = loss_function(outputs, targets)
      
      # Perform backward pass
      loss.backward()
      
      # Perform optimization
      optimizer.step()

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


In [15]:
y_pred = model(X_test_tensor)
model_score=r2_score(y_test_tensor.detach().numpy(),y_pred.detach().numpy())
print(model_score)

-2.9253360078003634e+19
