# Predict Single Morphology Variable (Sersic n) Based on Multiple Star-formation Variables (M*, SFR and more)


In [32]:
#Loading needed modules and classes/functions 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset

#3 Linear layers NN, 1 hidden 
class linearRegression(torch.nn.Module):
    def __init__(self, inputSize, outputSize,hiddenSize):
        super(linearRegression, self).__init__()
        self.linear = torch.nn.Linear(inputSize, hiddenSize)
        self.linear1 = torch.nn.Linear(hiddenSize, hiddenSize)
        self.linear2= torch.nn.Linear(hiddenSize, outputSize)
        self.ReLU= torch.nn.ReLU()
        self.Sigmoid= torch.nn.Sigmoid()

    def forward(self, x):
        x = self.linear(x)
        x = self.Sigmoid(x)
        x= self.linear1(x)
        x= self.Sigmoid(x)
        x = self.linear2(x)
        return x




# Importing Data from Schema Table

In [2]:


data=pd.read_csv('CompleteTable.csv')  #Importing All MaNGA Data from DPRall Schema

galaxy_list=np.loadtxt('Query Results',dtype=str) #Pulling Manga ID's of galaxies which satisfy log(M) > 9 and 0 < z < 0.1

galaxy_index=np.zeros(len(galaxy_list)) 
for i in range (len(galaxy_list)): #Getting the index of these galaxies in the schema table
    galaxy_index[i]=np.where(data.loc[:,'mangaid']==galaxy_list[i])[0][0]

galaxy_index=np.array(galaxy_index,dtype=int) #Ensuring we have array that can be used to index, force int 

galaxies=data.iloc[galaxy_index] #DF of galaxies which satisfies the condition, contains all relevant schema data 

#Creating the arrays of the independent variables were are interested in, and dependent variable n 

mass=galaxies.loc[:,'nsa_sersic_mass']
log_mass=np.log10(mass)

SFR=galaxies.loc[:,'sfr_tot']
log_SFR=np.log10(SFR)

ha_flux=galaxies.loc[:,'emline_gflux_tot_ha_6564']

n=galaxies.loc[:,'nsa_sersic_n']
n=np.array(n,dtype=np.float32)
n=torch.from_numpy(n).to('cuda:0').reshape(-1,1)






# Prep the input data to go into a DataLoader 

In [29]:

inputs=[log_SFR,log_mass]

def data_preparer(inputs):  
    """
    Takes in a list in which each element is an input variable and then preps
    it accordingly to return it as one combined GPU pytorch tensor. 
    """
    for i in range (len(inputs)):
        inputs[i]=np.array(inputs[i],dtype=np.float32) #makes all inputs np arrays of np.float 32
    reshape=np.column_stack(inputs)
    out=torch.from_numpy(reshape).to('cuda:0')
    return(out)

inputs_tensor=data_preparer(inputs)

print(np.shape(inputs_tensor))
print(np.shape(n))
print(len(n))

#Create Tensor Datasets 
train_ds, test_ds, validate_ds=torch.utils.data.random_split(TensorDataset(inputs_tensor,n),[2183,727,728]) #Better way to automate these splits? 

#Create Data Loaders
train_dl=DataLoader(train_ds,batch_size=64,shuffle=True)
test_dl=DataLoader(test_ds,batch_size=64,shuffle=True)
validate_dl=DataLoader(validate_ds,batch_size=64,shuffle=True)

print(train_ds[0])



torch.Size([3638, 2])
torch.Size([3638, 1])
3638
(tensor([-1.0401, 10.4696], device='cuda:0'), tensor([1.4706], device='cuda:0'))


# The Model (N inputs, 1 output, CUDA Enabled)

In [35]:

inputDim=len(inputs)
outputDim=1 
hiddenSize=50
learningRate=0.1

model = linearRegression(inputDim, outputDim,hiddenSize)
##### For GPU #######
if torch.cuda.is_available():
    model.cuda()

criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)

#Training Loop
epochs=1000

epoch_array=np.zeros(epochs)
loss_array=np.zeros(epochs)
for epoch in range(epochs): #Forward Pass and loss
    for xb,yb in train_dl:
        # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
        optimizer.zero_grad()

        # get output from the model, given the inputs
        outputs = model(xb)

        # get loss for the predicted output
        loss = criterion(outputs, yb)
        # print(loss)
        # get gradients w.r.t to parameters, (backward pass)
        loss.backward()

        # update parameters
        optimizer.step()

        epoch_array[epoch]=epoch 
        loss_array[epoch]=loss.item()

        if epoch %100==0:
            print('epoch {}, loss {}'.format(epoch, loss.item()))

with torch.no_grad(): # we don't need gradients in the testing phase
    predicted = model(inputs_tensor)
    print(predicted)



'cuda:0', grad_fn=<MseLossBackward>)
tensor(1.4123, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(1.4003, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(1.4930, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(1.5388, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(1.4973, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(1.1423, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(2.1629, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(1.6699, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(2.0543, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(1.5456, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(2.1557, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(1.4519, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(1.2564, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(1.7410, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(1.9167, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(2.1621, device='cuda:0', grad_fn=<MseLossBackward>)
tensor(1.6535, devi

In [36]:
from mpl_toolkits.mplot3d import Axes3D

%matplotlib qt
fig=plt.figure()
ax= plt.axes(projection='3d')
ax.scatter3D(inputs_tensor.cpu().detach().numpy()[:,0].flatten(),inputs_tensor.cpu().detach().numpy()[:,1].flatten(),predicted.cpu().detach().numpy().flatten(),'red')
ax.scatter3D(inputs_tensor.cpu().detach().numpy()[:,0],inputs_tensor.cpu().detach().numpy()[:,1],n.cpu().detach().numpy(),alpha=0.5)


# plt.scatter(inputs_tensor.cpu().detach().numpy(),n.cpu().detach().numpy(),alpha=0.2)
# plt.scatter(inputs_tensor.cpu().detach().numpy(),predicted.cpu().detach().numpy())


<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7f1f08d48a50>

In [27]:
plt.plot(epoch_array,np.log(loss_array))

[<matplotlib.lines.Line2D at 0x7f1f181d0450>]