# Predict Single Morphology Variable (Sersic n) Based on Multiple Star-formation Variables (M*, SFR and more)


In [17]:
#Loading needed modules and classes/functions 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from sklearn.preprocessing import StandardScaler

#3 Linear layers NN, 1 hidden 
class linearRegression(torch.nn.Module):
    def __init__(self, inputSize, outputSize,hiddenSize):
        super(linearRegression, self).__init__()
        self.linear = torch.nn.Linear(inputSize, hiddenSize)
        self.linear1 = torch.nn.Linear(hiddenSize, hiddenSize)
        self.linear2 = torch.nn.Linear(hiddenSize,hiddenSize)
        self.linear3 = torch.nn.Linear(hiddenSize,hiddenSize)
        self.linear4= torch.nn.Linear(hiddenSize, outputSize)
        self.ReLU= torch.nn.ReLU()
        self.Sigmoid= torch.nn.Sigmoid()

    def forward(self, x):
        x = self.linear(x)
        x = self.Sigmoid(x)
        # x= self.linear1(x)
        # x= self.Sigmoid(x)
        # x= self.linear2(x)
        # x= self.Sigmoid(x)
        x = self.linear3(x)
        x= self.Sigmoid(x)
        x=self.linear4(x)
        return x




# Importing Data from Schema Table

In [6]:


data=pd.read_csv('CompleteTable.csv')  #Importing All MaNGA Data from DPRall Schema

galaxy_list=np.loadtxt('Query Results',dtype=str) #Pulling Manga ID's of galaxies which satisfy log(M) > 9 and 0 < z < 0.1

galaxy_index=np.zeros(len(galaxy_list)) 
for i in range (len(galaxy_list)): #Getting the index of these galaxies in the schema table
    galaxy_index[i]=np.where(data.loc[:,'mangaid']==galaxy_list[i])[0][0]

galaxy_index=np.array(galaxy_index,dtype=int) #Ensuring we have array that can be used to index, force int 

galaxies=data.iloc[galaxy_index] #DF of galaxies which satisfies the condition, contains all relevant schema data 

#Creating the arrays of the independent variables were are interested in, and dependent variable n 

mass=galaxies.loc[:,'nsa_sersic_mass']
log_mass=np.log10(mass)

SFR=galaxies.loc[:,'sfr_tot']
log_SFR=np.log10(SFR)

ha_flux=galaxies.loc[:,'emline_gflux_tot_ha_6564']

n=galaxies.loc[:,'nsa_sersic_n']
n=np.array(n,dtype=np.float32)
n=StandardScaler().fit_transform(n.reshape(-1,1))
n=torch.from_numpy(n).to('cuda:0')






# Prep the input data to go into a DataLoader 

In [8]:

inputs=[log_SFR,log_mass,ha_flux]
inputs_transformed=[]

def data_preparer(inputs):  
    """
    Takes in a list in which each element is an input variable and then preps
    it accordingly to return it as one combined GPU pytorch tensor. 
    """
    

    #makes all inputs np arrays of np.float32
    #and makes scailing to mean 0 and std of 1
    for i in range (len(inputs)):
        inputs_transformed.append(StandardScaler().fit_transform(np.array(inputs[i],dtype=np.float32).reshape(-1,1))) 
                                                                                                    
    reshape=np.column_stack(inputs_transformed)
    out=torch.from_numpy(reshape).to('cuda:0')
    return(out)

inputs_tensor=data_preparer(inputs)

print(np.shape(inputs_tensor))
print(np.shape(n))

#Create Tensor Datasets 
train_ds, test_ds, validate_ds=torch.utils.data.random_split(TensorDataset(inputs_tensor,n),[2183,727,728]) #Better way to automate these splits? 

#Create Data Loaders
train_dl=DataLoader(train_ds,batch_size=64,shuffle=True)
test_dl=DataLoader(test_ds,batch_size=64,shuffle=True)
validate_dl=DataLoader(validate_ds,batch_size=64,shuffle=True)

print(train_ds[0])



torch.Size([3638, 3])
torch.Size([3638, 1])
(tensor([ 0.1383, -0.5002, -0.2693], device='cuda:0'), tensor([-0.8928], device='cuda:0'))


# The Model (N inputs, 1 output, CUDA Enabled)

In [18]:

inputDim=len(inputs)
outputDim=1 
hiddenSize=50
learningRate=0.1

model = linearRegression(inputDim, outputDim,hiddenSize)
##### For GPU #######
if torch.cuda.is_available():
    model.cuda()

criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)

#Training Loop
epochs=1800

epoch_array=np.zeros(epochs)
loss_array=np.zeros(epochs)
for epoch in range(epochs): #Forward Pass and loss
    for xb,yb in train_dl:
        # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
        optimizer.zero_grad()

        # get output from the model, given the inputs
        outputs = model(xb)

        # get loss for the predicted output
        loss = criterion(outputs, yb)
        # print(loss)
        # get gradients w.r.t to parameters, (backward pass)
        loss.backward()

        # update parameters
        optimizer.step()

        epoch_array[epoch]=epoch 
        loss_array[epoch]=loss.item()

        if epoch %100==0:
            print('epoch {}, loss {}'.format(epoch, loss.item()))

with torch.no_grad(): # we don't need gradients in the testing phase
    predicted = model(inputs_tensor)
    print(predicted)




3984270989894867
epoch 200, loss 0.3185429275035858
epoch 200, loss 0.32043349742889404
epoch 200, loss 0.5441081523895264
epoch 200, loss 0.50640869140625
epoch 200, loss 0.3517458438873291
epoch 200, loss 0.2685655951499939
epoch 200, loss 0.35537564754486084
epoch 200, loss 0.317348450422287
epoch 200, loss 0.6004351377487183
epoch 200, loss 0.39534157514572144
epoch 200, loss 0.703680694103241
epoch 200, loss 0.32276690006256104
epoch 200, loss 0.3365658223628998
epoch 200, loss 0.33886951208114624
epoch 200, loss 0.6516562104225159
epoch 200, loss 0.3330531716346741
epoch 200, loss 0.37449103593826294
epoch 200, loss 0.41184377670288086
epoch 200, loss 0.337708055973053
epoch 200, loss 0.3145672082901001
epoch 200, loss 0.5260970592498779
epoch 200, loss 0.36772382259368896
epoch 200, loss 0.4012555181980133
epoch 200, loss 0.456193745136261
epoch 200, loss 0.41922104358673096
epoch 200, loss 0.5525802969932556
epoch 200, loss 0.46274876594543457
epoch 200, loss 0.2669677734375
ep

In [19]:
from mpl_toolkits.mplot3d import Axes3D

%matplotlib qt
fig=plt.figure()
ax= plt.axes(projection='3d')
ax.scatter3D(inputs_tensor.cpu().detach().numpy()[:,0].flatten(),inputs_tensor.cpu().detach().numpy()[:,1].flatten(),predicted.cpu().detach().numpy().flatten(),'red')
ax.scatter3D(inputs_tensor.cpu().detach().numpy()[:,0],inputs_tensor.cpu().detach().numpy()[:,1],n.cpu().detach().numpy(),alpha=0.5)


# plt.scatter(inputs_tensor.cpu().detach().numpy(),n.cpu().detach().numpy(),alpha=0.2)
# plt.scatter(inputs_tensor.cpu().detach().numpy(),predicted.cpu().detach().numpy())


<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7f9a287bbad0>

# Changing Epoch while Keeping Learning Rate and Hidden Size Constant 

In [62]:
inputDim=len(inputs)
outputDim=1 
hiddenSize=50
learningRate=0.1

model = linearRegression(inputDim, outputDim,hiddenSize)
##### For GPU #######
if torch.cuda.is_available():
    model.cuda()

criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)

reps=50
last_loss=np.zeros(reps) #Stores final loss returned value after training loop is done for a given number of epoch
epoch_range=np.linspace(400,4000,num=reps,dtype=int)


for i in range (reps):
    #Training Loop
    epochs=epoch_range[i]
    
    # epoch_array=np.zeros(epochs)
    loss_array=np.zeros(epochs)
    for epoch in range(epochs): #Forward Pass and loss
        for xb,yb in train_dl:
            # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
            optimizer.zero_grad()

            # get output from the model, given the inputs
            outputs = model(xb)

            # get loss for the predicted output
            loss = criterion(outputs, yb)
            # print(loss)
            # get gradients w.r.t to parameters, (backward pass)
            loss.backward()

            # update parameters
            optimizer.step()

            # epoch_array[epoch]=epoch 
            loss_array[epoch]=loss.item()
    
 
    last_loss[i]=loss_array[-1] #Store final loss value
    print('Done rep ',i)

Done rep  0
Done rep  1
Done rep  2
Done rep  3
Done rep  4
Done rep  5
Done rep  6
Done rep  7
Done rep  8
Done rep  9
Done rep  10
Done rep  11
Done rep  12
Done rep  13
Done rep  14
Done rep  15
Done rep  16
Done rep  17
Done rep  18
Done rep  19
Done rep  20
Done rep  21
Done rep  22
Done rep  23
Done rep  24
Done rep  25
Done rep  26
Done rep  27
Done rep  28
Done rep  29
Done rep  30
Done rep  31
Done rep  32
Done rep  33
Done rep  34
Done rep  35
Done rep  36
Done rep  37
Done rep  38
Done rep  39
Done rep  40
Done rep  41
Done rep  42
Done rep  43
Done rep  44
Done rep  45
Done rep  46
Done rep  47
Done rep  48
Done rep  49


In [66]:
plt.title('Number of Epochs and Final Value of Loss')
plt.ylabel('Last Loss Value')
plt.xlabel('Number of Epochs Trained')
plt.plot(epoch_range,last_loss)

[<matplotlib.lines.Line2D at 0x7ff213273a50>]

# Changing Hidden Size while Keeping Learning Rate and Epoch Constant 

In [68]:
reps=50
hidden_size_range=np.linspace(10,200,num=reps,dtype=int)
last_loss_hidden_size=np.zeros(reps) #Stores final loss returned value after training loop is done for a given number of epoch

for i in range (reps):

    inputDim=len(inputs)
    outputDim=1 
    hiddenSize=hidden_size_range[i]
    learningRate=0.1

    model = linearRegression(inputDim, outputDim,hiddenSize)
    ##### For GPU #######
    if torch.cuda.is_available():
        model.cuda()

    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)

    #Training Loop
    epochs=1000 

    
    loss_array=np.zeros(epochs)
    for epoch in range(epochs): #Forward Pass and loss
        for xb,yb in train_dl:
            # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
            optimizer.zero_grad()

            # get output from the model, given the inputs
            outputs = model(xb)

            # get loss for the predicted output
            loss = criterion(outputs, yb)
            # print(loss)
            # get gradients w.r.t to parameters, (backward pass)
            loss.backward()

            # update parameters
            optimizer.step()

            loss_array[epoch]=loss.item()


    last_loss_hidden_size[i]=loss_array[-1] #Store final loss value
    print('Done rep ',i)


Done rep  0
Done rep  1
Done rep  2
Done rep  3
Done rep  4
Done rep  5
Done rep  6
Done rep  7
Done rep  8
Done rep  9
Done rep  10
Done rep  11
Done rep  12
Done rep  13
Done rep  14
Done rep  15
Done rep  16
Done rep  17
Done rep  18
Done rep  19
Done rep  20
Done rep  21
Done rep  22
Done rep  23
Done rep  24
Done rep  25
Done rep  26
Done rep  27
Done rep  28
Done rep  29
Done rep  30
Done rep  31
Done rep  32
Done rep  33
Done rep  34
Done rep  35
Done rep  36
Done rep  37
Done rep  38
Done rep  39
Done rep  40
Done rep  41
Done rep  42
Done rep  43
Done rep  44
Done rep  45
Done rep  46
Done rep  47
Done rep  48
Done rep  49


In [73]:
plt.title('Hidden Size and Final Value of Loss')
plt.ylabel('Last Loss Value')
plt.xlabel('Hidden Size')
plt.plot(hidden_size_range,last_loss_hidden_size)

[<matplotlib.lines.Line2D at 0x7ff18397d210>]

# Changing Hidden Size while Keeping Learning Rate and Epoch Constant 

In [84]:
reps=50
learningRate_range=np.linspace(0.01,1,num=reps,dtype=float)
last_loss_learningRate=np.zeros(reps) #Stores final loss returned value after training loop is done for a given number of epoch

for i in range (reps):

    inputDim=len(inputs)
    outputDim=1 
    hiddenSize= 50
    learningRate=learningRate_range[i]

    model = linearRegression(inputDim, outputDim,hiddenSize)
    ##### For GPU #######
    if torch.cuda.is_available():
        model.cuda()

    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)

    #Training Loop
    epochs= 1000 

    
    loss_array=np.zeros(epochs)
    for epoch in range(epochs): #Forward Pass and loss
        for xb,yb in train_dl:
            # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
            optimizer.zero_grad()

            # get output from the model, given the inputs
            outputs = model(xb)

            # get loss for the predicted output
            loss = criterion(outputs, yb)
            # print(loss)
            # get gradients w.r.t to parameters, (backward pass)
            loss.backward()

            # update parameters
            optimizer.step()

            loss_array[epoch]=loss.item()


    last_loss_learningRate[i]=loss_array[-1] #Store final loss value
    print('Done rep ',i)

Done rep  0
Done rep  1
Done rep  2
Done rep  3
Done rep  4
Done rep  5
Done rep  6
Done rep  7
Done rep  8
Done rep  9
Done rep  10
Done rep  11
Done rep  12
Done rep  13
Done rep  14
Done rep  15
Done rep  16
Done rep  17
Done rep  18
Done rep  19
Done rep  20
Done rep  21
Done rep  22
Done rep  23
Done rep  24
Done rep  25
Done rep  26
Done rep  27
Done rep  28
Done rep  29
Done rep  30
Done rep  31
Done rep  32
Done rep  33
Done rep  34
Done rep  35
Done rep  36
Done rep  37
Done rep  38
Done rep  39
Done rep  40
Done rep  41
Done rep  42
Done rep  43
Done rep  44
Done rep  45
Done rep  46
Done rep  47
Done rep  48
Done rep  49


In [85]:
plt.title('Learning Rate and Final Value of Loss')
plt.ylabel('Last Loss Value')
plt.xlabel('Learning Rate')
plt.plot(learningRate_range,last_loss_learningRate)



[<matplotlib.lines.Line2D at 0x7ff17a9f9b50>]