<h1>CNNs for EPRV on HARPS</h1>
The goal here is to training a CNN using HARPS images to the outputs of the HARPS EPRV extraction pipeline to see it a large of NN can replicated more explicit modeling.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.transforms import transforms
from torch.utils.data import DataLoader

<h2>Model definition</h2>

In [2]:
class DownSizeNet(nn.Module):
    def __init__(self, in_size, out_size, normalize=True, leaky_slope=0.2):
        super(DownSizeNet, self).__init__()
        layers = [nn.Conv2d(in_size, out_size, 3, stride=2, padding=1, bias=False).double()]
        if normalize:
            layers.append(nn.BatchNorm2d(out_size, 0.8).double())
        layers.append(nn.LeakyReLU(leaky_slope).double())
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

class RV_Model(nn.Module):
    def __init__(self):
        super(RV_Model, self).__init__()
#         channels_in, self.h, self.w = in_shape
#         channels_out, _, _ = out_shape

#         self.fc    = nn.Linear(latent_dim, self.h * self.w)

        self.down1 = DownSizeNet(3, 64, normalize=False)
        self.down2 = DownSizeNet(64, 128)
        self.down3 = DownSizeNet(128, 256)
        self.down4 = DownSizeNet(256, 512)
        self.down5 = DownSizeNet(512, 512)
        self.down6 = DownSizeNet(512, 512)
#         self.down7 = DownSizeNet(512, 1, normalize=False)
        
        self.final = nn.Sequential(
            nn.Conv2d(512, 1, 3, stride=1, padding=1).double(), nn.Tanh().double()
        )
        
        
    def forward(self, x):
        # Propogate noise through fc layer and reshape to img shape
#         z = self.fc(z).view(z.size(0), 1, self.h, self.w)
        d1 = self.down1(x)
#         print('d1: {}'.format(d1.shape))
        d2 = self.down2(d1)
#         print('d2: {}'.format(d2.shape))
        d3 = self.down3(d2)
#         print('d3: {}'.format(d3.shape))
        d4 = self.down4(d3)
#         print('d4: {}'.format(d4.shape))
        d5 = self.down5(d4)
#         print('d5: {}'.format(d5.shape))
        d6 = self.down6(d5)
#         print('d6: {}'.format(d6.shape))
#         d7 = self.down7(d6)
#         print('d7: {}'.format(d7.shape))
        
        
        return self.final(d6)

In [3]:
from torch.utils.data import Dataset
import glob

In [4]:
from astropy.io import fits

<h2>Dataset importing</h2>
The question here is how is the data organized in the directory and how can it be imported with the target RV. 

Not the OG data but after the data is saved from the pre processing step.

In [5]:
import pickle

In [6]:
import numpy as np
import h5py as h5
filename = "../data/HARPS/PEG51/51Peg_1020-1546_1530-2056.h5"

# ds = h5.File(filename,'r')

In [7]:
def get_length(ds):
    total = 0
    address_book = []
    rvs = []
    
    for x,v in ds['visits'].items():
        for k in ds['images'][x].keys():
            value = np.sum(np.array(ds['images'][x][k]).shape)
            if value != 0:
                address_book.append([x,'','',k])
                rvs.append(np.double(v.attrs['ESO DRS CCF RVC']))
                flat_bool = False
                cali_bool = False
                for key in v.attrs.keys():
                    if v.attrs[key] == 'FLAT' and not flat_bool:
                        address_book[-1][1] = key
                        flat_bool = True
                    if v.attrs[key] == 'THAR_THAR' and not cali_bool:
                        address_book[-1][2] = key
                        cali_bool = True
                
                total += 1
    return total, address_book, rvs
# length, address_book, rvs = get_length(ds)

In [74]:
class RV_Dataset(Dataset):
    def __init__(self, h5_file):
        self.h5_file = h5_file
        self.type = torch.Tensor
        self.ds = h5.File(h5_file,'r')
        self.length, self.address, self.rvs = get_length(self.ds)
        print(self.length,len(self.rvs))

    def __getitem__(self, index):
        
        image = np.stack((np.array(self.ds['images'][self.address[index][0]][self.address[index][-1]])
                         ,np.array(self.ds['images'][self.address[index][1]][self.address[index][-1]])
                         ,np.array(self.ds['images'][self.address[index][2]][self.address[index][-1]])),axis=0)
        return {'img': self.type(image).double(), 'rvs': self.rvs[index]}
    
    
    def __len__(self):
        return self.length


In [75]:
# from astropy.nddata import Cutout2D
# from astropy import units as u

In [76]:
import os.path

<h2>Defining Fitting Process</h2>
including hyperparameters, the loss function, and the optimization algo

In [77]:
lr = 0.001#, betas=(, ),
b1 = 0.9
b2 = 0.999

In [78]:
model = RV_Model()
mse_loss = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(b1, b2))

In [79]:
# directory = something
batch_size = 2
n_cpu = 1
dataloader = DataLoader(
    RV_Dataset(filename),
    batch_size=batch_size,
    shuffle=True,
    num_workers=n_cpu,
)

# validloader = DataLoader(
#     RV_Dataset(xs_valid,ys_valid),
#     batch_size=batch_size,
#     shuffle=True,
#     num_workers=n_cpu,
# )

622 622


In [80]:
import time
import datetime

<h2>Training Step</h2>
the working step!

In [81]:
import sys

In [84]:
prev_time = time.time()
n_epochs = 1
# validiter = iter(validloader)
for epoch in range(n_epochs):
    for i, batch in enumerate(dataloader):
#         def into_func():
        optimizer.zero_grad()
#         print(batch['img'].shape)
        something = model(batch['img'])
#         print(something.shape)
#         y = torch.mean(something)
        y = something.view(batch['img'].shape[0], -1).mean(1)
#         print(y,batch['rvs'])
        loss = mse_loss(y,batch['rvs'].double())
        loss.backward()
        optimizer.step()

            
#         into_func()
#         if i % 5 == 0:
#             imgs, target_rv = validiter.next()
#             optimizer.zero_grad()
#             something = model(imgs)
#             y = torch.mean(something)
#             loss = mse_loss(y,target_rv.double())
            
#             sys.stdout.write(
#                 "\r[Epoch %d/%d] [Batch %d/%d] [Loss: %f]"
#                 % (
#                     epoch,
#                     n_epochs,
#                     i,
#                     len(dataloader),
#                     loss.item()
#                 )  
#         )

torch.Size([2, 3, 526, 526])
torch.Size([2, 1, 9, 9])
tensor([-1.0000, -1.0000], dtype=torch.float64, grad_fn=<MeanBackward1>) tensor([-33.1488, -33.1921])
torch.Size([2, 3, 526, 526])
torch.Size([2, 1, 9, 9])
tensor([-1.0000, -1.0000], dtype=torch.float64, grad_fn=<MeanBackward1>) tensor([-33.0994, -33.1500])
torch.Size([2, 3, 526, 526])
torch.Size([2, 1, 9, 9])
tensor([-1.0000, -1.0000], dtype=torch.float64, grad_fn=<MeanBackward1>) tensor([ -3.0676, -21.0807])
torch.Size([2, 3, 526, 526])
torch.Size([2, 1, 9, 9])
tensor([-1.0000, -1.0000], dtype=torch.float64, grad_fn=<MeanBackward1>) tensor([-33.1923, -15.8469])
torch.Size([2, 3, 526, 526])
torch.Size([2, 1, 9, 9])
tensor([-1.0000, -1.0000], dtype=torch.float64, grad_fn=<MeanBackward1>) tensor([-33.1052, -33.0986])
torch.Size([2, 3, 526, 526])
torch.Size([2, 1, 9, 9])
tensor([-1.0000, -1.0000], dtype=torch.float64, grad_fn=<MeanBackward1>) tensor([-14.8080, -33.0953])
torch.Size([2, 3, 526, 526])
torch.Size([2, 1, 9, 9])
tensor([-1

KeyboardInterrupt: 