In [1]:
import numpy as np
import torch
from torch import nn, optim
import torch.nn.functional as F
import time
import os
import h5py
import torchvision
from torch.utils.data import DataLoader, TensorDataset, ConcatDataset

from torchvision import transforms
from torch.utils import data
from sklearn.model_selection import KFold

datadir = ""
training_set = datadir + 'DR12Q-63000.h5'


In [2]:
f = h5py.File(training_set, 'r') 
f.keys()

<KeysViewHDF5 ['FLUX', 'PLATE_F', 'PLATE_T', 'Z_DR12Q_VI', 'Z_PCA', 'Z_PIPE', 'Z_QN']>

In [3]:
X = f['FLUX'][()]  
Y = f['Z_DR12Q_VI'][()]
Z= f['Z_QN'][()]
W= f['Z_PIPE'][()]
S= f['Z_PCA'][()]



In [4]:
X.shape,Y.shape

((63100, 2000), (63100, 1))

In [5]:
#features = torch.Tensor(X).view(-1,4000)
#labels = torch.Tensor(Y).view(-1,1)

label_std = (Y.std()**1.0) 
label_mean = Y.mean() 
labels_norm = (Y - label_mean)/label_std

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X ,labels_norm, test_size= 0.1,random_state = 42 )
X_train1, X_test1, Y_train1, Y_test1 = train_test_split(X ,Z, test_size= 0.1,random_state = 42 )
X_train2, X_test2, Y_train2, Y_test2 = train_test_split(X ,W, test_size= 0.1,random_state = 42 )
X_train3, X_test3, Y_train3, Y_test3 = train_test_split(X ,S, test_size= 0.1,random_state = 42 )

In [7]:
X_train=torch.Tensor(X_train).view(-1,2000)
Y_train=torch.Tensor(Y_train).view(-1,1)
X_test=torch.Tensor(X_test).view(-1,2000)
Y_test=torch.Tensor(Y_test).view(-1,1)

Y_test1=torch.Tensor(Y_test1).view(-1,1)
Y_test2=torch.Tensor(Y_test2).view(-1,1)
Y_test3=torch.Tensor(Y_test3).view(-1,1)

#Z_test=torch.Tensor(Z_test).view(-1,1)



In [8]:
#dataset_train = TensorDataset(X_train, Y_train)
#dataset_test = TensorDataset(X_test, Y_test)
#dataset_QN = TensorDataset(X_test, Z_test)

In [9]:
#self.layer4 = nn.Sequential(
 #           nn.Conv1d(36, 20, 12),
 #           nn.ReLU(),
 #           nn.MaxPool1d(2, stride=2))

In [10]:
class RNet(nn.Module):
    def __init__(self):
        super(RNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv1d(1, 60, 200),
            nn.ReLU(),
            nn.MaxPool1d(2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv1d(60, 70,200),
            nn.ReLU(),
            nn.MaxPool1d(2, stride=2))
        self.layer3 = nn.Sequential(
            nn.Conv1d(70, 36, 32),
            nn.ReLU(),
            nn.MaxPool1d(2, stride=2))
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Sequential(
            nn.Linear(5724, 900),
            nn.ReLU())
        self.fc2 = nn.Sequential(
            nn.Linear(900, 100),
            nn.ReLU())
        self.fc3 = nn.Linear(100, 1)

    def forward(self, x):
        x = x.unsqueeze(1)
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        out = self.fc1(out)
        out = self.fc2(out)
        out = self.fc3(out)
        return out

In [11]:
class FNet(nn.Module): #200-200-32
    
    def __init__(self):
        super(FNet, self).__init__()
        self.C1 = nn.Conv1d(1, 60, 200)
        self.S2 = nn.MaxPool1d(2, stride=2)
        self.C3 = nn.Conv1d(60, 40,200)
        self.S4 = nn.MaxPool1d(2, stride=2)
        self.C5 = nn.Conv1d(40, 36, 32)
        self.S6 = nn.MaxPool1d(2, stride=2)
        self.D7 = nn.Dropout()
        self.F8 = nn.Linear(5724, 900)
        self.F9= nn.Linear(900, 100)
        self.Out= nn.Linear(100, 1)
        
    def forward(self, x):
        x = x.unsqueeze(1)
        x = F.relu(self.C1(x))
        x = self.S2(x)
        x = F.relu(self.C3(x))
        x = self.S4(x)
        x = F.relu(self.C5(x))
        x = self.S6(x)
        x = self.D7(x)
        x = x.view(x.size(0),-1)
        x = F.relu(self.F8(x))
        x = F.relu(self.F9(x))
        x = self.Out(x)
        return x
    

In [12]:
model= RNet()
print(model)

RNet(
  (layer1): Sequential(
    (0): Conv1d(1, 60, kernel_size=(200,), stride=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv1d(60, 70, kernel_size=(200,), stride=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv1d(70, 36, kernel_size=(32,), stride=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (drop_out): Dropout(p=0.5, inplace=False)
  (fc1): Sequential(
    (0): Linear(in_features=5724, out_features=900, bias=True)
    (1): ReLU()
  )
  (fc2): Sequential(
    (0): Linear(in_features=900, out_features=100, bias=True)
    (1): ReLU()
  )
  (fc3): Linear(in_features=100, out_features=1, bias=True)
)


In [79]:
from skorch import NeuralNet
from skorch.callbacks import LRScheduler
from torch.optim.lr_scheduler import CyclicLR

#from skorch.dataset import CVSplit
net = NeuralNet(model,
               criterion =nn.MSELoss,
               max_epochs =3,
               batch_size=100,
                optimizer=torch.optim.Adam,
                lr =0.00000104, #1:0.00004,
                optimizer__weight_decay=1e-4,#0.0005,
                #optimizer__epsilon=1e-08,
                #optimizer__alpha=0.1,
                optimizer__betas=(0.9, 0.99),
               device ='cuda') 

In [80]:
#import gc

#gc.collect()

#torch.cuda.empty_cache()

In [81]:
#from skorch import NeuralNet
#from skorch.callbacks import LRScheduler
#from torch.optim.lr_scheduler import CyclicLR

#from skorch.dataset import CVSplit
#net = NeuralNet(model,
 #              criterion =nn.MSELoss,
  #             max_epochs =100,
   #            batch_size=140,
    #            optimizer=torch.optim.Adagrad,#torch.optim.Adam,
     #           lr =0.00024,
                #optimizer__betas=(0.9, 0.999),
      #          optimizer__lr_decay=0.00001,
       #         #optimizer__weight_decay=0.001,
        #       device ='cuda') 

In [None]:
net.fit(X_train,Y_train)

  epoch    train_loss    valid_loss       dur
-------  ------------  ------------  --------
      1        [36m0.0011[0m        [32m0.0071[0m  121.9816


In [None]:
#from skorch import NeuralNet
#from skorch.dataset import CVSplit
#net = NeuralNet(model,
 #              criterion =nn.MSELoss,
  #             max_epochs =20,
   #            batch_size=180,
    #            optimizer=torch.optim.Adam,
     #           lr =0.000000240,
      #         device ='cuda') 
#net.fit(X_train,Y_train)

In [None]:
#from skorch import NeuralNet
#from skorch.dataset import CVSplit
#net = NeuralNet(model,
 #              criterion =nn.MSELoss,
  #             max_epochs =1,
   #            batch_size=160,
    #            optimizer=torch.optim.Adam,
     #           lr =0.000000090,
      #         device ='cuda') 
#net.fit(X_train,Y_train)

In [None]:
Losses =net.history


In [None]:
valid_loss = net.history[:,'valid_loss']


In [None]:
 train_loss = net.history[:,'train_loss']

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline 
plt.xlabel('Epoches')
plt.ylabel('validaton loss, train loss')
plt.plot(valid_loss)
plt.plot(train_loss)
plt.show()

In [None]:
Y_pred = net.predict(X_test)
print(Y_pred)

In [None]:
len(Y_test)

In [None]:
Y_pred

In [None]:
import matplotlib.pyplot as plt
labels_norm = (Y - label_mean)/label_std

Z = Y_pred*label_std+label_mean
y = Y_test*label_std+label_mean

x1=np.linspace(0,7,9)
y1=x1
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.scatter(y, Z, s=10, c='b', marker="s")
ax1.scatter(y, Y_test1, s=10, c='r', marker="s")
plt.plot(x1,y1)
plt.xlabel('test')
plt.ylabel('predicted redshift')
plt.xlim(0.1,7.1)
plt.ylim(0.0,7.1)
plt.show()

In [None]:
REL=300000*(Z-y.numpy())/(1+y.numpy())
REL1=300000*(Y_test1.numpy()-y.numpy())/(1+y.numpy())
REL2=300000*(Y_test2.numpy()-y.numpy())/(1+y.numpy())
REL3=300000*(Y_test3.numpy()-y.numpy())/(1+y.numpy())
abs(REL).mean()

In [None]:
for i in range(len(REL1)):
    if ((abs(REL[i])<6000)):
        print(i, file=open("Rahim.txt", "a"))

In [None]:
file = open('Rahim.txt', "r+")
file.truncate()
for i in range(len(REL)):
    if ((y.numpy()[i]>2.0)):
        print(i, file=open("Rahim.txt", "a"))
        
with open("Rahim.txt","r") as f:
    print(len(f.readlines()))

In [None]:
file = open('Rahim.txt', "r+")
file.truncate()
for i in range(len(REL)):
    if ((abs(REL[i])<6000)):
        print(i, file=open("Rahim.txt", "a"))
        
with open("Rahim.txt","r") as f:
    print(len(f.readlines())/len(REL))

In [None]:
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import pyplot
#pyplot.yscale('log')
data = REL
plt.figure(figsize=(8,10))

sns.set_style('white')

plt.hist(data, bins=10000,histtype='step',alpha=0.3,edgecolor='r',linewidth=2,label='FNet',color = 'blue')
plt.hist(REL1, bins=10000,histtype='step',alpha=0.3,edgecolor='b',linewidth=2,label='QuasarNet')
#plt.hist(REL2, bins=6000,histtype='step',alpha=0.3,edgecolor='g',linewidth=2,label='ZPIPW')
plt.hist(REL3, bins=10000,histtype='step',alpha=0.3,edgecolor='black',linewidth=2,label='PCA')

plt.ylabel('QSO number', fontsize=20)
plt.yticks(fontsize=20) 

plt.xlabel(' ∆ν', fontsize=20)
plt.xticks(fontsize=20) 


plt.xlim(-6000,6000)
plt.legend(loc='best', fontsize=20) 
#plt.ylim(0,7)
plt.show() 

In [52]:
file = open('Rahim1.txt', "r+")
file.truncate()
for i in range(len(REL)):
    if ((abs(REL[i])>6000)):
        print(REL[i], file=open("Rahim1.txt", "a"))
with open("Rahim1.txt","r") as f:
    print(1-len(f.readlines())/len(REL))

FileNotFoundError: [Errno 2] No such file or directory: 'Rahim1.txt'

In [None]:
hi

In [None]:
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv1d(1, 20, 200),
            nn.ReLU(),
            nn.AvgPool1d(2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv1d(20, 20,200),
            nn.ReLU(),
            nn.AvgPool1d(2, stride=2))
        self.layer3 = nn.Sequential(
            nn.Conv1d(20, 36, 32),
            nn.ReLU(),
            nn.AvgPool1d(2, stride=2))
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(12600, 1000)
        self.fc2 = nn.Linear(1000, 500)
        self.fc3 = nn.Linear(500, 1)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        out = self.fc1(out)
        out = self.fc2(out)
        out = self.fc3(out)
        return out

In [None]:
from skorch import NeuralNet
from skorch.callbacks import LRScheduler
from torch.optim.lr_scheduler import CyclicLR

#from skorch.dataset import CVSplit
net = NeuralNet(model,
               criterion =nn.MSELoss,
               max_epochs =2,
               batch_size=110,
                optimizer=torch.optim.SGD,
                callbacks=[
        ('lr_scheduler',
         LRScheduler(policy=CyclicLR,
                     base_lr=0.000001,
                     max_lr=0.0001)),
    ],
               device ='cuda') 