In [None]:
import numpy as np
import pandas as pd

import os
# Walk the Kaggle input directory and print the full path of every file in it.
input_root = '/kaggle/input'
for dirname, _, filenames in os.walk(input_root):
    full_paths = (os.path.join(dirname, filename) for filename in filenames)
    for full_path in full_paths:
        print(full_path)

In [None]:
import random
from PIL import Image
from cv2 import imread
import matplotlib.pyplot as plt

In [None]:
!unzip -q ../input/galaxy-zoo-the-galaxy-challenge/images_training_rev1.zip
!unzip -q ../input/galaxy-zoo-the-galaxy-challenge/training_solutions_rev1.zip

In [None]:
# List the extracted training image files and load the training solutions table.
files = os.listdir('./images_training_rev1')
train_set = pd.read_csv('training_solutions_rev1.csv')

# Last expression of the cell: display the first rows of the table.
train_set.head()

In [None]:
# Show a 4x4 grid of randomly chosen training images.
# The files are read with matplotlib, which returns RGB arrays. cv2.imread
# returns BGR, which plt.imshow would draw with red and blue swapped.
fig, axes = plt.subplots(4, 4, figsize=(9, 9))
for ax in axes.flat:
    random_img = './images_training_rev1/' + random.choice(files)
    imgs = plt.imread(random_img)
    ax.imshow(imgs)
    ax.axis('off')

plt.show()

In [None]:
!pip install -q torchsummary
!pip install -q livelossplot

In [None]:
import torch
import torchvision
from livelossplot import PlotLosses
from torchsummary import summary
from tqdm import tqdm
import scipy.stats as stats
import torchvision.models as models
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
from torchvision import datasets, transforms
import torchmetrics
from sklearn.metrics import mean_squared_error
from torch.utils.data import DataLoader, random_split
from torch.utils.data.sampler import SubsetRandomSampler

In [None]:
# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Device: {}'.format(device))

In [None]:
# Directories holding the extracted images. Only test_folder ends with a path
# separator, so code that builds file paths from these must add '/' for
# train_folder but not for test_folder.
train_folder, test_folder = './images_training_rev1', './images_test_rev1/'

In [None]:
class GalaxyDataSet(torch.utils.data.Dataset):
    """Map-style dataset over in-memory images ``x`` and their labels ``y``.

    Both arguments are stored as given; item ``i`` is built from ``x[i]`` and
    ``y[i]`` (presumably the two containers have the same length -- the class
    does not check this).
    """

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __len__(self):
        # The dataset length is defined by the image container alone.
        return len(self.x)

    def __getitem__(self, index):
        """Return ``(image_tensor, label)`` for the sample at ``index``.

        The image array is multiplied by 255, cast to uint8, wrapped in a PIL
        image and converted with torchvision's ``ToTensor``. The label is
        returned exactly as stored.
        """
        label = self.y[index]
        pixels = (self.x[index] * 255).astype(np.uint8)
        tensor = transforms.ToTensor()(Image.fromarray(pixels))
        return tensor, label

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled rows for validation. The fixed random_state makes
# the split, and therefore every metric computed on it, reproducible across
# kernel restarts.
df_train, df_test = train_test_split(train_set, test_size=.2, random_state=42)
df_train.shape, df_test.shape

In [None]:
from skimage.transform import resize

# Image geometry shared by the loaders below.
ORIG_SHAPE = (424, 424)  # reference size used to centre the crop window
CROP_SIZE = (256, 256)   # size of the window cut out of each image
IMG_SHAPE = (64, 64)     # size each crop is resized to

def get_image(path, x1,y1, shape, crop_size):
    """Read an image file, cut a window out of it and resize that window.

    Parameters
    ----------
    path : location of the image file, handed to ``plt.imread``.
    x1, y1 : start offsets of the window along the first and second array axis.
    shape : target shape forwarded to ``skimage.transform.resize``.
    crop_size : window extent along the first and second array axis.

    Returns
    -------
    Whatever ``resize`` returns for the cropped window.
    """
    image = plt.imread(path)
    first_axis = slice(x1, x1 + crop_size[0])
    second_axis = slice(y1, y1 + crop_size[1])
    window = image[first_axis, second_axis]
    return resize(window, shape)
    
def get_all_images(dataframe, shape=IMG_SHAPE, crop_size=CROP_SIZE):
    """Load, centre-crop and resize every training image listed in ``dataframe``.

    Parameters
    ----------
    dataframe : table whose first column holds the image id (cast to int and
        used as ``<id>.jpg`` inside ``train_folder``) and whose remaining
        columns are the targets.
    shape : output size of each image, forwarded to ``get_image``.
    crop_size : size of the window cut from the centre of an ``ORIG_SHAPE``
        image.

    Returns
    -------
    (x_batch, y_batch) : the stacked images as one array, and all columns of
        ``dataframe`` after the first.
    """
    # Centre the window using the crop_size that was actually requested; the
    # global CROP_SIZE is only the default value of that argument.
    x1 = (ORIG_SHAPE[0] - crop_size[0]) // 2
    y1 = (ORIG_SHAPE[1] - crop_size[1]) // 2

    sel = dataframe.values
    ids = sel[:, 0].astype(int).astype(str)
    y_batch = sel[:, 1:]
    x_batch = np.array([
        get_image(train_folder + '/' + i + '.jpg', x1, y1, shape=shape, crop_size=crop_size)
        for i in tqdm(ids)
    ])
    return x_batch, y_batch
        
# Materialise the image and label arrays, training part first, then validation.
(X_train, y_train), (X_test, y_test) = [
    get_all_images(part) for part in (df_train, df_test)
]

In [None]:
# Wrap the in-memory training and validation arrays as torch datasets.
TrainDataSet, TestDataSet = (
    GalaxyDataSet(X_train, y_train),
    GalaxyDataSet(X_test, y_test),
)

In [None]:
# Samples per mini-batch for both loaders.
batch_size = 1

# Reshuffle the training samples at the start of every epoch so successive
# epochs do not present them in the same order. The validation metrics are
# plain averages, so the validation order is left untouched.
trainloader = DataLoader(TrainDataSet, batch_size=batch_size, shuffle=True)
valloader = DataLoader(TestDataSet, batch_size=batch_size)

In [None]:
class GalaxyModel(nn.Module):
    """CNN that maps a 3-channel image to 37 sigmoid outputs.

    ``sq`` is the convolutional part: three pairs of unpadded 3x3 convolutions,
    each pair followed by a ReLU, with 2x2 max-pooling after the first two
    pairs and a global (1x1) adaptive max-pool at the end.
    ``d`` is the dense head: three Dropout(0.25) -> Linear(128, 128) -> ReLU
    stages, then Dropout(0.25) -> Linear(128, 37) -> Sigmoid.
    """

    def __init__(self):
        super().__init__()
        kernel = (3, 3)
        features = [
            nn.Conv2d(3, 512, kernel),
            nn.Conv2d(512, 256, kernel),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(256, 256, kernel),
            nn.Conv2d(256, 128, kernel),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 128, kernel),
            nn.Conv2d(128, 128, kernel),
            nn.ReLU(),
            nn.AdaptiveMaxPool2d((1, 1)),
        ]
        self.sq = nn.Sequential(*features)

        # Layers are created in the same order as they appear in the head, so
        # the indices inside ``d`` run 0..11.
        head = []
        for _ in range(3):
            head.extend([nn.Dropout(0.25), nn.Linear(128, 128), nn.ReLU()])
        head.extend([nn.Dropout(0.25), nn.Linear(128, 37), nn.Sigmoid()])
        self.d = nn.Sequential(*head)

    def forward(self, res):
        """Return the (batch, 37) output for a batch of images ``res``."""
        pooled = self.sq(res)
        # The global pool leaves one value per channel: (batch, 128, 1, 1).
        flat = pooled.reshape(pooled.shape[0], 128)
        return self.d(flat)

# Build the network, move it to the selected device and print a per-layer
# summary for a single 3x64x64 input.
model = GalaxyModel()
model = model.to(device)
summary(model, input_size=(3, 64, 64))

In [None]:
# Training configuration.
epochs = 15     # number of passes over the training loader
alpha = 0.001   # learning rate given to the optimizer

criterion = nn.BCELoss()
optimizer = optim.Adamax(model.parameters(), lr=alpha)
# StepLR multiplies the learning rate by gamma after every step_size calls to
# scheduler.step().
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)

In [None]:
def eval_loop(model, valloader, criterion, optimizer):
    """Evaluate ``model`` on ``valloader`` without updating any weights.

    Parameters
    ----------
    model : network to evaluate; it is switched to eval mode.
    valloader : iterable of ``(data, label)`` batches.
    criterion : loss, called as ``criterion(pred, label)``.
    optimizer : unused; kept so the signature mirrors ``training_loop``.

    Returns
    -------
    (mean_loss, mse_x100) : the loss averaged over batches, and 100 times the
        batch-averaged mean squared error between predictions and labels.
        Both are ``nan`` when the loader yields no batches.
    """
    model.eval()
    total_loss = 0.0
    total_mse = 0.0
    n_batches = 0
    with torch.no_grad():
        for data, label in tqdm(valloader):
            n_batches += 1
            data, label = data.to(device), label.to(device)
            # Cast to float64, presumably to match float64 labels coming from
            # the label arrays -- the criterion needs matching dtypes.
            pred = model(data).type(torch.float64)
            total_loss += criterion(pred, label).item()
            # Mean over every element of the batch, computed on the device
            # instead of copying both tensors to numpy for sklearn.
            total_mse += torch.mean((pred - label) ** 2).item()
    if n_batches == 0:
        # Nothing was evaluated; avoid a ZeroDivisionError.
        return float('nan'), float('nan')
    return total_loss / n_batches, 100 * total_mse / n_batches

In [None]:
def training_loop(model, trainloader, criterion, optimizer, scheduler):
    """Run one optimisation pass over ``trainloader``.

    Parameters
    ----------
    model : network to train; it is switched to train mode.
    trainloader : iterable of ``(data, label)`` batches.
    criterion : loss, called as ``criterion(pred, label)``.
    optimizer : optimizer stepped once per batch.
    scheduler : accepted for interface compatibility but NOT stepped here, so
        the learning rate stays constant unless the caller steps it.

    Returns
    -------
    (mean_loss, mse_x100) : the loss averaged over batches, and 100 times the
        batch-averaged mean squared error between predictions and labels.
        Both are ``nan`` when the loader yields no batches.
    """
    model.train()
    total_loss = 0.0
    total_mse = 0.0
    n_batches = 0
    for data, label in tqdm(trainloader):
        n_batches += 1
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        # Cast to float64, presumably to match float64 labels coming from the
        # label arrays -- the criterion needs matching dtypes.
        pred = model(data).type(torch.float64)
        loss = criterion(pred, label)

        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        # Mean over every element of the batch, computed on the device and
        # detached from the graph instead of a numpy/sklearn round trip.
        total_mse += torch.mean((pred.detach() - label) ** 2).item()
    if n_batches == 0:
        # Nothing was trained on; avoid a ZeroDivisionError.
        return float('nan'), float('nan')
    return total_loss / n_batches, 100 * total_mse / n_batches

In [None]:
def train_model(model,trainloader, valloader, criterion, optimizer, scheduler, epoch):
    """Train for ``epoch`` epochs, validating and plotting after each one.

    Every epoch runs ``training_loop`` followed by ``eval_loop``, prints both
    results and sends the two losses to a livelossplot ``PlotLosses`` chart.

    Parameters
    ----------
    model, criterion, optimizer, scheduler : forwarded to the two loops.
    trainloader, valloader : batch iterables for training and validation.
    epoch : number of epochs to run.
    """
    print('-'*5+'Training'+'-'*5)
    liveloss = PlotLosses()

    for ep in range(epoch):
        print(f'Training epoch: {ep+1}')
        # The second value returned by both loops is 100 x mean squared error
        # (lower is better), so it is reported as such rather than as accuracy.
        t_loss, t_mse = training_loop(model, trainloader, criterion, optimizer, scheduler)
        v_loss, v_mse = eval_loop(model, valloader, criterion, optimizer)
        print(f'Training loss: {t_loss} Training MSE (x100): {t_mse}')
        print(f'Validation loss: {v_loss} Validation MSE (x100): {v_mse}')
        liveloss.update({'log loss': t_loss, 'val_log loss': v_loss})
        liveloss.send()
    print('Finished Training')

In [None]:
# Run the full training schedule with the objects configured above.
train_model(
    model,
    trainloader,
    valloader,
    criterion,
    optimizer,
    scheduler,
    epochs,
)

In [None]:
# Export the trained network to ONNX.
model.eval()  # eval mode so dropout is inactive while the graph is traced
# Tracing input: the first training image with a leading batch axis, placed on
# the same device as the model. This is read straight from the dataset rather
# than by unpacking a DataLoader built over a single (image, label) tuple.
dummy_input = TrainDataSet[0][0].unsqueeze(0).to(device)
torch.onnx.export(model,
         dummy_input,
         "galaxy_classification.onnx",
         export_params=True,
         opset_version=10,
         do_constant_folding=True,
         input_names = ['modelInput'],
         output_names = ['modelOutput'],
         # Axis 0 of both tensors is exported as a variable-length batch axis.
         dynamic_axes={'modelInput' : {0 : 'batch_size'},
                                'modelOutput' : {0 : 'batch_size'}})
print('Model has been converted to ONNX')

In [None]:
# Extract the test images into the working directory (-q: quiet, -o: overwrite
# existing files without prompting).
!unzip -q -o ../input/galaxy-zoo-the-galaxy-challenge/images_test_rev1.zip

In [None]:
# File names of the test images. os.listdir returns entries in arbitrary order,
# so sort them to make the prediction order reproducible. Anything that is not
# a .jpg is skipped because it cannot be loaded as an image.
testfiles = sorted(
    name for name in os.listdir(test_folder) if name.lower().endswith('.jpg')
)

In [None]:
def test_image_generator(ids, shape=IMG_SHAPE):
    """Load, centre-crop and resize the test images named in ``ids``.

    Parameters
    ----------
    ids : iterable of file names inside ``test_folder``.
    shape : output size of each image, forwarded to ``get_image``.

    Returns
    -------
    One array stacking the processed images in the order of ``ids``.
    """
    x1 = (ORIG_SHAPE[0] - CROP_SIZE[0]) // 2
    y1 = (ORIG_SHAPE[1] - CROP_SIZE[1]) // 2
    # Forward the caller's shape; the global IMG_SHAPE is only its default.
    x_batch = [
        get_image(test_folder + i, x1, y1, shape=shape, crop_size=CROP_SIZE)
        for i in ids
    ]
    return np.array(x_batch)

In [None]:
# Predict the 37 outputs for every test image.
batch_size = 1
val_predictions = []
N_val = len(testfiles)
import gc
model.eval()  # make sure dropout is inactive, whichever cells ran before
to_tensor = transforms.ToTensor()
with torch.no_grad():
    for i in tqdm(np.arange(0, N_val, batch_size)):
        upper = min(i + batch_size, N_val)
        images = test_image_generator(testfiles[i:upper])
        # Same preprocessing as GalaxyDataSet.__getitem__ (scale to uint8, PIL,
        # ToTensor), applied image by image and stacked, so any batch_size
        # yields a (batch, 3, H, W) tensor.
        X = torch.stack([
            to_tensor(Image.fromarray((image * 255).astype(np.uint8)))
            for image in images
        ])
        y_pred = model(X.to(device))
        # Keep each result as a CPU tensor. A list of tensors must not be passed
        # to np.array(): CUDA tensors cannot be converted to numpy directly.
        val_predictions.append(y_pred.cpu())

In [None]:
# Collect all predictions into one (n_images, 37) array.
# Entries of val_predictions may be torch tensors (possibly on the GPU) or
# numpy arrays, and may each hold one or several rows; handle all of these.
prediction_rows = [
    (p.detach().cpu().numpy() if torch.is_tensor(p) else np.asarray(p)).reshape(-1, 37)
    for p in val_predictions
]
predi = np.concatenate(prediction_rows) if prediction_rows else np.empty((0, 37))

In [None]:
# Assemble the submission table: the id column followed by the predictions.
Y_pred = np.vstack(predi)
# Ids are the file names without extension, kept numeric so the sort below is
# by value rather than lexicographic.
ids = np.array([int(v.split('.')[0]) for v in testfiles], dtype=int)
# Build the prediction columns as floats and add the id column separately;
# stacking ids and predictions into one array would turn everything into text.
submission_df = pd.DataFrame(Y_pred, columns=train_set.columns[1:])
submission_df.insert(0, train_set.columns[0], ids)
submission_df = submission_df.sort_values(by=['GalaxyID'])
submission_df.to_csv('sample_submission.csv', index=False)

In [None]:
# Display the first rows of the submission table as a final sanity check.
submission_df.head()