In [None]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
from PIL import Image
import os
import cv2
from tqdm import tqdm

In [None]:
# !pip install timm
# import timm

In [None]:
import torch
from torch import nn
from torchvision import models
import torch.nn.functional as F
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

In [None]:
# Seed every RNG this notebook draws from so reruns are comparable:
# NumPy supplies the label jitter in Customset.__getitem__, while PyTorch
# supplies weight initialisation and the DataLoader shuffle order.
np.random.seed(1)
torch.manual_seed(1)

In [None]:
# Training metadata table; a later cell reads its 'ImageDir' and 'AvgWeight'
# columns to attach a label to every image found on disk.
train = pd.read_csv('../input/fishdata/dataset/train.csv')

In [None]:
# Build the per-image training table: one row per image file, labelled with
# the AvgWeight that `train` records for the directory the image lives in.
path = '../input/fishdata/dataset/train/'

# Resolve directory -> label once through a dict instead of re-filtering the
# whole `train` frame for every single image. drop_duplicates keeps the first
# row per ImageDir, which is the row the previous `.values[0]` lookup picked.
weight_by_dir = (
    train.drop_duplicates('ImageDir')
         .set_index('ImageDir')['AvgWeight']
         .to_dict()
)

imgs = []
label = []
for name in sorted(os.listdir(path)):
  for i in sorted(os.listdir(path+name)):
    # Annotation files sit next to the images; skip them.
    if 'json' in i:
      continue
    if name not in weight_by_dir:
      # Fail with the offending directory name instead of a bare IndexError.
      raise KeyError("image directory '{}' has no row in train.csv".format(name))
    imgs.append(path+name+'/'+i)
    label.append(weight_by_dir[name])

train_df = pd.DataFrame({'path': imgs, 'label': label})

In [None]:
# Enumerate every test image (anything whose filename does not contain 'json')
# together with the directory it came from, in sorted directory/file order.
path = '../input/fishdata/dataset/test/'

records = [
    (path + folder + '/' + fname, folder)
    for folder in sorted(os.listdir(path))
    for fname in sorted(os.listdir(path + folder))
    if 'json' not in fname
]

# Keep the two parallel lists around under their original names.
imgs = [img_path for img_path, _ in records]
names = [folder for _, folder in records]
test_df = pd.DataFrame({'path': imgs, 'name': names})

In [None]:
# from sklearn.preprocessing import MinMaxScaler

# scaler = MinMaxScaler()
# train_df['label'] = scaler.fit_transform(train_df[:]['label'])

In [None]:
def get_train_augmentation(img_size, ver):
    """Return the torchvision transform pipeline for the requested variant.

    Args:
        img_size: Target image side length. Currently unused because the
            Resize steps are disabled; kept so existing callers keep working.
        ver: Pipeline variant. ``1`` is the validation/test pipeline and ``2``
            is the training pipeline. With the augmentations disabled, both
            apply only per-channel normalisation.

    Returns:
        A ``transforms.Compose`` holding the selected steps.

    Raises:
        ValueError: If ``ver`` is neither 1 nor 2. Previously this fell
            through to ``return transform`` with the name unbound.
    """
    if ver not in (1, 2):
        raise ValueError('ver must be 1 (valid/test) or 2 (train), got {!r}'.format(ver))

    # Same mean/std for both variants (the widely used ImageNet statistics).
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    if ver == 1:  # for validset
        steps = [
#             transforms.Resize((img_size, img_size)),
            normalize,
        ]
    else:  # ver == 2: training; augmentations are currently switched off
        steps = [
#             transforms.RandomHorizontalFlip(),
#             transforms.RandomAffine((20)),
#             transforms.RandomRotation(180),
#             transforms.Resize((img_size, img_size)),
            normalize,
        ]

    return transforms.Compose(steps)

In [None]:
# class Customset(Dataset):
#   def __init__(self, data, mode):
#     self.mode = mode
#     self.imgs = data['path']
#     if mode == 'train':
#       self.label = data['label']
#       self.transform = get_train_augmentation(256, 2)
#     elif mode == 'test':
#       self.names = data['name']
#       self.transform = get_train_augmentation(256, 1)

#   def __len__(self):
#     return len(self.imgs)

#   def __getitem__(self, index):
#     img = self.imgs[index]
#     img = cv2.imread(img)
#     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#     img = cv2.resize(img, dsize=(256, 256), interpolation=cv2.INTER_AREA)
#     img = img.astype(np.float32)/255
#     img = np.transpose(img, (2,0,1))
#     img = torch.tensor(img, dtype=torch.float32)
#     img = self.transform(img)
#     if self.mode == 'test':
#       name = self.names[index]
#       return img, name
#     label = self.label[index] + np.random.normal(0, 1, 1)
#     label = torch.tensor(label, dtype=torch.float32)

#     return img, label

In [None]:
class Customset(Dataset):
  """Image dataset that reads files from disk with OpenCV.

  Each item is an RGB image resized to ``img_size`` x ``img_size``, scaled by
  1/255 and laid out channel-first as a float32 tensor.

  Args:
    data: Mapping/DataFrame with a 'path' column, plus 'label' when
      ``mode == 'train'`` or 'name' when ``mode == 'test'``.
    mode: 'train' (items are ``(image, label)``) or 'test' (items are
      ``(image, name)``).
    img_size: Side length the image is resized to. Defaults to 512.

  Raises:
    ValueError: If ``mode`` is not 'train' or 'test'.
  """

  def __init__(self, data, mode, img_size=512):
    if mode not in ('train', 'test'):
      raise ValueError("mode must be 'train' or 'test', got {!r}".format(mode))
    self.mode = mode
    self.img_size = img_size
    # Materialise the columns as plain lists so __getitem__ indexes by
    # position. Indexing a pandas Series with an int is label-based and
    # breaks as soon as `data` is a subset or shuffled frame.
    self.imgs = list(data['path'])
    if mode == 'train':
      self.label = list(data['label'])
    else:
      self.names = list(data['name'])

  def __len__(self):
    """Number of images in the dataset."""
    return len(self.imgs)

  def __getitem__(self, index):
    """Load, preprocess and return the item at position ``index``.

    Raises:
      OSError: If OpenCV cannot read the image file.
    """
    img_path = self.imgs[index]
    img = cv2.imread(img_path)
    if img is None:
      # cv2.imread signals failure by returning None rather than raising.
      raise OSError('cv2.imread could not read image (missing or unreadable): {}'.format(img_path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, dsize=(self.img_size, self.img_size), interpolation=cv2.INTER_AREA)
    img = img.astype(np.float32)/255
    img = np.transpose(img, (2,0,1))  # HWC -> CHW
    img = torch.tensor(img, dtype=torch.float32)

    if self.mode == 'test':
      return img, self.names[index]

    # Training target: the stored label plus unit-variance Gaussian jitter,
    # returned as a length-1 tensor.
    label = self.label[index] + np.random.normal(0, 1, 1)
    label = torch.tensor(label, dtype=torch.float32)

    return img, label

In [None]:
# Wrap both tables in datasets, then batch them: the training loader reshuffles
# every epoch, the test loader keeps file order.
trainset, testset = Customset(train_df, 'train'), Customset(test_df, 'test')
train_loader = DataLoader(dataset=trainset, batch_size=16, num_workers=4, shuffle=True)
test_loader = DataLoader(dataset=testset, batch_size=16, num_workers=4, shuffle=False)

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
# class Network(nn.Module):
#     def __init__(self):
#         super(Network, self).__init__()
        
#         self.conv1 = nn.Conv2d(in_channels=3, out_channels=128, kernel_size=5, stride=1, padding=1)
#         self.bn1 = nn.BatchNorm2d(128)
#         self.conv2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=5, stride=2, padding=1)
#         self.bn2 = nn.BatchNorm2d(256)
#         self.pool = nn.MaxPool2d(2,2)
#         self.conv4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=5, stride=2, padding=1)
#         self.bn4 = nn.BatchNorm2d(256)
#         self.conv5 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=5, stride=2, padding=1)
#         self.bn5 = nn.BatchNorm2d(256)
#         self.flatten = nn.Flatten()
#         self.fc1 = nn.Linear(57600, 1)
        
#         nn.init.xavier_uniform(self.conv1.weight)
#         nn.init.xavier_uniform(self.conv2.weight)
#         nn.init.xavier_uniform(self.conv4.weight)
#         nn.init.xavier_uniform(self.conv5.weight)
#         nn.init.xavier_uniform(self.fc1.weight)

#     def forward(self, input):
#         output = F.relu(self.bn1(self.conv1(input)))      
#         output = F.relu(self.bn2(self.conv2(output)))     
#         output = self.pool(output)                        
#         output = F.relu(self.bn4(self.conv4(output)))     
#         output = F.relu(self.bn5(self.conv5(output)))     
#         output = self.flatten(output)
#         output = self.fc1(output)

#         return output

# # Instantiate a neural network model 
# model = Network().to(device)
# print(model)

In [None]:
class Network(nn.Module):
    """Plain convolutional regressor that emits one scalar per input image.

    Five conv + BatchNorm + ReLU stages (with one 2x2 max-pool after the
    second) feed a single linear layer.

    Args:
        img_size: Side length of the square input images. Sets the width of
            the final linear layer. The default of 512 gives 492032 input
            features; 256 gives 115200.

    Raises:
        ValueError: If ``img_size`` is too small to survive the conv stack.
    """

    def __init__(self, img_size=512):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=128, kernel_size=5, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(128)
        self.conv2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=5, stride=2, padding=1)
        self.bn2 = nn.BatchNorm2d(256)
        self.pool = nn.MaxPool2d(2,2)
        self.conv4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=5, stride=2, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.conv5 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=5, stride=2, padding=1)
        self.bn5 = nn.BatchNorm2d(512)
        self.conv6 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=1, stride=1)
        self.bn6 = nn.BatchNorm2d(512)
        self.flatten = nn.Flatten()

        # Derive the flattened feature count from the input size instead of
        # hard-coding it: 512 channels x side x side after the conv stack.
        side = self._feature_side(img_size)
        if side < 1:
            raise ValueError('img_size={} is too small for this network'.format(img_size))
        self.fc1 = nn.Linear(512 * side * side, 1)

        # xavier_uniform_ is the in-place, non-deprecated spelling of the
        # initialiser. Same layers and same order as before; conv6 keeps
        # PyTorch's default initialisation.
        for layer in (self.conv1, self.conv2, self.conv4, self.conv5, self.fc1):
            nn.init.xavier_uniform_(layer.weight)

    @staticmethod
    def _feature_side(img_size):
        """Spatial side length left after conv1..conv6 for a square input."""
        side = img_size + 2 - 5 + 1        # conv1: k=5, s=1, p=1
        side = (side + 2 - 5) // 2 + 1     # conv2: k=5, s=2, p=1
        side = side // 2                   # pool:  2x2, stride 2
        side = (side + 2 - 5) // 2 + 1     # conv4: k=5, s=2, p=1
        side = (side + 2 - 5) // 2 + 1     # conv5: k=5, s=2, p=1
        return side                        # conv6 is 1x1 and keeps the size

    def forward(self, input):
        """Map a (batch, 3, H, W) image tensor to a (batch, 1) prediction."""
        output = F.relu(self.bn1(self.conv1(input)))
        output = F.relu(self.bn2(self.conv2(output)))
        output = self.pool(output)
        output = F.relu(self.bn4(self.conv4(output)))
        output = F.relu(self.bn5(self.conv5(output)))
        output = F.relu(self.bn6(self.conv6(output)))
        output = self.flatten(output)
        output = self.fc1(output)

        return output

# Build the network, move its parameters to the selected device, and print
# the layer summary.
model = Network()
model = model.to(device)
print(model)

In [None]:
# class Network(nn.Module):
#     def __init__(self):
#         super(Network, self).__init__()
#         self.model = timm.create_model('efficientnet_b8', pretrained=False, num_classes=1)
# #         self.model = models.resnet50(pretrained=True, num_classes=1)
        
#     def forward(self, x):
#         x = self.model(x)
#         return x
    
# model = Network().to(device)

In [None]:
# class Network(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.encoder = timm.create_model('regnety_040', pretrained=True,
#                                     drop_path_rate=0.2,
#                                     )
        
#         num_head = self.encoder.head.fc.in_features
#         self.encoder.head.fc = nn.Linear(num_head, 1)

#     def forward(self, x):
#         x = self.encoder(x)
#         return x

# model = Network().to(device)

In [None]:
from torch.optim import Adam
 
# Regression objective: mean-squared-error loss between the predicted and
# target values, optimised with Adam (learning rate 0.001, weight decay 0.0001).
loss_fn = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)

In [None]:
import gc
# Run a garbage-collection pass, then ask PyTorch to release its cached CUDA
# memory, before the training cell allocates activations.
gc.collect()
torch.cuda.empty_cache()

In [None]:
from torch.autograd import Variable

# Fit `model` on train_loader for a fixed number of epochs, logging the batch
# loss every 100 iterations.
bestloss = 100  # only referenced by the commented-out checkpointing below
num_epochs = 5

# Explicitly select training mode so BatchNorm uses batch statistics even if
# the model was switched to eval mode earlier in the session.
model.train()
for epoch in range(num_epochs):

    # Wrap the loader itself (not enumerate) so tqdm can read len(train_loader)
    # and show a real progress bar.
    for i, (images, targets) in enumerate(tqdm(train_loader)):  ##715
        # Tensors track gradients themselves; the Variable wrapper is a no-op.
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        losses = loss_fn(outputs, targets)

        losses.backward()
        optimizer.step()

        if i % 100 == 0:
            # .item() logs the plain float instead of the tensor repr.
            print('Epoch: {}, i: {}, loss: {}'.format(epoch+1, i, losses.item()))

#         if bestloss > losses:
#             bestloss = losses
#             torch.save(model.state_dict(), './model.pth')

In [None]:
# Predict a value for every test image, collecting predictions and the
# directory name of each image in two parallel lists.
outputs = []
names = []

# Inference must use BatchNorm's running statistics; in training mode each
# prediction would depend on which other images share its batch.
model.eval()
with torch.no_grad():
    for images, name in tqdm(test_loader):
        output = model(images.to(device))
        # The model emits shape (batch, 1); flatten to one float per image.
        outputs.extend(output.reshape(-1).tolist())
        names.extend(name)

In [None]:
# outputs = []
# names = []

# state_dict = {}
# bestmodel = Network().to(device)
# bestmodel.load_state_dict(torch.load('model.pth'))

# with torch.no_grad():
#     for _, (images, name) in tqdm(enumerate(test_loader)):
#         output = bestmodel(images.to(device))
#         for i in range(len(name)):
#             outputs.append(output[i].item())
#             names.append(name[i])

In [None]:
# One row per test image: the directory name collected in `names` paired with
# the prediction collected in `outputs`.
pred = pd.DataFrame({'ImageDir': names, 'AvgWeight': outputs})

In [None]:
# pred.loc[pred['AvgWeight'] < 5, 'AvgWeight'] = 5

In [None]:
# pred['AvgWeight'] = scaler.transform(pred['AvgWeight'])

In [None]:
# Collapse the per-image predictions into one row per ImageDir by averaging.
sub = pred.groupby(by=['ImageDir'], as_index=False).agg('mean')
sub.head()

In [None]:
# Write the per-directory predictions to ./sub.csv, omitting the DataFrame index.
sub.to_csv('./sub.csv', index=False)

In [None]:
# from torch.autograd import Variable
from sklearn.model_selection import KFold


# def train(fold=5): 
# num_epochs = 5
# for epoch in range(num_epochs):
    
#     for i, (images, targets) in tqdm(enumerate(train_loader)):  ##715
#         image = Variable(images.to(device))
#         targets = Variable(targets.to(device))
        
#         optimizer.zero_grad()
        
#         outputs = model(image)
#         losses = loss_fn(outputs, targets)

#         losses.backward()
#         optimizer.step()

#         if (i) % 100 == 0:
#             print('Epoch: {}, i: {}, loss: {}'.format(epoch+1, i, losses))