In [1]:
import torch
import cv2
import numpy as np
import torchvision.transforms as transforms
from torch.utils.data import Dataset

class ImageDataset(Dataset):
    """Dataset of per-sample ``_<ID>_data.txt`` matrices described by a label table.

    Rows of ``csv`` are split by position: the first 85% form the training
    split, the last ``test_size`` rows form the test split, and the rows in
    between form the validation split.

    Args:
        csv: table (expected to be a pandas DataFrame) with an ``ID`` column,
            a ``Target_List`` column and one column per label; every column
            other than those two is used as a label.
        train: if truthy, expose the training split (takes precedence over
            ``test``).
        test: if truthy and ``train`` is falsy, expose the test split; if both
            are falsy, expose the validation split.
    """

    # Directory holding the ``_<ID>_data.txt`` files; override on the class or
    # on an instance to read the data from somewhere else.
    data_dir = "../input/drug-classifier/drug_CNN/Table_To_Image_Conversion/10k/data"
    # Number of trailing rows reserved for the test split.
    test_size = 10

    def __init__(self, csv, train, test):
        self.csv = csv
        self.train = train
        self.test = test
        # Read the sample ids from the same 'ID' column that is dropped from
        # the label matrix below (the original read 'Id', which does not match).
        self.all_image_names = self.csv['ID']
        self.all_labels = np.array(self.csv.drop(['ID', 'Target_List'], axis=1))
        self.train_ratio = int(0.85 * len(self.csv))
        self.valid_ratio = len(self.csv) - self.train_ratio

        if self.train:
            # training split: first 85% of the rows, with augmentation
            print(f"Number of training images: {self.train_ratio}")
            self.image_names = list(self.all_image_names[:self.train_ratio])
            self.labels = list(self.all_labels[:self.train_ratio])
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((400, 400)),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.RandomRotation(degrees=45),
                transforms.ToTensor(),
            ])
        elif self.test:
            # test split: only the last `test_size` rows, used by a separate
            # inference script; no resizing or augmentation is applied
            self.image_names = list(self.all_image_names[-self.test_size:])
            self.labels = list(self.all_labels[-self.test_size:])
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.ToTensor(),
            ])
        else:
            # validation split: the remaining rows minus the test rows
            self.image_names = list(
                self.all_image_names[-self.valid_ratio:-self.test_size]
            )
            self.labels = list(self.all_labels[-self.valid_ratio:-self.test_size])
            # report the real size of the split (test rows are excluded)
            print(f"Number of validation images: {len(self.image_names)}")
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((400, 400)),
                transforms.ToTensor(),
            ])

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self.image_names)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            dict with ``'image'`` (float32 tensor produced by the split's
            transform) and ``'label'`` (float32 tensor of that row's label
            columns).
        """
        file_path = f"{self.data_dir}/_{self.image_names[index]}_data.txt"
        data = np.loadtxt(file_path)

        # The values are documented as lying in 0-255 but are not integers.
        # ToPILImage handles uint8 arrays consistently, so truncate and clip
        # here; ToTensor then rescales the image to [0, 1].
        image = np.clip(np.trunc(data), 0, 255).astype(np.uint8)

        # apply image transforms
        image = self.transform(image)

        targets = self.labels[index]

        return {
            # the transform already yields a tensor; as_tensor avoids the
            # copy-construct warning that torch.tensor(tensor) emits
            'image': torch.as_tensor(image, dtype=torch.float32),
            'label': torch.tensor(targets, dtype=torch.float32)
        }

In [2]:
import pandas as pd

In [15]:
# Load the training table, then keep every column except the id and the raw
# target list as a NumPy label matrix.
train_csv = pd.read_csv('../input/drug-classifier/drug_CNN/train_drugidx.csv')
labels = np.array(train_csv.drop(columns=['ID', 'Target_List']))

In [32]:
# Pick one row of the label matrix for inspection.
# `labels` comes from the cell that reads train_drugidx.csv, so that cell must run first.
index = 10
targets = labels[index]



In [None]:
# Scratch copy of the sample-loading steps that also appear in CustomDataset.__getitem__.
# NOTE(review): file_path, fill_oh, target_to_oh, dt_dict and OH_LEN are not defined in
# any visible cell, so this cell presumably relies on leftover kernel state - confirm.
# The id is the second '_'-separated field of the last '/'-separated path component.
idx = int(file_path.split('/')[-1].split('_')[1])        
label = torch.Tensor(fill_oh([target_to_oh[x] for x in dt_dict[idx].split('; ')], OH_LEN))


# Read and preprocess data from the .txt file
data = np.loadtxt(file_path)
# Truncate each value toward zero and add a leading axis of length 1.
data = np.expand_dims(np.trunc(data), axis=0) 

In [8]:
# Display the current value of `image`.
# NOTE(review): no earlier visible cell assigns a top-level `image`; presumably leftover kernel state.
image

In [6]:
# Display the current value of `image` (same expression as the cell before it).
# NOTE(review): no earlier visible cell assigns a top-level `image`; presumably leftover kernel state.
image

In [None]:
# convert the image from BGR to RGB color format
# NOTE(review): scratch fragment - it references `self` at cell level and reads `image`
# before any visible top-level assignment, so it cannot run on a fresh kernel as written.
# It looks like part of a Dataset.__getitem__ body - confirm and remove or move it.
# convert the image from BGR to RGB color format
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# apply image transforms
image = self.transform(image)
targets = self.labels[index]

In [None]:
# NOTE(review): scratch fragment - `self` is not defined at cell level, so this line
# cannot run on its own; the same statement appears inside CustomDataset.__getitem__.
file_path = self.file_paths[idx]


In [None]:
# Class for Gene Expression data converted to image format
class CustomDataset(Dataset):
    """Dataset over the ``*.txt`` files found directly inside ``root``.

    A file is kept only when its numeric id (see ``_sample_id``) is present in
    both ``dt_idx.index`` and ``dt_dict`` and none of its '; '-separated
    targets maps to the same code as ``'Others'`` in ``target_to_oh``.

    Relies on the module-level names ``glob``, ``os``, ``dt_idx``, ``dt_dict``,
    ``target_to_oh``, ``fill_oh`` and ``OH_LEN`` being defined before use.

    Args:
        root: directory that is searched (non-recursively) for ``*.txt`` files.
        transform: optional callable applied to the loaded array.
    """

    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform

        # Collect file paths; sorted so the sample order does not depend on
        # the order in which the file system lists the directory.
        file_paths = sorted(glob.glob(os.path.join(root, '*.txt')))
        safe_idx = set(dt_idx.index).intersection(set(dt_dict.keys()))
        other_idx = target_to_oh['Others']

        def keep(path):
            """Return True when `path` has a known id and no 'Others' target."""
            sample_id = self._sample_id(path)
            if sample_id not in safe_idx:
                return False
            codes = [target_to_oh[name] for name in dt_dict[sample_id].split('; ')]
            return other_idx not in codes

        # Filter out paths with label "other"
        self.file_paths = [path for path in file_paths if keep(path)]
        print('Files removed from set due to "other":')
        print(len(set(file_paths).difference(set(self.file_paths))))

    @staticmethod
    def _sample_id(file_path):
        """Parse the integer between the first two underscores of the file name."""
        return int(os.path.basename(file_path).split('_')[1])

    def __len__(self):
        """Return the number of files kept after filtering."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Return ``(data, label)`` for the file at position ``idx``.

        ``data`` is the file's values truncated toward zero with a leading
        axis of length 1 added, passed through ``transform`` when one is set.
        ``label`` is ``torch.Tensor(fill_oh(codes, OH_LEN))``.
        """
        file_path = self.file_paths[idx]
        sample_id = self._sample_id(file_path)
        # presumably fill_oh builds a multi-hot vector of length OH_LEN - TODO confirm
        label = torch.Tensor(
            fill_oh([target_to_oh[x] for x in dt_dict[sample_id].split('; ')], OH_LEN)
        )

        # Read and preprocess data from the .txt file
        data = np.loadtxt(file_path)
        data = np.expand_dims(np.trunc(data), axis=0)

        # Apply transformations
        if self.transform:
            data = self.transform(data)

        return data, label

In [3]:
import os
import warnings

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

import models
from dataset import ImageDataset
from engine import train, validate
warnings.filterwarnings('ignore', category=UserWarning, module='torchvision')
matplotlib.style.use('ggplot')

# seed torch's global RNG so that shuffling and random augmentations are
# repeatable across kernel restarts
seed = 42
torch.manual_seed(seed)

# initialize the computation device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# initialize the model
model = models.model(pretrained=True, requires_grad=False).to(device)
# learning parameters
lr = 0.0001
epochs = 2
batch_size = 32
optimizer = optim.Adam(model.parameters(), lr=lr)
# NOTE(review): BCELoss expects probabilities in [0, 1]; presumably the model
# ends in a sigmoid - confirm in models.py
criterion = nn.BCELoss()

# read the training csv file
train_csv = pd.read_csv('../input/drug-classifier/drug_CNN/train_drugidx.csv')
# train dataset
train_data = ImageDataset(
    train_csv, train=True, test=False
)
# validation dataset
valid_data = ImageDataset(
    train_csv, train=False, test=False
)
# train data loader (reshuffled every epoch)
train_loader = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True
)
# validation data loader (fixed order)
valid_loader = DataLoader(
    valid_data,
    batch_size=batch_size,
    shuffle=False
)


Number of training images: 9255
Number of validation images: 1634


In [10]:
# Map the first training sample's label vector back to column names.
train_csv = pd.read_csv('../input/drug-classifier/drug_CNN/train_drugidx.csv')
# Every column from the third one onward is treated as a label name.
genres = train_csv.columns.values[2:]

# `train_data` is created in the training-setup cell, which must run first.
target = train_data[0]['label']
# Positions where the label vector equals 1.
target_indices = np.where(target == 1)[0]
target = genres[target_indices]
# NOTE(review): `target` is an array here, so the comparison is element-wise, and
# the `if` has no body in the visible text - this cell is incomplete as written.
if target == 'Others':
    

  'image': torch.tensor(image, dtype=torch.float32),


array([ True])

In [9]:
# Display the current value of `z`.
# NOTE(review): `z` is not assigned in any visible cell; presumably leftover kernel state.
z

array(['Others'], dtype=object)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 

In [None]:

# Create the output directory before training, so a missing folder fails
# immediately instead of after every epoch has already run.
output_dir = '../outputs'
os.makedirs(output_dir, exist_ok=True)

# start the training and validation
train_loss = []
valid_loss = []
for epoch in range(epochs):
    print(f"Epoch {epoch+1} of {epochs}")
    train_epoch_loss = train(
        model, train_loader, optimizer, criterion, train_data, device
    )
    valid_epoch_loss = validate(
        model, valid_loader, criterion, valid_data, device
    )
    train_loss.append(train_epoch_loss)
    valid_loss.append(valid_epoch_loss)
    print(f"Train Loss: {train_epoch_loss:.4f}")
    print(f'Val Loss: {valid_epoch_loss:.4f}')

# save the trained model to disk
# NOTE(review): 'loss' stores the criterion object, not a loss value; kept as-is
# so existing checkpoint readers keep working.
torch.save({
            'epoch': epochs,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': criterion,
            }, os.path.join(output_dir, 'model.pth'))
# plot and save the train and validation line graphs
plt.figure(figsize=(10, 7))
plt.plot(train_loss, color='orange', label='train loss')
plt.plot(valid_loss, color='red', label='validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig(os.path.join(output_dir, 'loss.png'))
plt.show()