In [148]:
data_dir = "../../dataset/rsna-2024-lumbar-spine-degenerative-classification/"

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.nn import functional as F
import numpy as np
import torchvision
import pandas as pd
import os
import pydicom

In [185]:

class DCMImageDataset(Dataset):
    def __init__(self, series, coordinates_file, descriptions_file, train_file, img_dir, file_counts):
        self.coordinates = coordinates_file
        self.descriptions =  descriptions_file
        self.train = train_file
        self.series = series
        self.img_dir = img_dir

        merge = descriptions_file.merge(train_file, on='study_id', how='left')
        f = merge[merge['series_description'] == series]
        result = []
        for i in range(len(f)):
            study_id = f.iloc[i]['study_id']
            series_id = f.iloc[i]['series_id']
            ndf = f[(f['study_id'] == study_id) & (f['series_id'] == series_id)]
            
            expanded_dfs = []
            for j in file_counts[str(study_id)][str(series_id)]:
                ndf['number'] = j.split('.')[0]
                expanded_dfs.append(ndf.copy())
            
            dfs = pd.concat(expanded_dfs).reset_index(drop=True)
            result.append(dfs.copy())

        mapping = {'Normal/Mild' : 0, 'Moderate' : 1, 'Severe' : 2}
        self.df = pd.concat(result).reset_index(drop=True)
        self.label_column = self.df.columns[3:-1]
        self.df[self.label_column] = self.df[self.label_column].replace(mapping)

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        study_id = str(self.df.iloc[idx]['study_id'])
        series_id = str(self.df.iloc[idx]['series_id'])

        img_path = os.path.join(str(self.img_dir + 'train_images'), study_id)
        img_path = os.path.join(img_path, series_id)
        img_path = img_path + '/' + str(self.df.iloc[idx]['number']) + '.dcm'

        image = torch.from_numpy(pydicom.dcmread(str(img_path)).pixel_array.astype(np.float64))
        image = image.unsqueeze(0)
        image = image.unsqueeze(0)
        image = F.interpolate(image, (224,224), mode='bilinear')
        image = image.reshape(224, 224)

        label = self.df.iloc[idx][self.label_column].tolist()
        label = torch.tensor(label)

        return image, label
            

In [100]:
coordinates = pd.read_csv(data_dir + 'train_label_coordinates.csv')
descriptions = pd.read_csv(data_dir + 'train_series_descriptions.csv')
train = pd.read_csv(data_dir + 'train.csv')

file_counts = {}
study_ids = os.listdir(data_dir + 'train_images')

for study_id in study_ids:
    series_ids = os.listdir(data_dir + 'train_images/' + study_id)
    tmp = {}
    for series_id in series_ids:
        tmp[series_id] = os.listdir(data_dir + 'train_images/' + study_id + '/' + series_id)

    file_counts[study_id] = tmp

In [186]:
dataset = DCMImageDataset(series='Axial T2',
                          coordinates_file=coordinates,
                          descriptions_file=descriptions,
                          train_file=train,
                          img_dir=data_dir,
                          file_counts=file_counts,
                          )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ndf['number'] = j.split('.')[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ndf['number'] = j.split('.')[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ndf['number'] = j.split('.')[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

In [173]:
print(len(dataset))

79979


In [203]:
dataloader = DataLoader(dataset, batch_size=64, shuffle=True, drop_last=True)

In [204]:
for input, true in dataloader:
    print(input.shape)
    print(true.shape)
    break

torch.Size([64, 224, 224])
torch.Size([64, 25])


In [None]:
class model 