In [16]:
# Load a single image from disk and rearrange it into channel-first layout.
import imageio
import torch

# The decoded array is laid out as height x width x channel.
img_arr = imageio.imread("candy2.jpeg")
img_arr.shape

# Wrap the array as a tensor, then move the channel axis to the front so the
# layout becomes channel x height x width.
img = torch.from_numpy(img_arr)
out = img.permute(2, 0, 1)
out.shape

torch.Size([3, 1024, 768])

In [58]:
# Build a normalized image batch from every PNG file in a directory.
import os

data_dir = "./image-cats/"
# os.listdir returns entries in arbitrary order; sort so that each file lands
# in the same batch slot on every run and platform.
filenames = sorted(name for name in os.listdir(data_dir)
                   if os.path.splitext(name)[-1] == ".png")

# Size the batch from the files actually found. A hard-coded size raises an
# IndexError when there are more files, and leaves all-zero images (which skew
# the per-channel statistics below) when there are fewer.
batch_size = len(filenames)
# NOTE(review): assumes every image is 256x256 with at least 3 channels - confirm.
batch = torch.zeros(batch_size, 3, 256, 256, dtype=torch.uint8)

# Read each image, move the channel axis first, keep only the first three
# channels (presumably dropping an alpha channel), and store it in its slot.
for i, name in enumerate(filenames):
    img_arr = imageio.imread(os.path.join(data_dir, name))
    img_t = torch.from_numpy(img_arr)
    img_t = img_t.permute(2, 0, 1)
    img_t = img_t[:3]
    batch[i] = img_t

# Convert to floating point and scale by the maximum possible uint8 value.
batch = batch.float()
batch /= 255

# Standardize each color channel using the mean and standard deviation
# computed over all images in the batch.
n_channels = batch.shape[-3]
for c in range(n_channels):
    # batch[:, c] selects channel c of every image in the batch.
    mean = torch.mean(batch[:, c])
    std = torch.std(batch[:, c])
    batch[:, c] = (batch[:, c] - mean) / std

In [76]:
# Volumetric medical data: read a directory of DICOM files as one volume.
data_dir = './volumetric-dicom/2-LUNG 3.0  B70f-04083/'
vol_arr = imageio.volread(data_dir, 'DICOM')  # depth x height x width

# Insert a color-channel axis at position 0, giving
# color x depth x height x width.
vol = torch.from_numpy(vol_arr).unsqueeze(0)

Reading DICOM (examining files): 1/99 files (1.0%99/99 files (100.0%)
  Found 1 correct series.
Reading DICOM (loading data): 99/99  (100.0%)


torch.Size([1, 99, 512, 512])

In [163]:
# Tabular data: load the white-wine quality CSV and explore it by label group.
import csv
import numpy as np

data_path = "./tabular-wine/winequality-white.csv"
# Fields are ';'-separated; skiprows=1 skips the first row of the file.
full_table_arr = np.loadtxt(data_path, delimiter=';', skiprows=1, dtype=np.float32)
full_table_t = torch.from_numpy(full_table_arr)

# The last column is the target (label); every other column is data.
data_t = full_table_t[:, :-1]
target_t = full_table_t[:, -1]

# One-hot encode the target: one row per sample, 10 possible target values.
onehot_target_t = torch.zeros(target_t.shape[0], 10).long()
onehot_target_t.scatter_(1, target_t.long().unsqueeze(1), 1.0)

# Standardize each data column (statistics taken along dim 0, i.e. per column).
data_mean = data_t.mean(dim=0)
data_std = data_t.std(dim=0)
data_norm = (data_t - data_mean) / data_std

# A boolean tensor can be used as an index to filter rows.
bad_indexes = target_t <= 3

# Split the rows into bad / mid / good groups according to the label.
bad_data = data_t[bad_indexes]
mid_data = data_t[(target_t > 3) & (target_t < 7)]
good_data = data_t[target_t >= 7]

# Per-column mean of each group.
bad_mean, mid_mean, good_mean = (
    group.mean(dim=0) for group in (bad_data, mid_data, good_data)
)

# Use the mid group's mean of column 6 (total sulfur) as a threshold and keep
# the rows whose column-6 value is strictly below it.
# NOTE(review): despite its name, above_average_data holds rows *below* the
# threshold - presumably "above average" refers to expected quality; confirm.
total_sulfur_threshold = float(mid_mean[6])
total_sulfur_data = data_t[:, 6]
above_average_data = data_t[torch.lt(total_sulfur_data, total_sulfur_threshold)]

In [199]:
# Time series: load hourly bike-sharing records and reshape them per day.
bikes_arr = np.loadtxt('./bike-sharing-dataset/hour-fixed.csv',
                       dtype=np.float32, skiprows=1, delimiter=',',
                       # Column 1 is not numeric as a whole: keep only characters
                       # 8-9 of the field (presumably the day of month of a date
                       # string - confirm against the CSV).
                       converters={1: lambda x: float(x[8:10])})
bikes_t = torch.from_numpy(bikes_arr)

# Group the rows into chunks of 24; -1 lets view() infer the number of chunks.
# The column count is read from the tensor instead of being hard-coded.
daily_bikes_t = bikes_t.view(-1, 24, bikes_t.shape[1])
# Swap the last two axes: chunk x column x position-within-chunk.
daily_bikes_t = daily_bikes_t.transpose(1, 2)

# One-hot encoding of the weather column (column 9), 1st attempt: first 24 rows only.
firstday = bikes_t[:24].long()
weather_onehot = torch.zeros(firstday.shape[0], 4)
# Subtract 1 from the stored weather value so it can be used as an index into
# the 4 one-hot slots.
weather_onehot.scatter_(dim=1, index=firstday[:, 9].unsqueeze(1) - 1, value=1.0)
# Append the one-hot columns to the original rows (concatenate along columns).
firstday_with_onehot = torch.cat((bikes_t[:24], weather_onehot), dim=1)

# One-hot encoding of the weather column, 2nd attempt: the whole reshaped tensor.
daily_weather_onehot = torch.zeros(daily_bikes_t.shape[0], 4, daily_bikes_t.shape[2])
daily_weather_onehot.scatter_(dim=1,
                              index=daily_bikes_t[:, 9, :].unsqueeze(1).long() - 1,
                              value=1.0)
# torch.cat allocates a new tensor, so the in-place writes below do not touch bikes_t.
daily_bikes_t = torch.cat((daily_bikes_t, daily_weather_onehot), dim=1)

# Alternative to one-hot: rescale the weather value into the [0.0, 1.0] range.
daily_bikes_t[:, 9, :] = (daily_bikes_t[:, 9, :] - 1.0) / 3.0

# Temperature (column 10). temp_t is a *view* into daily_bikes_t, so it changes
# whenever the column is written in place. Snapshot the raw values first so
# that both rescaling options below are computed from the same, unmodified data.
temp_t = daily_bikes_t[:, 10, :]
temp_raw_t = temp_t.clone()

# Option 1: min-max scaling into the [0, 1] range.
temp_min = torch.min(temp_raw_t)
temp_max = torch.max(temp_raw_t)
temp_minmax_t = (temp_raw_t - temp_min) / (temp_max - temp_min)

# Option 2: standardization to zero mean and unit standard deviation
# (the result is NOT bounded to [-1, 1]).
temp_standardized_t = (temp_raw_t - torch.mean(temp_raw_t)) / torch.std(temp_raw_t)

# Store the standardized values, matching the final state of the column when
# the two attempts were applied one after the other.
daily_bikes_t[:, 10, :] = temp_standardized_t
