## Loading an image file

In [2]:
import imageio
# Decode the sample JPEG into a NumPy array and show its shape.
dog_image_path = '../data/p1ch4/image-dog/bobby.jpg'
img_arr = imageio.imread(dog_image_path)
img_arr.shape

(720, 1280, 3)

## Changing the layout

In [4]:
import torch
import torchvision
# Wrap the decoded image array as a tensor (from_numpy shares the array's memory).
img = torch.from_numpy(img_arr)
# Move the last axis to the front: axis order (0, 1, 2) becomes (2, 0, 1).
out = img.permute((2, 0, 1))

In [6]:
# Check the shape after the permutation (the former last axis is now first).
out.shape

torch.Size([3, 720, 1280])

In [7]:
# Pre-allocate a zero-filled uint8 batch of shape (batch_size, 3, 256, 256).
batch_size = 3
batch = torch.zeros((batch_size, 3, 256, 256), dtype=torch.uint8)

In [9]:
import os
data_dir = '../data/p1ch4/image-cats'
# Collect the names of the PNG files in data_dir.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the resulting list (and anything indexed by position in it) reproducible
# across runs and platforms.
filenames = sorted(
    name
    for name in os.listdir(data_dir)
    if os.path.splitext(name)[-1] == '.png'
)

In [10]:
# Read each listed file and copy it into slot i of the pre-allocated batch.
for i, filename in enumerate(filenames):
    img_arr = imageio.imread(os.path.join(data_dir, filename))
    # Move the last axis to the front, then keep only the first three entries
    # along the new leading axis (drops any extra channel, e.g. alpha, if present).
    img_t = torch.from_numpy(img_arr).permute(2, 0, 1)[:3]
    batch[i] = img_t

In [11]:
# Display the batch after the images have been copied in.
batch

tensor([[[[156, 152, 124,  ..., 150, 149, 158],
          [174, 134, 165,  ..., 120, 136, 138],
          [127, 156, 107,  ..., 131, 143, 164],
          ...,
          [116, 130, 129,  ..., 127, 118, 112],
          [129, 130, 123,  ..., 115, 121, 114],
          [129, 123, 118,  ..., 113, 121, 120]],

         [[139, 135, 109,  ..., 135, 135, 147],
          [160, 119, 149,  ..., 105, 122, 124],
          [113, 140,  90,  ..., 118, 129, 152],
          ...,
          [ 99, 110, 111,  ..., 117, 108, 103],
          [111, 111, 106,  ..., 106, 112, 105],
          [111, 104, 102,  ..., 103, 110, 111]],

         [[129, 123,  98,  ..., 131, 132, 145],
          [155, 110, 137,  ..., 102, 119, 121],
          [104, 132,  80,  ..., 112, 125, 146],
          ...,
          [ 93, 108, 105,  ..., 125, 115, 108],
          [108, 108,  98,  ..., 110, 117, 110],
          [107,  98,  95,  ..., 108, 115, 116]]],


        [[[202, 193, 190,  ...,  13,  13,  12],
          [199, 192, 189,  ...,  14

## Normalizing the data

In [12]:
# Convert the batch to floating point, then divide every value by 255 in place.
batch = batch.float()
batch.div_(255.0)

In [13]:
# Standardize each channel (axis 1) using the mean and standard deviation
# computed over all samples and pixels of that channel.
n_channels = batch.shape[1]
for c in range(n_channels):
    channel = batch[:, c]
    mean = channel.mean()
    std = channel.std()
    # The right-hand side is a new tensor; it is written back into batch.
    batch[:, c] = (channel - mean) / std

In [14]:
# Display the batch after per-channel standardization.
batch

tensor([[[[ 0.1439,  0.0730, -0.4234,  ...,  0.0375,  0.0198,  0.1794],
          [ 0.4631, -0.2461,  0.3035,  ..., -0.4944, -0.2107, -0.1752],
          [-0.3703,  0.1439, -0.7249,  ..., -0.2993, -0.0866,  0.2858],
          ...,
          [-0.5653, -0.3171, -0.3348,  ..., -0.3703, -0.5298, -0.6362],
          [-0.3348, -0.3171, -0.4412,  ..., -0.5830, -0.4766, -0.6007],
          [-0.3348, -0.4412, -0.5298,  ..., -0.6185, -0.4766, -0.4944]],

         [[ 0.4632,  0.3874, -0.1058,  ...,  0.3874,  0.3874,  0.6150],
          [ 0.8615,  0.0839,  0.6529,  ..., -0.1816,  0.1408,  0.1787],
          [-0.0299,  0.4822, -0.4661,  ...,  0.0649,  0.2736,  0.7098],
          ...,
          [-0.2954, -0.0868, -0.0678,  ...,  0.0460, -0.1247, -0.2196],
          [-0.0678, -0.0678, -0.1627,  ..., -0.1627, -0.0489, -0.1816],
          [-0.0678, -0.2006, -0.2385,  ..., -0.2196, -0.0868, -0.0678]],

         [[ 0.7792,  0.6573,  0.1495,  ...,  0.8198,  0.8401,  1.1041],
          [ 1.3072,  0.3933,  

## Loading a specialized format

In [17]:
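# Earlier attempt, kept for reference only (never executed). In a normal (non-raw)
# string literal the backslashes below are read as escape sequences (for example
# `\v` and `\2`), so this spelling would not produce the intended path.
# import imageio
# dir_path ='../data\p1ch4\volumetric-dicom\2-LUNG 3.0  B70f-04083'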

In [23]:
# Read every DICOM file in the directory as a single volume array.
dir_path = "../data/p1ch4/volumetric-dicom/2-LUNG3.0B70f-04083" # the spaces were removed from the directory name; per the original note, loading raised an error with them present
vol_arr = imageio.volread(dir_path, 'DICOM')

Reading DICOM (examining files): 1/99 files (1.0%24/99 files (24.2%51/99 files (51.5%78/99 files (78.8%99/99 files (100.0%)
  Found 1 correct series.
Reading DICOM (loading data): 10/99  (10.122/99  (22.237/99  (37.448/99  (48.570/99  (70.783/99  (83.891/99  (91.999/99  (100.0%)


In [24]:
# Shape of the loaded volume array.
vol_arr.shape

(99, 512, 512)

In [25]:
# Convert the volume to a float tensor and insert a new size-1 dimension at
# position 0. (unsqueeze adds a dimension; squeeze removes a size-1 dimension.)
vol = torch.from_numpy(vol_arr).float().unsqueeze(0)

In [26]:
# Confirm the extra leading dimension added by unsqueeze.
vol.shape

torch.Size([1, 99, 512, 512])

## Representing tabular data
### Using a real-world dataset

### Loading a wine data tensor

In [28]:
import csv
# Relative path to the semicolon-delimited white-wine quality CSV used below.
wine_path = '../data/p1ch4/tabular-wine/winequality-white.csv'

In [31]:
import numpy as np
# Parse the CSV into a float32 array: fields are separated by ';' and the
# first row (the header) is skipped.
wineq_numpy = np.loadtxt(
    wine_path,
    delimiter=';',
    skiprows=1,
    dtype=np.float32,
)
wineq_numpy

array([[ 7.  ,  0.27,  0.36, ...,  0.45,  8.8 ,  6.  ],
       [ 6.3 ,  0.3 ,  0.34, ...,  0.49,  9.5 ,  6.  ],
       [ 8.1 ,  0.28,  0.4 , ...,  0.44, 10.1 ,  6.  ],
       ...,
       [ 6.5 ,  0.24,  0.19, ...,  0.46,  9.4 ,  6.  ],
       [ 5.5 ,  0.29,  0.3 , ...,  0.38, 12.8 ,  7.  ],
       [ 6.  ,  0.21,  0.38, ...,  0.32, 11.8 ,  6.  ]], dtype=float32)

In [33]:
# Read only the first row of the CSV (the header) to get the column names.
# The context manager guarantees the file handle is closed afterwards, and
# newline='' is the mode the csv module recommends for files given to csv.reader.
with open(wine_path, newline='') as csv_file:
    col_list = next(csv.reader(csv_file, delimiter=';'))

In [34]:
# Show the array's shape next to the column names read from the header.
wineq_numpy.shape,col_list

((4898, 12),
 ['fixed acidity',
  'volatile acidity',
  'citric acid',
  'residual sugar',
  'chlorides',
  'free sulfur dioxide',
  'total sulfur dioxide',
  'density',
  'pH',
  'sulphates',
  'alcohol',
  'quality'])

In [35]:
# Wrap the NumPy array as a tensor, then check its shape and dtype.
wineq = torch.from_numpy(wineq_numpy)
wineq.shape,wineq.dtype

(torch.Size([4898, 12]), torch.float32)

In [39]:
# Keep all rows and every column except the last one.
data = wineq[:,:-1]
data,data.shape

(tensor([[ 7.0000,  0.2700,  0.3600,  ...,  3.0000,  0.4500,  8.8000],
         [ 6.3000,  0.3000,  0.3400,  ...,  3.3000,  0.4900,  9.5000],
         [ 8.1000,  0.2800,  0.4000,  ...,  3.2600,  0.4400, 10.1000],
         ...,
         [ 6.5000,  0.2400,  0.1900,  ...,  2.9900,  0.4600,  9.4000],
         [ 5.5000,  0.2900,  0.3000,  ...,  3.3400,  0.3800, 12.8000],
         [ 6.0000,  0.2100,  0.3800,  ...,  3.2600,  0.3200, 11.8000]]),
 torch.Size([4898, 11]))

In [40]:
# Last column of every row (the 'quality' column according to col_list).
target = wineq[:,-1]

In [41]:
# Inspect the target values and their shape.
target,target.shape

(tensor([6., 6., 6.,  ..., 6., 7., 6.]), torch.Size([4898]))

In [42]:
# Same column again, converted to 64-bit integers so it can be used as indices.
target = wineq[:,-1].long()

In [43]:
# Display the integer-typed target.
target

tensor([6, 6, 6,  ..., 6, 7, 6])

## One-hot encoding

In [44]:
# Zero matrix with one row per sample and 10 columns.
# NOTE(review): 10 presumably covers quality scores 0-9 -- confirm against the dataset.
target_onehot = torch.zeros(target.shape[0],10)

In [45]:
# In place: along dim 1, write 1.0 into each row at the column given by that
# row's target value. unsqueeze(1) turns target into a column so the index
# tensor has the same number of dimensions as target_onehot.
target_onehot.scatter_(1,target.unsqueeze(1),1.0)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [46]:
# Add a size-1 dimension at position 1, turning the 1-D target into a column.
target_unsqueezed = target.unsqueeze(1)
target_unsqueezed

tensor([[6],
        [6],
        [6],
        ...,
        [6],
        [7],
        [6]])

## When to categorize

In [47]:
# Mean of every column, reducing over the rows (dim 0).
data_mean = data.mean(dim=0)
data_mean

tensor([6.8548e+00, 2.7824e-01, 3.3419e-01, 6.3914e+00, 4.5772e-02, 3.5308e+01,
        1.3836e+02, 9.9403e-01, 3.1883e+00, 4.8985e-01, 1.0514e+01])

In [48]:
# Variance of every column, reducing over the rows (dim 0).
data_var = data.var(dim=0)
data_var

tensor([7.1211e-01, 1.0160e-02, 1.4646e-02, 2.5726e+01, 4.7733e-04, 2.8924e+02,
        1.8061e+03, 8.9455e-06, 2.2801e-02, 1.3025e-02, 1.5144e+00])

In [49]:
# Standardize each column: subtract its mean and divide by its standard
# deviation (the square root of the variance).
data_normalized = (data - data_mean) / data_var.sqrt()

In [50]:
# Display the standardized data.
data_normalized

tensor([[ 1.7208e-01, -8.1761e-02,  2.1326e-01,  ..., -1.2468e+00,
         -3.4915e-01, -1.3930e+00],
        [-6.5743e-01,  2.1587e-01,  4.7996e-02,  ...,  7.3995e-01,
          1.3422e-03, -8.2419e-01],
        [ 1.4756e+00,  1.7450e-02,  5.4378e-01,  ...,  4.7505e-01,
         -4.3677e-01, -3.3663e-01],
        ...,
        [-4.2043e-01, -3.7940e-01, -1.1915e+00,  ..., -1.3130e+00,
         -2.6153e-01, -9.0545e-01],
        [-1.6054e+00,  1.1666e-01, -2.8253e-01,  ...,  1.0049e+00,
         -9.6251e-01,  1.8574e+00],
        [-1.0129e+00, -6.7703e-01,  3.7852e-01,  ...,  4.7505e-01,
         -1.4882e+00,  1.0448e+00]])

## Finding thresholds

In [52]:
# Boolean mask: True for rows whose target value is 3 or lower.
bad_indexes = target<=3

In [53]:
# Shape and dtype of the mask, plus the number of True entries.
bad_indexes.shape,bad_indexes.dtype,bad_indexes.sum()

(torch.Size([4898]), torch.bool, tensor(20))

In [54]:
# Boolean-mask indexing: keep only the rows where the mask is True.
bad_data = data[bad_indexes]

In [55]:
# Number of selected rows and columns.
bad_data.shape

torch.Size([20, 11])

In [56]:
# Split rows by target value: 3 or lower, and strictly between 3 and 7.
bad_data = data[target.le(3)]
mid_data = data[target.gt(3) & target.lt(7)]

In [57]:
# Rows whose target value is 7 or higher.
good_data = data[target>=7]

In [58]:
# Column-wise mean (over rows, dim 0) of each of the three groups.
bad_mean = bad_data.mean(dim=0)
mid_mean = mid_data.mean(dim=0)
good_mean = good_data.mean(dim=0)

In [59]:
# Print one line per column: its index, its name, and the mean of that column
# in the bad, mid and good groups. zip stops at the shortest input.
for i, (name, bad, mid, good) in enumerate(zip(col_list, bad_mean, mid_mean, good_mean)):
    print(f'{i:2} {name:20} {bad:6.2f} {mid:6.2f} {good:6.2f}')

 0 fixed acidity          7.60   6.89   6.73
 1 volatile acidity       0.33   0.28   0.27
 2 citric acid            0.34   0.34   0.33
 3 residual sugar         6.39   6.71   5.26
 4 chlorides              0.05   0.05   0.04
 5 free sulfur dioxide   53.33  35.42  34.55
 6 total sulfur dioxide 170.60 141.83 125.25
 7 density                0.99   0.99   0.99
 8 pH                     3.19   3.18   3.22
 9 sulphates              0.47   0.49   0.50
10 alcohol               10.34  10.26  11.42


In [61]:
# Threshold taken from the printed table: the mid-group mean of column 6
# ('total sulfur dioxide').
total_sulfur_threshold = 141.83
total_sulfur_data = data[:,6]
# Boolean mask: True where the column value is strictly below the threshold.
predicted_indexes = total_sulfur_data < total_sulfur_threshold
predicted_indexes.shape,predicted_indexes.dtype,predicted_indexes.sum()

(torch.Size([4898]), torch.bool, tensor(2727))

In [62]:
# Boolean mask: True for rows whose target value is above 5.
actual_indexes = target>5

In [63]:
# Shape and dtype of the mask, plus the number of True entries.
actual_indexes.shape,actual_indexes.dtype,actual_indexes.sum()

(torch.Size([4898]), torch.bool, tensor(3258))

In [65]:
# Count the rows where both masks are True, as a plain Python number.
n_matches = (actual_indexes & predicted_indexes).sum().item()

In [66]:
# Display the number of rows flagged by both masks.
n_matches

2018

In [67]:
# Number of rows flagged by the threshold rule, as a plain Python number.
n_predicted = predicted_indexes.sum().item()

In [68]:
# Number of rows flagged by the target-based mask.
n_actual = actual_indexes.sum().item()
# Matches, the fraction of predicted rows that are actual ones, and the
# fraction of actual rows that were predicted.
n_matches, n_matches / n_predicted, n_matches / n_actual

(2018, 0.74000733406674, 0.6193984039287906)

## Working with time series

### Adding a time dimension