# Chapter 2 - Preliminaries

## 2.1. Data Manipulation

### 2.1.1 Getting Started

In [1]:
import torch

In [2]:
# create a vector of 12 consecutive numbers and cast it to float32
x = torch.arange(12, dtype=torch.float32)
x

tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.])

In [3]:
# the total number of elements in the tensor
x.numel()

12

In [4]:
# the shape of the tensor
x.shape

torch.Size([12])

In [5]:
# change the size of the tensor from (12,) to (4, 3)
X = x.reshape(4, 3)
X

tensor([[ 0.,  1.,  2.],
        [ 3.,  4.,  5.],
        [ 6.,  7.,  8.],
        [ 9., 10., 11.]])

Given a tensor of size `(n,)` and target shape `(h, w)`, we know that `w = n/h`. To automatically infer one component of the shape, we can place a `-1` for the shape component that should be inferred automatically.

In [6]:
x.reshape(-1, 3)

tensor([[ 0.,  1.,  2.],
        [ 3.,  4.,  5.],
        [ 6.,  7.,  8.],
        [ 9., 10., 11.]])

In [7]:
x.reshape(4, -1)

tensor([[ 0.,  1.,  2.],
        [ 3.,  4.,  5.],
        [ 6.,  7.,  8.],
        [ 9., 10., 11.]])

In [8]:
# create a tensor of zeros with shape (2, 3, 4)
torch.zeros((2, 3, 4))

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])

In [9]:
# create a tensor of ones with shape (2, 3, 4)
torch.ones((2, 3, 4))

tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])

In [10]:
# create a tensor of random numbers from a standard Gaussian with shape (2, 3, 4)
torch.randn(2, 3, 4)

tensor([[[-0.2823, -0.1301, -0.8505,  0.3788],
         [ 0.0124,  1.0500, -0.9956, -0.7030],
         [ 0.7702, -1.4320, -0.9849, -1.1856]],

        [[-1.4158, -0.5946, -2.1508,  1.4946],
         [ 0.3059,  0.1511,  2.1627,  0.9757],
         [-0.0852,  1.4076,  0.0566, -0.0978]]])

In [11]:
# create a tensor with the exact values provided
torch.tensor([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])

tensor([[2, 1, 4, 3],
        [1, 2, 3, 4],
        [4, 3, 2, 1]])

### 2.1.2. Indexing and Slicing

In [12]:
# recall what X is
X

tensor([[ 0.,  1.,  2.],
        [ 3.,  4.,  5.],
        [ 6.,  7.,  8.],
        [ 9., 10., 11.]])

In [13]:
# select the last row
X[-1]

tensor([ 9., 10., 11.])

In [14]:
# select the second and third rows
X[1:3]

tensor([[3., 4., 5.],
        [6., 7., 8.]])

In [15]:
# change value
X[1, 2] = 17
X

tensor([[ 0.,  1.,  2.],
        [ 3.,  4., 17.],
        [ 6.,  7.,  8.],
        [ 9., 10., 11.]])

In [16]:
# assign multiple elements the same value
X[:2, :] = 12
X

tensor([[12., 12., 12.],
        [12., 12., 12.],
        [ 6.,  7.,  8.],
        [ 9., 10., 11.]])

### 2.1.3. Operations

In [17]:
# Recall what x is
x

tensor([12., 12., 12., 12., 12., 12.,  6.,  7.,  8.,  9., 10., 11.])

In [18]:
# exponential function
torch.exp(x)

tensor([162754.7969, 162754.7969, 162754.7969, 162754.7969, 162754.7969,
        162754.7969,    403.4288,   1096.6332,   2980.9580,   8103.0840,
         22026.4648,  59874.1406])

In [19]:
x = torch.tensor([1.0, 2, 4, 8])
y = torch.tensor([2, 2, 2, 2])

In [20]:
x + y

tensor([ 3.,  4.,  6., 10.])

In [21]:
x - y

tensor([-1.,  0.,  2.,  6.])

In [22]:
x * y

tensor([ 2.,  4.,  8., 16.])

In [23]:
x / y

tensor([0.5000, 1.0000, 2.0000, 4.0000])

In [24]:
x ** y

tensor([ 1.,  4., 16., 64.])

In [25]:
# create two tensors with the same shape of (3, 4)
X = torch.arange(12, dtype=torch.float32).reshape((3, 4))
Y = torch.tensor([[2.0, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])

In [26]:
# concatenate two matrices along rows (dim=0)
torch.cat((X, Y), dim=0)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [ 2.,  1.,  4.,  3.],
        [ 1.,  2.,  3.,  4.],
        [ 4.,  3.,  2.,  1.]])

In [27]:
# concatenate two matrices along columns (dim=1)
torch.cat((X, Y), dim=1)

tensor([[ 0.,  1.,  2.,  3.,  2.,  1.,  4.,  3.],
        [ 4.,  5.,  6.,  7.,  1.,  2.,  3.,  4.],
        [ 8.,  9., 10., 11.,  4.,  3.,  2.,  1.]])

In [28]:
# construct a boolean tensor via logical statements
X == Y

tensor([[False,  True, False,  True],
        [False, False, False, False],
        [False, False, False, False]])

In [29]:
# sum up all the elements in the tensor
X.sum()

tensor(66.)

### 2.1.4. Broadcasting

Broadcasting works in the two following ways:
1. expand one or both arrays by copying eleemnts along axes with length 1 so that after this transformation, the two tensors have the same shape;
2. perform an elementwise operation on the resulting arrays.

In [30]:
a = torch.arange(3).reshape((3, 1))
b = torch.arange(2).reshape((1, 2))
a, b

(tensor([[0],
         [1],
         [2]]),
 tensor([[0, 1]]))

In [31]:
# a + b still works
a + b

tensor([[0, 1],
        [1, 2],
        [2, 3]])

**Broadcasting** produces a *larger 3x2 matrix* by **replicating matrix `a` along the columns and matrix `b` along the rows before adding them elementwise.

### 2.1.5. Saving Memory

In [32]:
# recall X and Y
X, Y

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.]]),
 tensor([[2., 1., 4., 3.],
         [1., 2., 3., 4.],
         [4., 3., 2., 1.]]))

In [33]:
before = id(Y)
Y = Y + X

In [34]:
# Y is assigned a new address
before, id(Y)

(1981284687040, 1981307456304)

This is undesirable because
1. We do not want to run around allocating memory unnecessarily all the time... **Whenever possible, we want to perform these updates in place**.
2. We want to point at the same parameters from multiple variables.

To perform in-place operations, we can assign the result of an operation to a previously allocated array Y by using slice notation: `Y[:] = <expression>`:

In [35]:
Z = torch.zeros_like(Y)
print('id(Z):', id(Z))

id(Z): 1981314162336


In [36]:
Z[:] = X + Y
# Z is NOT assigned a new address
print('id(Z):', id(Z))

id(Z): 1981314162336


If `X` is not reused in subsequent computation, we can also use `X[:] = X + Y` or `X += Y` to reduce the memory overhead of the operation:

In [37]:
before = id(X)
X += Y

In [38]:
before, id(X)

(1981284197456, 1981284197456)

### 2.1.6. Conversion to Other Python Objects

In [40]:
# recall X
X

tensor([[ 2.,  3.,  8.,  9.],
        [ 9., 12., 15., 18.],
        [20., 21., 22., 23.]])

In [41]:
# convert a tensor to a numpy array
A = X.numpy()
# convert a numpy array to a tensor
B = torch.from_numpy(A)

type(A), type(B)

(numpy.ndarray, torch.Tensor)

In [42]:
# convert a size-1 tensor to a Python scalar
a = torch.tensor([3.5])
a

tensor([3.5000])

In [43]:
a.item()

3.5

In [44]:
float(a)

3.5

In [45]:
int(a)

3

## 2.2. Data Preprocessing

### 2.2.1. Reading the Dataset

In [47]:
# create a csv file
import os
# create a folder named data if it does not exist
os.makedirs(os.path.join('..', 'data'), exist_ok=True)
# create a csv file in the folder data with the name house_tiny.csv if it does not exist
data_file = os.path.join('..', 'data', 'house_tiny.csv')

# write the csv file with the following content
with open(data_file, 'w') as f:
    f.write('''
            NumRooms, RoofType, Price
            NA, NA, 127500
            2, NA, 106000
            4, Slate, 178100
            NA, NA, 140000
            ''')

In [48]:
import pandas as pd

data = pd.read_csv(data_file)
print(data)

              NumRooms  RoofType   Price
0                   NA        NA  127500
1                    2        NA  106000
2                    4     Slate  178100
3                   NA        NA  140000


### 2.2.2. Data Preparation