2.1 Data Manipulation

2.1.1 Getting Started

In [3]:
import torch

In [4]:
x=torch.arange(12, dtype=torch.float32)
x

tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.])

In [None]:
x.shape

In [None]:
torch.Size([12])

In [5]:
x.numel()

12

In [6]:
X=x.reshape(3,4)
X

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])

In [None]:
torch.zeros((2,3,4))

In [None]:
torch.ones((2,3,4))

In [None]:
toch.randn(3,4)

In [8]:
torch.tensor([[2,1,4,3],[1,2,3,4],[4,3,2,1,]])

tensor([[2, 1, 4, 3],
        [1, 2, 3, 4],
        [4, 3, 2, 1]])

2.1.2 Operations

In [9]:
x=torch.tensor([1.0,2,4,8])
y=torch.tensor([2,2,2,2])
x+y, x-y, x*y, x/y, x**y

(tensor([ 3.,  4.,  6., 10.]),
 tensor([-1.,  0.,  2.,  6.]),
 tensor([ 2.,  4.,  8., 16.]),
 tensor([0.5000, 1.0000, 2.0000, 4.0000]),
 tensor([ 1.,  4., 16., 64.]))

In [10]:
torch.exp(x)

tensor([2.7183e+00, 7.3891e+00, 5.4598e+01, 2.9810e+03])

In [11]:
X=torch.arange(12,dtype=torch.float32).reshape((3,4))
Y=torch.tensor([[2.0,1,4,3],[1,2,3,4],[4,3,2,1]])
torch.cat((X,Y),dim=0), torch.cat((X,Y),dim=1)

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [ 2.,  1.,  4.,  3.],
         [ 1.,  2.,  3.,  4.],
         [ 4.,  3.,  2.,  1.]]),
 tensor([[ 0.,  1.,  2.,  3.,  2.,  1.,  4.,  3.],
         [ 4.,  5.,  6.,  7.,  1.,  2.,  3.,  4.],
         [ 8.,  9., 10., 11.,  4.,  3.,  2.,  1.]]))

In [12]:
X==Y

tensor([[False,  True, False,  True],
        [False, False, False, False],
        [False, False, False, False]])

In [13]:
X.sum()

tensor(66.)

2.1.3 Broadcasting Mechanism

In [15]:
a=torch.arange(3).reshape((3,1))
b=torch.arange(2).reshape((1,2))
a,b

(tensor([[0],
         [1],
         [2]]),
 tensor([[0, 1]]))

In [16]:
a+b

tensor([[0, 1],
        [1, 2],
        [2, 3]])

2.1.4 Indexing and Slicing

In [17]:
X[-1],X[1:3]

(tensor([ 8.,  9., 10., 11.]),
 tensor([[ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.]]))

In [18]:
X[1,2]=9
X

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  9.,  7.],
        [ 8.,  9., 10., 11.]])

In [20]:
X[0:2,:]=12
X

tensor([[12., 12., 12., 12.],
        [12., 12., 12., 12.],
        [ 8.,  9., 10., 11.]])

2.1.5 Saving Memory

In [21]:
before=id(Y)
Y=Y+X
id(Y)==before

False

In [22]:
Z=torch.zeros_like(Y)
print('id(Z):',id(Z))
Z[:]=X+Y
print('id(Z):',id(Z))

id(Z): 2025307085616
id(Z): 2025307085616


In [23]:
before=id(X)
X+=Y
id(X)==before

True

2.1.6 Conversion to Other Python Objects

In [24]:
A=X.numpy()
B=torch.from_numpy(A)
type(A),type(B)

(numpy.ndarray, torch.Tensor)

In [25]:
a=torch.tensor([3.5])
a, a.item(), float(a), int(a)

(tensor([3.5000]), 3.5, 3.5, 3)

2.2 Data Preprocessing

2.2.1 Reading the Dataset

In [26]:
import os

os.makedirs(os.path.join('..','data'),exist_ok=True)
data_file=os.path.join('..','data','house_tiny.csv')
with open(data_file, 'w')as f:
    f.write('NumRooms, Alley, Price\n')
    f.write('NA,Pave,127500\n')
    f.write('2,NA,106000\n')
    f.write('4,NA,178100\n')
    f.write('NA,NA,140000\n')

In [28]:
import pandas as pd

data=pd.read_csv(data_file)
print(data)

   NumRooms  Alley   Price
0       NaN   Pave  127500
1       2.0    NaN  106000
2       4.0    NaN  178100
3       NaN    NaN  140000


2.2.2 Handling Missing Data

In [31]:
inputs, outputs=data.iloc[:,0:2],data.iloc[:,2]
inputs=inputs.fillna(inputs.mean())
print(inputs)

   NumRooms  Alley
0       3.0   Pave
1       2.0    NaN
2       4.0    NaN
3       3.0    NaN


In [32]:
inputs=pd.get_dummies(inputs,dummy_na=True)
print(inputs)

   NumRooms   Alley_Pave   Alley_nan
0       3.0            1           0
1       2.0            0           1
2       4.0            0           1
3       3.0            0           1


2.2.3 Conversion to the Tensor Format

In [33]:
import torch

X,y=torch.tensor(inputs.values), torch.tensor(outputs.values)
X,y

(tensor([[3., 1., 0.],
         [2., 0., 1.],
         [4., 0., 1.],
         [3., 0., 1.]], dtype=torch.float64),
 tensor([127500, 106000, 178100, 140000]))

2.3 Linear Algebra
2.3.1 Scalars

In [34]:
import torch

x=torch.tensor(3.0)
y=torch.tensor(2.0)

x+y, x*y, x/y, x**y

(tensor(5.), tensor(6.), tensor(1.5000), tensor(9.))

2.3.2 Vectors

In [35]:
x=torch.arange(4)
x

tensor([0, 1, 2, 3])

In [36]:
x[3]

tensor(3)

In [37]:
len(x)

4

In [38]:
x.shape

torch.Size([4])

2.3.3 Matrices

In [39]:
A=torch.arange(20).reshape(5,4)
A

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15],
        [16, 17, 18, 19]])

In [40]:
A.T

tensor([[ 0,  4,  8, 12, 16],
        [ 1,  5,  9, 13, 17],
        [ 2,  6, 10, 14, 18],
        [ 3,  7, 11, 15, 19]])

In [41]:
B=torch.tensor([[1,2,3],[2,0,4],[3,4,5]])
B

tensor([[1, 2, 3],
        [2, 0, 4],
        [3, 4, 5]])

In [42]:
B==B.T

tensor([[True, True, True],
        [True, True, True],
        [True, True, True]])

2.3.4 Tensors

In [43]:
X=torch.arange(24).reshape(2,3,4)
X

tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])

2.3.5 Basic Properties of Tensor Arithmetic

In [44]:
A=torch.arange(20, dtype=torch.float32).reshape(5,4)
B=A.clone()
A, A+B

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [12., 13., 14., 15.],
         [16., 17., 18., 19.]]),
 tensor([[ 0.,  2.,  4.,  6.],
         [ 8., 10., 12., 14.],
         [16., 18., 20., 22.],
         [24., 26., 28., 30.],
         [32., 34., 36., 38.]]))

In [45]:
A*B

tensor([[  0.,   1.,   4.,   9.],
        [ 16.,  25.,  36.,  49.],
        [ 64.,  81., 100., 121.],
        [144., 169., 196., 225.],
        [256., 289., 324., 361.]])

In [46]:
a=2
X=torch.arange(24).reshape(2,3,4)
a+X,(a*X).shape

(tensor([[[ 2,  3,  4,  5],
          [ 6,  7,  8,  9],
          [10, 11, 12, 13]],
 
         [[14, 15, 16, 17],
          [18, 19, 20, 21],
          [22, 23, 24, 25]]]),
 torch.Size([2, 3, 4]))

2.3.6 Reduction

In [47]:
x=torch.arange(4,dtype=torch.float32)
x, x.sum()

(tensor([0., 1., 2., 3.]), tensor(6.))

In [48]:
A.shape, A.sum()

(torch.Size([5, 4]), tensor(190.))

In [50]:
A_sum_axis1=A.sum(axis=1)
A_sum_axis1, A_sum_axis1.shape

(tensor([ 6., 22., 38., 54., 70.]), torch.Size([5]))

In [51]:
A.sum(axis=[0,1])

tensor(190.)

In [52]:
A.mean(),A.sum()/A.numel()

(tensor(9.5000), tensor(9.5000))

In [53]:
A.mean(axis=0),A.sum(axis=0)/A.shape[0]

(tensor([ 8.,  9., 10., 11.]), tensor([ 8.,  9., 10., 11.]))

In [54]:
sum_A=A.sum(axis=1,keepdims=True)
sum_A

tensor([[ 6.],
        [22.],
        [38.],
        [54.],
        [70.]])

In [55]:
A/sum_A

tensor([[0.0000, 0.1667, 0.3333, 0.5000],
        [0.1818, 0.2273, 0.2727, 0.3182],
        [0.2105, 0.2368, 0.2632, 0.2895],
        [0.2222, 0.2407, 0.2593, 0.2778],
        [0.2286, 0.2429, 0.2571, 0.2714]])

In [56]:
A.cumsum(axis=0)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  6.,  8., 10.],
        [12., 15., 18., 21.],
        [24., 28., 32., 36.],
        [40., 45., 50., 55.]])

2.3.7 Dot products

In [57]:
y=torch.ones(4,dtype=torch.float32)
x,y,torch.dot(x,y)

(tensor([0., 1., 2., 3.]), tensor([1., 1., 1., 1.]), tensor(6.))

In [58]:
torch.sum(x*y)

tensor(6.)

2.3.8 Matrix-Vector Products

In [59]:
A.shape, x.shape, torch.mv(A,x)

(torch.Size([5, 4]), torch.Size([4]), tensor([ 14.,  38.,  62.,  86., 110.]))

2.3.9 Matrix-Matrix Multiplication

In [60]:
B=torch.ones(4,3)
torch.mm(A,B)

tensor([[ 6.,  6.,  6.],
        [22., 22., 22.],
        [38., 38., 38.],
        [54., 54., 54.],
        [70., 70., 70.]])

2.3.10 Norms

In [62]:
u=torch.tensor([3.0,-4.0])
torch.norm(u)

tensor(5.)

In [63]:
torch.abs(u).sum()

tensor(7.)

In [64]:
torch.norm(torch.ones((4,9)))

tensor(6.)

2.4 Calculus
2.4.1 Derivatives and Differentiation

In [65]:
import numpy as np
from IPython import display
from d2l import torch as d2l

def f(x):
    return 3*x**2-4*x

ModuleNotFoundError: No module named 'd2l'

In [66]:
def numerical_lim(f,x,h):
    return(f(x+h)-f(x))/h

h=0.1
for i in range(5):
    print(f'h={h:.5f},numerical limit={numerical_lim(f,1,h):.5f}')
    h*=0.1

TypeError: '_io.TextIOWrapper' object is not callable

In [67]:
def use_svg_display():
    """Use the svg format to display a plot in Jupyter."""
    display.set_matplotlib_formats('svg')

In [68]:
def set_figsize(figsize=(3.5,2.5)):
    """Set the figure size for matplotlib."""
    use_svg_display()
    d2l.plt.rcParams['figure.figsize']=figsize

In [69]:
def set_axes(axes,xlabel, ylabel, xlim, ylim, xscale, yscale, legend):
    """Set the axes for matplotlib."""
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.set_xscale(xscale)
    axes.set_yscale(yscale)
    axes.set_xlim(xlim)
    axes.set_ylim(ylim)
    if legend:
        axes.legend(legend)
    axes.grid()

In [70]:
def plot(X,Y=None,ylabel=None,legend=None,xlim=None, ylim=None, xscale='linear',yscale='linear', fmts=('-','m--','g-.','r:'),
        figsize=(3.5,2.5),axes=None):
    """Plot data points."""
    if legend is None:
        legend=[]
        
    set_figsize(figsize)
    axes=axes if axes else d2l.plt.gca()
    
    def has_one_axies(X):
        return (hasattr(X,"ndim") and X.ndim==1 or isinstance(X,list)
               and not hasattr(X[0],"__len__"))
    
    if has_one_axis(X):
        X=[X]
    if Y is None:
        X,Y=[[]]*len(X),X
    elif has_one_axis(Y):
        Y=[Y]
    if len(X)!=len(Y):
        X=X*len(Y)
    axes.cla()
    for x,y,fmt in zip(X,Y,fmts):
        if len(x):
            axes.plot(x,y,fmt)
        else:
            axes.plot(y,fmt)
    set_axes(axes,xlabel,ylabel,xlim,ylim,xscale,yscale,legend)

In [71]:
x=np.arange(0,3,0.1)
plot(x,[f(x),2*x-3],'x', 'f(x)', lengend['f(x)','Tangent line (x=1)'])

TypeError: '_io.TextIOWrapper' object is not callable

2.5 Automatic Differentiation
2.5.1 A simple Example

In [72]:
import torch

x=torch.arange(4.0)
x

tensor([0., 1., 2., 3.])

In [73]:
x.requires_grad_(True)
x.grad

In [74]:
y=2*torch.dot(x,x)
y

tensor(28., grad_fn=<MulBackward0>)

In [75]:
y.backward()
x.grad

tensor([ 0.,  4.,  8., 12.])

In [76]:
x.grad==4*x

tensor([True, True, True, True])

In [77]:
x.grad.zero_()
y=x.sum()
y.backward()
x.grad

tensor([1., 1., 1., 1.])

2.5.2 Backward for Non-Scalar Variables

In [78]:
x.grad.zero_()
y=x*x
y.sum().backward()
x.grad

tensor([0., 2., 4., 6.])

2.5.3 Detaching Computation

In [79]:
x.grad.zero_()
y=x*x
u=y.detach()
z=u*x

z.sum().backward()
x.grad==u

tensor([True, True, True, True])

In [80]:
x.grad.zero_()
y.sum().backward()
x.grad==2*x

tensor([True, True, True, True])

2.5.4 Computing the Gradient of Python Control Flow

In [81]:
def f(a):
    b=a*2
    while b.norm()<1000:
        b=b*2
    if b.sum()>0:
        c=b
    else:
        c=100*b
    return c

a=torch.randn(size=(),requires_grad=True)
d=f(a)
d.backward()

In [82]:
a.grad==d/a

tensor(True)

2.6 Probability
2.6.1 Basic Probability Theory

In [83]:
import torch
from torch.distributions import multinomial
from d2l import torch as d21

ModuleNotFoundError: No module named 'd2l'

In [84]:
multinomial.Multinomial(10,fair_probs).sample()

NameError: name 'fair_probs' is not defined

In [85]:
counts=multinomial.Multinomial(1000,fair_probs).sample()
counts/1000

NameError: name 'fair_probs' is not defined

In [86]:
counts=multinomial.Multinomial(10,fair_probs).sample((500,))
cum_counts=counts.cumsum(dim=0)
estimates=cum_counts/cum_counts.sum(dim=1,keepdims=True)

d21.set_figsize((6,4.5))
for i in range(6):
    d2l.plt.plot(estimates[:,1].numpy(),
                label=("P(die="+str(i+1)+")"))
    d2l.plt.axhline(y=0.167,color='black',linestyle='dashed')
    d2l.plt.gca().set_xlabel('Groups of experiments')
    d2l.plt.gca().set_ylabel('Estimated probability')
    d2l.plt.legend();

NameError: name 'fair_probs' is not defined

2.7 Documentation
2.7.1 Finding All the Functions and Classes in a Module

In [None]:
import torch
print(dir(torch.distributions))