In [2]:
import torch

from teach.pytorch.chapter_preliminaries.pandas import outputs

# Build a 1-D tensor holding the integers 0..11, then show the tensor,
# its shape, and its total number of elements (one item per output line).
x = torch.arange(start=0, end=12)
print(x, x.shape, x.numel(), sep="\n")

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
torch.Size([12])
12


In [3]:
# Rearrange the 12-element vector into a matrix with 3 rows and 4 columns.
x = x.reshape((3, 4))
print(x)

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])


In [5]:
# Constant-filled tensors of shape (2, 3, 4): one of zeros, one of ones.
y = torch.zeros((2, 3, 4))
z = torch.ones((2, 3, 4))
print(y, z, sep="\n")

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])
tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])


In [9]:
# A tensor can be created directly from a nested Python list; each inner
# list becomes one row.
a = torch.tensor(
    [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
    ]
)
print(a, a.shape, a.numel(), sep="\n")

tensor([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]])
torch.Size([3, 4])
12


In [12]:
# Element-wise arithmetic between a float tensor and an integer tensor.
# Every operator below is applied position by position.
b = torch.tensor([1.0, 2, 3, 4])
c = torch.tensor([2] * 4)
print("b + c =", torch.add(b, c))
print("b - c =", torch.sub(b, c))
print("b * c =", torch.mul(b, c))
print("b / c =", torch.div(b, c))
print("b ** c =", torch.pow(b, c))

b + c = tensor([3., 4., 5., 6.])
b - c = tensor([-1.,  0.,  1.,  2.])
b * c = tensor([2., 4., 6., 8.])
b / c = tensor([0.5000, 1.0000, 1.5000, 2.0000])
b ** c = tensor([ 1.,  4.,  9., 16.])


In [15]:
# Two 3x4 float tensors used to demonstrate concatenation, element-wise
# comparison, and a full reduction.
d = torch.arange(12, dtype=torch.float32).reshape((3, 4))
e = torch.tensor(
    [
        [2.0, 0, 1, 3],
        [4, 2, 0, 1],
        [1, 5, 2, 0],
    ]
)
print(torch.cat([d, e], dim=0))  # concatenate along the row axis -> 6 rows
print(torch.cat([d, e], dim=1))  # concatenate along the column axis -> 8 columns
print(torch.eq(d, e))            # boolean mask of positions where d equals e
print(torch.sum(d))              # sum over every element of d

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [ 2.,  0.,  1.,  3.],
        [ 4.,  2.,  0.,  1.],
        [ 1.,  5.,  2.,  0.]])
tensor([[ 0.,  1.,  2.,  3.,  2.,  0.,  1.,  3.],
        [ 4.,  5.,  6.,  7.,  4.,  2.,  0.,  1.],
        [ 8.,  9., 10., 11.,  1.,  5.,  2.,  0.]])
tensor([[False, False, False,  True],
        [ True, False, False, False],
        [False, False, False, False]])
tensor(66.)


In [16]:
# Broadcasting: a (3, 1) column and a (1, 2) row are combined element-wise,
# producing a (3, 2) result.
f = torch.arange(3).reshape((3, 1))
g = torch.arange(2).reshape((1, 2))
print("f =", f)
print("g =", g)
print("f + g =", torch.add(f, g))

f = tensor([[0],
        [1],
        [2]])
g = tensor([[0, 1]])
f + g = tensor([[0, 1],
        [1, 2],
        [2, 3]])


In [18]:
# Indexing and slicing a 3x4 tensor (row/column numbers in the comments
# are 1-based; the indices in the code are 0-based).
h = torch.arange(12).reshape((3, 4))
print(
    h,
    h[1, 2],    # element in row 2, column 3
    h[:, 1],    # every element of column 2
    h[0, :],    # every element of row 1
    h[1, :3],   # first 3 elements of row 2
    h[:2, 2],   # column 3, restricted to the first 2 rows
    h[1:3, 2],  # column 3, rows 2 through 3
    sep="\n",
)


tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
tensor(6)
tensor([1, 5, 9])
tensor([0, 1, 2, 3])
tensor([4, 5, 6])
tensor([2, 6])
tensor([ 6, 10])


In [19]:
# Record the identity of the object currently bound to h, then rebind h to
# the result of an out-of-place addition.
before = id(h)
h = torch.add(h, 1)
print(before == id(h))  # False: the result lives in a newly allocated object

False


In [21]:
# In-place update: h keeps referring to the same object afterwards.
before = id(h)
h.add_(1)
print(before == id(h))  # True: the identity of h is unchanged

# Writing through a full slice fills the existing tensor f instead of
# rebinding the name, so id(f) is the same before and after.
f = torch.zeros_like(h)
print("id f:", id(f))
f[:] = torch.sub(h, 1)
print("id f after slice assignment:", id(f))

True
id f: 4737941648
id f after slice assignment: 4737941648


In [22]:
# Convert the tensor f to a NumPy array, build a tensor back from that
# array, and show the type of each object.
A = f.numpy()
B = torch.tensor(A)
print(type(A), type(B), sep="\n")

<class 'numpy.ndarray'>
<class 'torch.Tensor'>


In [24]:
# A tensor holding exactly one element can be converted to a plain Python
# scalar via .item() or the built-in float()/int() constructors.
a = torch.tensor([3.5])
print(a, a.item(), type(a.item()), float(a), int(a), sep="\n")

tensor([3.5000])
3.5
<class 'float'>
3.5
3


In [25]:
import os
# Write a tiny CSV data set to ./data/house_tiny.csv ("." is the current
# directory; ".." would be its parent). In the rows, the text "NA" stands
# for a missing value.
data_file = os.path.join(".", "data", "house_tiny.csv")
# exist_ok=True: do not raise if the directory is already there.
os.makedirs(os.path.dirname(data_file), exist_ok=True)
with open(data_file, "w") as ff:  # "w" mode overwrites any existing content
    ff.writelines(
        [
            "NumRooms,Alley,Price\n",  # header row
            "NA,Pave,127500\n",
            "2,NA,106000\n",
            "4,NA,178100\n",
            "3,NA,140000\n",
        ]
    )

In [28]:
import pandas as pd
# Load the CSV file at data_file into a DataFrame and display it.
data = pd.read_csv(filepath_or_buffer=data_file)
data

Unnamed: 0,NumRooms,Alley,Price
0,,Pave,127500
1,2.0,,106000
2,4.0,,178100
3,3.0,,140000


In [48]:
# Split the table by column position: the first two columns are the
# features, the third column is the target.
inputs = data.iloc[:, 0:2]  # all rows; columns 0 and 1 (stop index 2 is excluded)
outputs = data.iloc[:, 2]   # all rows; column 2 only
print(inputs)
print(type(inputs))
print(outputs)
print(type(outputs))
# Replace missing numeric entries with their column mean. numeric_only is
# documented as a bool, so pass True (not the integer 1); it restricts the
# mean to numeric columns, which leaves the text column "Alley" untouched.
inputs = inputs.fillna(inputs.mean(numeric_only=True))
print(inputs)

   NumRooms Alley
0       NaN  Pave
1       2.0   NaN
2       4.0   NaN
3       3.0   NaN
<class 'pandas.core.frame.DataFrame'>
0    127500
1    106000
2    178100
3    140000
Name: Price, dtype: int64
<class 'pandas.core.series.Series'>
   NumRooms Alley
0       3.0  Pave
1       2.0   NaN
2       4.0   NaN
3       3.0   NaN


In [49]:
# One-hot encode the categorical column(s). dummy_na=True adds a separate
# indicator column for missing values; dtype=int makes the indicators 0/1.
inputs = pd.get_dummies(
    data=inputs,
    dummy_na=True,
    dtype=int,
)
print(inputs)

   NumRooms  Alley_Pave  Alley_nan
0       3.0           1          0
1       2.0           0          1
2       4.0           0          1
3       3.0           0          1


In [52]:
# Turn the encoded feature table and the target column into tensors by way
# of their NumPy representations, then display both.
ii = torch.tensor(inputs.to_numpy())
oo = torch.tensor(outputs.to_numpy())
ii, oo

(tensor([[3., 1., 0.],
         [2., 0., 1.],
         [4., 0., 1.],
         [3., 0., 1.]], dtype=torch.float64),
 tensor([127500, 106000, 178100, 140000]))

In [20]:
import torch
aa = torch.tensor(
    [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
    ]
)
print(aa)
bb = aa.sum(dim=1)  # reduce along dim 1: one total per row
print(bb)
# Reshape the row totals into a column vector; -1 lets the size of that
# dimension be inferred from the number of elements.
cc = bb.view((-1, 1))
print(cc)

tensor([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]])
tensor([10, 26, 42])
tensor([[10],
        [26],
        [42]])
