# Notkun gervigreindar fyrir greiningu á þrívíddarmyndum

Nathan Holmes-King

In [28]:
import numpy as np
import pandas as pd
import pywikibot
import sklearn as sk
from sklearn.model_selection import train_test_split
from stl import mesh
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
device = torch.device("cpu")#"mps" if torch.backends.mps.is_available() else "cpu")
torch.set_default_device(device)

## Inngangsorð
Við ætlum að þjálfa gervigreindarlíkan til að greina þrívíddarmyndir. Í notkun eru líkön sem geta það, en þau nota alltaf "bitmap"-myndir. Þetta líkan hér notar "vector"-myndir eins og Envalys.

## Gögn
Þessi gögn eru STL-skrár frá Wikimedia Commons. Það eru fimm flokkar:
- líkamshlutar
- byggingar
- rúmfræði
- geimfarartæki
- styttur

### Sækja gögn

In [29]:
flokkar = ['body parts', 'buildings', 'geometric shapes', 'objects in space', 'sculptures']
skrar = {}
catnum = {}

In [30]:
commons = pywikibot.Site('commons', 'commons')
cn = 0
for a in flokkar:
    print(a)
    cat = pywikibot.Category(commons, 'STL files of ' + a)
    catnum[a] = cn
    cn += 1
    n = 0
    for p in cat.members(member_type=['file']):
        if n % 10 == 0:
            print(n)
        mynd = pywikibot.FilePage(p)
        try:
            tempf = open('/Users/002-nathan/Desktop/Envalys/STLdata/' + a + '_' + p.title()[5:], 'r')
            tempf.close()
        except FileNotFoundError:
            mynd.download(filename='/Users/002-nathan/Desktop/Envalys/STLdata/' + a + '_' + p.title()[5:])
        try:
            skrar[a].append(p.title()[5:])
        except KeyError:
            skrar[a] = [p.title()[5:]]
        n += 1
        if n >= 100:
            break

body parts
0
10
20
30
40
50
60
70
80
buildings
0
10
20
geometric shapes
0
10
20
30
40
objects in space
0
10
20
30
40
50
sculptures
0
10
20
30
40
50


### Setja upp gögn fyrir notkun
Við deilum myndinni í 2.097.152 (128x128x128) þrívíddardíla eða "voxels", teljum punktana í hverjum díl, og notum töluna til að greina myndina.

In [43]:
X_preproc = []
y_preproc = []
for cat in skrar:
    print(cat)
    byrjun = time.time()
    for fi in skrar[cat]:
        # Load data
        gogn = mesh.Mesh.from_file('/Users/002-nathan/Desktop/Envalys/STLdata/' + cat + '_' + fi)
        # Re-scale to be 
        gogn_x = [a[0] for a in gogn.v0]# + [a[0] for a in gogn.v1] + [a[0] for a in gogn.v2]
        gogn_y = [a[1] for a in gogn.v0]# + [a[1] for a in gogn.v1] + [a[1] for a in gogn.v2]
        gogn_z = [a[2] for a in gogn.v0]# + [a[2] for a in gogn.v1] + [a[2] for a in gogn.v2]
        minx = min(gogn_x)
        miny = min(gogn_y)
        minz = min(gogn_z)
        scale = min(128 / (max(gogn_x) + 1e-3 - minx), 
                    128 / (max(gogn_y) + 1e-3 - miny), 
                    128 / (max(gogn_z) + 1e-3 - minz))
        ny_gogn = np.zeros((1, 128, 128, 128), dtype=np.float32)
        for a in gogn.v0:
            x = int((a[0] - minx) * scale)
            y = int((a[1] - miny) * scale)
            z = int((a[2] - minz) * scale)
            ny_gogn[0][x][y][z] += 1
        X_preproc.append(ny_gogn)
        y_preproc.append([catnum[cat]])
    print(time.time() - byrjun)
    print('----')

body parts
45.23072910308838
----
buildings
25.889988899230957
----
geometric shapes
32.926254987716675
----
objects in space
exception (False, 'No lines found, impossible to read')
46.50938415527344
----
sculptures
118.43725895881653
----


In [50]:
X_data = []
y_data = []
for a in X_preproc:
    X_data.append(torch.from_numpy(a).to(device))
for a in y_preproc:
    y_data.append(torch.tensor(a).to(device))

## Líkan

In [51]:
class likan(nn.Module):
    def __init__(self):
        super(likan, self).__init__()
        self.conv1 = nn.Conv3d(1, 16, 5, padding=2)
        self.conv2 = nn.Conv3d(16, 32, 5, padding=2)
        self.conv3 = nn.Conv3d(32, 64, 5, padding=2)
        self.fc1 = nn.Linear(64*16*16*16, 1024)
        self.fc2 = nn.Linear(1024, 128)
        self.fc3 = nn.Linear(128, 5)  # Change second param
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool3d(x, 2)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool3d(x, 2)
        x = self.conv3(x)
        x = F.relu(x)
        x = F.max_pool3d(x, 2)
        x = x.view(-1, 64*16*16*16)
        x = F.relu(self.fc1(x))
        x = F.dropout(x)
        x = F.relu(self.fc2(x))
        x = F.dropout(x)
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

In [72]:
learning_rate = 0.01
momentum = 0.5

In [58]:
byrjun = time.time()
acc = []
for i in range(10):
    print('Random state:', i)
    X_data = []
    y_data = []
    for a in X_preproc:
        X_data.append(torch.from_numpy(a).to(device))
    for a in y_preproc:
        y_data.append(torch.tensor(a).to(device))
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, random_state=i)
    model = likan()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    for n in range(len(X_train)):
        optimizer.zero_grad()
        output = model(X_train[n])
        loss = F.nll_loss(output, y_train[n])
        loss.backward()
        optimizer.step()
    trainmat = np.zeros((5, 5), dtype=np.int32)  # Change params
    correct = 0
    with torch.no_grad():
        for n in range(len(X_train)):
            output = model(X_train[n])
            y_pred = output.data.max(1, keepdim=True)[1]
            correct += y_pred.eq(y_train[n]).sum().item()
            trainmat[y_train[n].item()][y_pred.item()] += 1
    print('Train accuracy:', correct / len(y_train))
    print(trainmat)
    testmat = np.zeros((5, 5), dtype=np.int32)  # Change params
    correct = 0
    with torch.no_grad():
        for n in range(len(X_test)):
            output = model(X_test[n])
            y_pred = output.data.max(1, keepdim=True)[1]
            correct += y_pred.eq(y_test[n]).sum().item()
            testmat[y_test[n].item()][y_pred.item()] += 1
    print('Test accuracy:', correct / len(y_test))
    print(testmat)
    acc.append(correct / len(y_test))
    print('Time:', time.time() - byrjun)
    print('----')
print('Mean test accuracy:', np.mean(acc))

Random state: 0
Train accuracy: 0.33507853403141363
[[64  0  0  0  0]
 [19  0  0  0  0]
 [33  0  0  0  0]
 [32  0  0  0  0]
 [43  0  0  0  0]]
Test accuracy: 0.28125
[[18  0  0  0  0]
 [ 5  0  0  0  0]
 [12  0  0  0  0]
 [19  0  0  0  0]
 [10  0  0  0  0]]
Time: 700.9412417411804
----
Random state: 1
Train accuracy: 0.31413612565445026
[[60  0  0  0  0]
 [19  0  0  0  0]
 [34  0  0  0  0]
 [37  0  0  0  0]
 [41  0  0  0  0]]
Test accuracy: 0.34375
[[22  0  0  0  0]
 [ 5  0  0  0  0]
 [11  0  0  0  0]
 [14  0  0  0  0]
 [12  0  0  0  0]]
Time: 1394.9648568630219
----
Random state: 2
Train accuracy: 0.3089005235602094
[[59  0  0  0  0]
 [17  0  0  0  0]
 [34  0  0  0  0]
 [41  0  0  0  0]
 [40  0  0  0  0]]
Test accuracy: 0.359375
[[23  0  0  0  0]
 [ 7  0  0  0  0]
 [11  0  0  0  0]
 [10  0  0  0  0]
 [13  0  0  0  0]]
Time: 2128.283779859543
----
Random state: 3
Train accuracy: 0.29842931937172773
[[57  0  0  0  0]
 [17  0  0  0  0]
 [37  0  0  0  0]
 [38  0  0  0  0]
 [42  0  0  0  0]

Níu líkön úr tíu spá að allar myndir séu líkamshlutar. Er þetta af því það eru ekki nógar myndir í hinum flokkunum? Líkan 6 er með 44 styttumyndir í "train"-gögnunum, sem er það flesta í öllum lotunum, og er það einasta sem spáir aðra flokka. Nú skulum við þjálfa líkön án líkamshlutamynda.

In [65]:
class likan_alh(nn.Module):
    def __init__(self):
        super(likan_alh, self).__init__()
        self.conv1 = nn.Conv3d(1, 16, 5, padding=2)
        self.conv2 = nn.Conv3d(16, 32, 5, padding=2)
        self.conv3 = nn.Conv3d(32, 64, 5, padding=2)
        self.fc1 = nn.Linear(64*16*16*16, 1024)
        self.fc2 = nn.Linear(1024, 128)
        self.fc3 = nn.Linear(128, 4)  # Change second param
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool3d(x, 2)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool3d(x, 2)
        x = self.conv3(x)
        x = F.relu(x)
        x = F.max_pool3d(x, 2)
        x = x.view(-1, 64*16*16*16)
        x = F.relu(self.fc1(x))
        x = F.dropout(x)
        x = F.relu(self.fc2(x))
        x = F.dropout(x)
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

In [67]:
byrjun = time.time()
acc = []
for i in range(10):
    print('Random state:', i)
    X_data = []
    y_data = []
    j = 0
    while j < len(X_preproc):
        if y_preproc[j][0] != 0:
            X_data.append(torch.from_numpy(X_preproc[j]).to(device))
            y_data.append(torch.tensor(y_preproc[j]).to(device) - 1)
        j += 1
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, random_state=i)
    model = likan_alh()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    for n in range(len(X_train)):
        optimizer.zero_grad()
        output = model(X_train[n])
        loss = F.nll_loss(output, y_train[n])
        loss.backward()
        optimizer.step()
    trainmat = np.zeros((4, 4), dtype=np.int32)  # Change params
    correct = 0
    with torch.no_grad():
        for n in range(len(X_train)):
            output = model(X_train[n])
            y_pred = output.data.max(1, keepdim=True)[1]
            correct += y_pred.eq(y_train[n]).sum().item()
            trainmat[y_train[n].item()][y_pred.item()] += 1
    print('Train accuracy:', correct / len(y_train))
    print(trainmat)
    testmat = np.zeros((4, 4), dtype=np.int32)  # Change params
    correct = 0
    with torch.no_grad():
        for n in range(len(X_test)):
            output = model(X_test[n])
            y_pred = output.data.max(1, keepdim=True)[1]
            correct += y_pred.eq(y_test[n]).sum().item()
            testmat[y_test[n].item()][y_pred.item()] += 1
    print('Test accuracy:', correct / len(y_test))
    print(testmat)
    acc.append(correct / len(y_test))
    print('Time:', time.time() - byrjun)
    print('----')
print('Mean test accuracy:', np.mean(acc))

Random state: 0
Train accuracy: 0.14728682170542637
[[19  0  0  0]
 [30  0  0  0]
 [38  0  0  0]
 [42  0  0  0]]
Test accuracy: 0.11363636363636363
[[ 5  0  0  0]
 [15  0  0  0]
 [13  0  0  0]
 [11  0  0  0]]
Time: 580.6093430519104
----
Random state: 1
Train accuracy: 0.13953488372093023
[[18  0  0  0]
 [33  0  0  0]
 [35  0  0  0]
 [43  0  0  0]]
Test accuracy: 0.13636363636363635
[[ 6  0  0  0]
 [12  0  0  0]
 [16  0  0  0]
 [10  0  0  0]]
Time: 1114.7346620559692
----
Random state: 2
Train accuracy: 0.12403100775193798
[[16  0  0  0]
 [33  0  0  0]
 [38  0  0  0]
 [42  0  0  0]]
Test accuracy: 0.18181818181818182
[[ 8  0  0  0]
 [12  0  0  0]
 [13  0  0  0]
 [11  0  0  0]]
Time: 1588.4203751087189
----
Random state: 3
Train accuracy: 0.13953488372093023
[[18  0  0  0]
 [32  0  0  0]
 [38  0  0  0]
 [41  0  0  0]]
Test accuracy: 0.13636363636363635
[[ 6  0  0  0]
 [13  0  0  0]
 [13  0  0  0]
 [12  0  0  0]]
Time: 2077.139023065567
----
Random state: 4
Train accuracy: 0.116279069767

Nú spáir níu líkön úr tíu að allar myndir séu byggingar, þrátt fyrir því að byggingamyndir eru fæstar í öllum líkönum. Líkan 6 spáir að allar myndir nema ein séu styttur. Við skulum prófa aftur með líkönum 0 og 6.

In [70]:
byrjun = time.time()
acc = []
for i in [0, 6]:
    print('Random state:', i)
    X_data = []
    y_data = []
    j = 0
    while j < len(X_preproc):
        if y_preproc[j][0] != 0:
            X_data.append(torch.from_numpy(X_preproc[j]).to(device))
            y_data.append(torch.tensor(y_preproc[j]).to(device) - 1)
        j += 1
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, random_state=i)
    model = likan_alh()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    for n in range(len(X_train)):
        optimizer.zero_grad()
        output = model(X_train[n])
        loss = F.nll_loss(output, y_train[n])
        loss.backward()
        optimizer.step()
    trainmat = np.zeros((4, 4), dtype=np.int32)  # Change params
    correct = 0
    with torch.no_grad():
        for n in range(len(X_train)):
            output = model(X_train[n])
            y_pred = output.data.max(1, keepdim=True)[1]
            print(output, y_pred, y_train[n])
            correct += y_pred.eq(y_train[n]).sum().item()
            trainmat[y_train[n].item()][y_pred.item()] += 1
    print('Train accuracy:', correct / len(y_train))
    print(trainmat)
    testmat = np.zeros((4, 4), dtype=np.int32)  # Change params
    correct = 0
    with torch.no_grad():
        for n in range(len(X_test)):
            output = model(X_test[n])
            y_pred = output.data.max(1, keepdim=True)[1]
            correct += y_pred.eq(y_test[n]).sum().item()
            testmat[y_test[n].item()][y_pred.item()] += 1
    print('Test accuracy:', correct / len(y_test))
    print(testmat)
    acc.append(correct / len(y_test))
    print('Time:', time.time() - byrjun)
    print('----')
print('Mean test accuracy:', np.mean(acc))

Random state: 0
tensor([[nan, nan, nan, nan]]) tensor([[0]]) tensor([3])
tensor([[nan, nan, nan, nan]]) tensor([[0]]) tensor([0])
tensor([[nan, nan, nan, nan]]) tensor([[0]]) tensor([3])
tensor([[nan, nan, nan, nan]]) tensor([[0]]) tensor([2])
tensor([[nan, nan, nan, nan]]) tensor([[0]]) tensor([3])
tensor([[nan, nan, nan, nan]]) tensor([[0]]) tensor([0])
tensor([[nan, nan, nan, nan]]) tensor([[0]]) tensor([3])
tensor([[nan, nan, nan, nan]]) tensor([[0]]) tensor([3])
tensor([[nan, nan, nan, nan]]) tensor([[0]]) tensor([1])
tensor([[nan, nan, nan, nan]]) tensor([[0]]) tensor([3])
tensor([[nan, nan, nan, nan]]) tensor([[0]]) tensor([0])
tensor([[nan, nan, nan, nan]]) tensor([[0]]) tensor([2])
tensor([[nan, nan, nan, nan]]) tensor([[0]]) tensor([3])
tensor([[nan, nan, nan, nan]]) tensor([[0]]) tensor([3])
tensor([[nan, nan, nan, nan]]) tensor([[0]]) tensor([2])
tensor([[nan, nan, nan, nan]]) tensor([[0]]) tensor([3])
tensor([[nan, nan, nan, nan]]) tensor([[0]]) tensor([2])
tensor([[nan, n

Hvaðan koma allar þessar NaN-tölur? Og hvers vegna spáir líkan 6 eitthvað annað en í fyrra?

In [71]:
byrjun = time.time()
acc = []
for i in [0, 6]:
    print('Random state:', i)
    X_data = []
    y_data = []
    j = 0
    while j < len(X_preproc):
        if y_preproc[j][0] != 0:
            X_data.append(torch.from_numpy(X_preproc[j]).to(device))
            y_data.append(torch.tensor(y_preproc[j]).to(device) - 1)
        j += 1
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, random_state=i)
    model = likan_alh()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    for n in range(len(X_train)):
        optimizer.zero_grad()
        output = model(X_train[n])
        loss = F.nll_loss(output, y_train[n])
        print(n, output, loss)
        loss.backward()
        optimizer.step()
    trainmat = np.zeros((4, 4), dtype=np.int32)  # Change params
    correct = 0
    with torch.no_grad():
        for n in range(len(X_train)):
            output = model(X_train[n])
            y_pred = output.data.max(1, keepdim=True)[1]
            correct += y_pred.eq(y_train[n]).sum().item()
            trainmat[y_train[n].item()][y_pred.item()] += 1
    print('Train accuracy:', correct / len(y_train))
    print(trainmat)
    testmat = np.zeros((4, 4), dtype=np.int32)  # Change params
    correct = 0
    with torch.no_grad():
        for n in range(len(X_test)):
            output = model(X_test[n])
            y_pred = output.data.max(1, keepdim=True)[1]
            correct += y_pred.eq(y_test[n]).sum().item()
            testmat[y_test[n].item()][y_pred.item()] += 1
    print('Test accuracy:', correct / len(y_test))
    print(testmat)
    acc.append(correct / len(y_test))
    print('Time:', time.time() - byrjun)
    print('----')
print('Mean test accuracy:', np.mean(acc))

Random state: 0
0 tensor([[-1.2593, -1.3875, -1.3491, -1.5752]], grad_fn=<LogSoftmaxBackward0>) tensor(1.5752, grad_fn=<NllLossBackward0>)
1 tensor([[-1.7825, -1.3764, -1.7435, -0.9054]], grad_fn=<LogSoftmaxBackward0>) tensor(1.7825, grad_fn=<NllLossBackward0>)
2 tensor([[-1.1119, -2.4464, -2.7396, -0.6542]], grad_fn=<LogSoftmaxBackward0>) tensor(0.6542, grad_fn=<NllLossBackward0>)
3 tensor([[ -2.1972,  -6.3562, -10.3150,  -0.1198]],
       grad_fn=<LogSoftmaxBackward0>) tensor(10.3150, grad_fn=<NllLossBackward0>)
4 tensor([[-3.7430, -2.7885, -0.0946, -5.2860]], grad_fn=<LogSoftmaxBackward0>) tensor(5.2860, grad_fn=<NllLossBackward0>)
5 tensor([[-1.4930, -1.3742, -1.3338, -1.3517]], grad_fn=<LogSoftmaxBackward0>) tensor(1.4930, grad_fn=<NllLossBackward0>)
6 tensor([[-1.4887, -1.5248, -0.8859, -1.9358]], grad_fn=<LogSoftmaxBackward0>) tensor(1.9358, grad_fn=<NllLossBackward0>)
7 tensor([[-4.2736, -3.2277, -3.0028, -0.1089]], grad_fn=<LogSoftmaxBackward0>) tensor(0.1089, grad_fn=<NllLoss

Kannski eigum við að lækka námshraðann ("learning rate"). Og líkan 6 var bara heppið; það tók nærri tveim sinnum lengra til að verða NaN en líkan 0.

In [73]:
learning_rate = 0.001
momentum = 0.5

In [74]:
byrjun = time.time()
acc = []
for i in [0, 6]:
    print('Random state:', i)
    X_data = []
    y_data = []
    j = 0
    while j < len(X_preproc):
        if y_preproc[j][0] != 0:
            X_data.append(torch.from_numpy(X_preproc[j]).to(device))
            y_data.append(torch.tensor(y_preproc[j]).to(device) - 1)
        j += 1
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, random_state=i)
    model = likan_alh()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    for n in range(len(X_train)):
        optimizer.zero_grad()
        output = model(X_train[n])
        loss = F.nll_loss(output, y_train[n])
        print(n, output, loss)
        loss.backward()
        optimizer.step()
    trainmat = np.zeros((4, 4), dtype=np.int32)  # Change params
    correct = 0
    with torch.no_grad():
        for n in range(len(X_train)):
            output = model(X_train[n])
            y_pred = output.data.max(1, keepdim=True)[1]
            correct += y_pred.eq(y_train[n]).sum().item()
            trainmat[y_train[n].item()][y_pred.item()] += 1
    print('Train accuracy:', correct / len(y_train))
    print(trainmat)
    testmat = np.zeros((4, 4), dtype=np.int32)  # Change params
    correct = 0
    with torch.no_grad():
        for n in range(len(X_test)):
            output = model(X_test[n])
            y_pred = output.data.max(1, keepdim=True)[1]
            correct += y_pred.eq(y_test[n]).sum().item()
            testmat[y_test[n].item()][y_pred.item()] += 1
    print('Test accuracy:', correct / len(y_test))
    print(testmat)
    acc.append(correct / len(y_test))
    print('Time:', time.time() - byrjun)
    print('----')
print('Mean test accuracy:', np.mean(acc))

Random state: 0
0 tensor([[-1.5538, -1.2743, -1.4360, -1.3055]], grad_fn=<LogSoftmaxBackward0>) tensor(1.3055, grad_fn=<NllLossBackward0>)
1 tensor([[-1.4058, -1.4876, -1.3623, -1.2989]], grad_fn=<LogSoftmaxBackward0>) tensor(1.4058, grad_fn=<NllLossBackward0>)
2 tensor([[-1.5877, -1.2475, -1.5297, -1.2318]], grad_fn=<LogSoftmaxBackward0>) tensor(1.2318, grad_fn=<NllLossBackward0>)
3 tensor([[-1.2168, -1.6360, -1.4493, -1.2934]], grad_fn=<LogSoftmaxBackward0>) tensor(1.4493, grad_fn=<NllLossBackward0>)
4 tensor([[-4.4423, -1.3190, -4.5708, -0.3418]], grad_fn=<LogSoftmaxBackward0>) tensor(0.3418, grad_fn=<NllLossBackward0>)
5 tensor([[-1.3925, -1.4994, -1.3392, -1.3234]], grad_fn=<LogSoftmaxBackward0>) tensor(1.3925, grad_fn=<NllLossBackward0>)
6 tensor([[-2.1605, -1.8004, -1.2008, -0.8710]], grad_fn=<LogSoftmaxBackward0>) tensor(0.8710, grad_fn=<NllLossBackward0>)
7 tensor([[-27.6683, -29.9345, -22.5681,   0.0000]],
       grad_fn=<LogSoftmaxBackward0>) tensor(0., grad_fn=<NllLossBackw

Nú er málið lagað. Ef við gerum meira en eina lotu ("epoch"), þá fáum við betri niðurstöður. Við skulum prófa aftur með 5 lotum og öllum myndum.

In [80]:
num_epochs = 5

In [81]:
X_data = []
y_data = []
j = 0
while j < len(X_preproc):
    X_data.append(torch.from_numpy(X_preproc[j]).to(device))
    y_data.append(torch.tensor(y_preproc[j]).to(device))
    j += 1
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data)

In [82]:
byrjun = time.time()
acc = []
for i in range(num_epochs):
    print('Epoch:', i)
    model = likan()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    for n in range(len(X_train)):
        optimizer.zero_grad()
        output = model(X_train[n])
        loss = F.nll_loss(output, y_train[n])
        loss.backward()
        optimizer.step()
    trainmat = np.zeros((5, 5), dtype=np.int32)  # Change params
    correct = 0
    with torch.no_grad():
        for n in range(len(X_train)):
            output = model(X_train[n])
            y_pred = output.data.max(1, keepdim=True)[1]
            correct += y_pred.eq(y_train[n]).sum().item()
            trainmat[y_train[n].item()][y_pred.item()] += 1
    print('Train accuracy:', correct / len(y_train))
    print(trainmat)
    testmat = np.zeros((5, 5), dtype=np.int32)  # Change params
    correct = 0
    with torch.no_grad():
        for n in range(len(X_test)):
            output = model(X_test[n])
            y_pred = output.data.max(1, keepdim=True)[1]
            correct += y_pred.eq(y_test[n]).sum().item()
            testmat[y_test[n].item()][y_pred.item()] += 1
    print('Test accuracy:', correct / len(y_test))
    print(testmat)
    acc.append(correct / len(y_test))
    print('Time:', time.time() - byrjun)
    print('----')
print('Mean test accuracy:', np.mean(acc))

Epoch: 0
Train accuracy: 0.4607329842931937
[[60  0  0  0  5]
 [14  0  0  2  0]
 [32  0  1  1  1]
 [18  0  1  8  5]
 [23  0  1  0 19]]
Test accuracy: 0.359375
[[17  0  0  0  0]
 [ 7  0  0  0  1]
 [ 9  0  0  0  1]
 [10  0  0  4  5]
 [ 8  0  0  0  2]]
Time: 789.9530470371246
----
Epoch: 1
Train accuracy: 0.4031413612565445
[[61  0  0  1  3]
 [13  0  0  3  0]
 [33  0  0  2  0]
 [19  1  0  9  3]
 [25  1  0 10  7]]
Test accuracy: 0.453125
[[17  0  0  0  0]
 [ 7  0  0  1  0]
 [ 9  0  1  0  0]
 [10  1  0  8  0]
 [ 5  0  0  2  3]]
Time: 1564.7488777637482
----
Epoch: 2
Train accuracy: 0.41361256544502617
[[64  0  0  1  0]
 [14  0  0  2  0]
 [33  1  0  1  0]
 [18  2  0 12  0]
 [26  0  0 14  3]]
Test accuracy: 0.34375
[[17  0  0  0  0]
 [ 7  0  0  1  0]
 [10  0  0  0  0]
 [14  0  0  4  1]
 [ 9  0  0  0  1]]
Time: 2330.3524680137634
----
Epoch: 3
Train accuracy: 0.4293193717277487
[[63  0  1  0  1]
 [13  1  0  1  1]
 [33  0  1  1  0]
 [20  3  0  8  1]
 [28  2  1  3  9]]
Test accuracy: 0.359375
[[

Eftir 2 lotur spáir líkanið rétta flokkin fyrir 45% myndanna. Prósentið lækkar á eftir, líklega vegna ofurþjálfunar ("over-fitting").

## Lokaorð

Hvernig notum við líkan eins og þetta?
- Til að greina þrívíddarmyndir sem notendur teikna.
- Sem fyrsta skref í stærra líkani sem teiknir sjálft eftir texta sem notendur skrifa.

Vandamál:
- **Ekki nóg gögn.** Alvörulíkön nota fleiri en 10.000 mynda til að þjálfast.
- Ekki nógir flokkar. Allir flokkar, sérstaklega "líkamshlutar", eru mjög fjölbreyttir og alls ekki eins fyrir tölvuna.
- Við notum aðeins punkta, ekki línur eða flatir, til að greina.

En þetta er það besta fyrir gögnin sem ég var með. Og það tók nóg langan tíma til að þjálfa líkanið; þess vegna þurfum við betri tölvu.