In [2]:
# Wenn Daten nicht aufgeteilt sind:
#from sklearn.model_selection import train_test_split
#X_train, X_test, y_train, y_test = train_test_split(X, y)
# Unsere Daten sind aber schon aufgeteilt!

In [3]:
# Vorstellung: MNIST-Daten!
# http://yann.lecun.com/exdb/mnist/
# FashionMNIST: https://github.com/zalandoresearch/fashion-mnist

import gzip
import numpy as np

def open_images(filename):
    with gzip.open(filename, "rb") as file:
        data = file.read()
        return np.frombuffer(data, dtype=np.uint8, offset=16)\
            .reshape(-1, 28, 28)\
            .astype(np.float32)


def open_labels(filename):
    with gzip.open(filename, "rb") as file:
        data = file.read()
        return np.frombuffer(data, dtype=np.uint8, offset=8)

In [4]:
X_train = open_images("C:/Users/Steffen/Documents/Jupyter Notebooks/Udemy_Kurs/Kursmaterialien/Kursmaterialien/data/fashion/train-images-idx3-ubyte.gz")
y_train = open_labels("C:/Users/Steffen/Documents/Jupyter Notebooks/Udemy_Kurs/Kursmaterialien/Kursmaterialien/data/fashion/train-labels-idx1-ubyte.gz")

# Testdaten!
X_test = open_images("C:/Users/Steffen/Documents/Jupyter Notebooks/Udemy_Kurs/Kursmaterialien/Kursmaterialien/data/fashion/t10k-images-idx3-ubyte.gz")
y_test = open_labels("C:/Users/Steffen/Documents/Jupyter Notebooks/Udemy_Kurs/Kursmaterialien/Kursmaterialien/data/fashion/t10k-labels-idx1-ubyte.gz")

In [9]:
from tensorflow.keras.utils import to_categorical

print(y_train)
y_train_cat = to_categorical(y_train)
y_test_cat = to_categorical(y_test)

[9 0 0 ... 3 0 5]


In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [12]:
model = Sequential()
# 100 Neuronen im Hidden Layer, Sigmoid als Aktivierungsfunktion, 28 x 28 px --> 784 Eingänge
model.add(Dense(100, activation="sigmoid", input_shape=(784,)))
# 10 Ausgangsneuronen, Sigmoid als Aktivierungsfunktion
model.add(Dense(10, activation="sigmoid"))
# Modell kompilieren --> Code wird für Hardware (GPU) vorbereitet, optimizer für weights: Stochastic Gradient Descent, Metriken angeben
model.compile(optimizer="sgd", loss="categorical_crossentropy", metrics=["accuracy"])
# X_train Daten reshapen Matrix von 28 x 28 --> Liste 784
X_train_784 = X_train.reshape(60000, 784)
# fit(Eingabedaten, Labels, Epochen: Daten werden 10x durchlaufen, Batch Size: batch von 1000 Eingabedaten, dann weight opt.)
model.fit(X_train_784, y_train_cat, epochs=10, batch_size=1000)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x26def24b580>

In [13]:
# Genauigkeit mit Testdaten bestimmen
print(model.metrics_names)
X_test_784 = X_test.reshape(10000, 784)
model.evaluate(X_test_784, y_test_cat)

['loss', 'accuracy']


[0.703478991985321, 0.7806000113487244]

In [19]:
# Vorher:
# - ja / nein
#  10% / 90% Daten

# Nachher:
# - 10 Kategorien: T-Shirt / Hose / Schuhe / ...
# Aufteilung gleichmäßiger

pred = model.predict(X_test_784)
print(pred.shape)
print(pred)

(10000, 10)
[[0.16104856 0.19916409 0.34908175 ... 0.86043835 0.75425696 0.9299139 ]
 [0.43030116 0.4216223  0.9759307  ... 0.330482   0.3698182  0.12776706]
 [0.7236103  0.9903196  0.54219496 ... 0.16836661 0.34091586 0.3058409 ]
 ...
 [0.7969186  0.14736113 0.3882739  ... 0.29712638 0.89550585 0.3092183 ]
 [0.572602   0.98733556 0.66178405 ... 0.311657   0.37190232 0.46944314]
 [0.14070895 0.39302543 0.38480237 ... 0.9277954  0.523662   0.8666292 ]]


In [25]:
print(pred[0]) # Schuh: Cat 9
print(np.argmax(pred[0]))

print(pred[1]) # Pullover: Cat 2
print(np.argmax(pred[1]))

[0.16104856 0.19916409 0.34908175 0.12107736 0.21494758 0.8799598
 0.15019825 0.86043835 0.75425696 0.9299139 ]
9
[0.43030116 0.4216223  0.9759307  0.43416494 0.9244586  0.4838769
 0.88179636 0.330482   0.3698182  0.12776706]
2


In [46]:
# Konfusionsmatrix erstellen
# Max für alle Daten
y_test_pred = np.argmax(pred, axis=1)

In [47]:
import pandas as pd
ytrue = pd.Series(y_test, name='actual') # ohne categorical
ypred = pd.Series(y_test_pred, name='pred')
print(ytrue)
print(ypred)

0       9
1       2
2       1
3       1
4       6
       ..
9995    9
9996    1
9997    8
9998    1
9999    5
Name: actual, Length: 10000, dtype: uint8
0       9
1       2
2       1
3       1
4       0
       ..
9995    9
9996    1
9997    8
9998    1
9999    5
Name: pred, Length: 10000, dtype: int64


In [48]:
pd.crosstab(ytrue, ypred)

pred,0,1,2,3,4,5,6,7,8,9
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,797,5,15,88,10,2,59,0,24,0
1,13,921,15,39,8,0,2,0,2,0
2,23,2,648,11,197,0,108,0,11,0
3,55,11,9,846,29,1,42,0,7,0
4,0,5,147,49,685,1,102,0,11,0
5,2,1,1,1,0,793,0,112,12,78
6,243,4,142,59,147,1,375,0,29,0
7,0,0,0,0,0,30,0,879,2,89
8,5,0,14,16,3,6,20,11,924,1
9,0,0,0,1,0,14,0,46,1,938
