In [1]:

%sh
conda install -c fastai fastai


In [2]:
#Pobranie pliku MNIST
import urllib 

urllib.request.urlretrieve("https://pjreddie.com/media/files/mnist_test.csv","/dbfs/FileStore/test.csv")

urllib.request.urlretrieve("https://pjreddie.com/media/files/mnist_train.csv","/dbfs/FileStore/train.csv")

In [3]:
#Importowanie niezbędnych bibliotek
from fastai.vision import *
from fastai import *
from pathlib import Path
import os
import pandas as pd

In [4]:
#Utworzenie ścieżki 

train_df = pd.read_csv("/dbfs/FileStore/train.csv")
test_df = pd.read_csv("/dbfs/FileStore/test.csv")


In [5]:
#deklaracja zmiennych ze ścieżkami
TRAIN = "dbfs:/FileStore/train/"
TEST = "dbfs:/FileStore/test/"

In [6]:
#Utworzenie katalogów do poszczególnych kategorii obrazów
for index in range(10):
   dbutils.fs.mkdirs(TRAIN+"/"+str(index))

In [7]:
dbutils.fs.mkdirs(TEST)


In [8]:
#Funkcja do tworzenia obrazów z plików csv
import numpy as np
from PIL import Image

def saveDigit(digit, filepath):
    digit = digit.reshape(28,28)
    digit = digit.astype(np.uint8)
    
    img = Image.fromarray(digit)

    img.save(filepath)

In [9]:
#deklaracja zmiennych ze ścieżkami
TRAIN = "/dbfs/FileStore/train/"
TEST =  "/dbfs/FileStore/test/"

In [10]:
#Zamieńmy CSV na obrazki
for index, row in train_df.iterrows():
    label, digit = row[0], row[1:]
    
    folder = TRAIN + str(int(label))
    filename = f"{index}.jpg"
    filepath = folder+"/"+filename
    
    digit = digit.values

    saveDigit(digit, filepath)

In [11]:
#Zamieńmy CSV na obrazki
for index, row  in test_df.iterrows():
    digit =  row[1:]
    folder = TEST
    
    filename = f"{index}.jpg"
    filepath = folder+filename
    
    digit = digit.values
  
    
    saveDigit(digit, filepath)

In [12]:
#Przygoujmy pliki do trenowania
tfms = get_transforms(do_flip=False)
data = ImageDataBunch.from_folder(
    path = "/dbfs/FileStore/",
    train = "train",
    test = "test",
    valid_pct = 0.2,
    bs = 16,
    size = 28,
    num_workers = 0,
    ds_tfms = tfms
)

In [13]:
learn = cnn_learner(data, base_arch=models.resnet18, metrics=accuracy, model_dir="/tmp/models", callback_fns=ShowGraph)

In [14]:
#Uczenie sieci neuronwej 
learn.fit_one_cycle(cyc_len=2)

In [15]:
class_score, y = learn.get_preds(DatasetType.Test)

In [16]:
probabilities = class_score[0].tolist()
[f"{index}: {probabilities[index]}" for index in range(len(probabilities))]

In [17]:
class_score = np.argmax(class_score, axis=1)

In [18]:
ImageId = [os.path.splitext(path)[0] for path in os.listdir("/dbfs/FileStore/test/")]
ImageId = [int(path) for path in ImageId]
ImageId = [ID for ID in ImageId]

In [19]:
submission  = pd.DataFrame({
    "ImageId": ImageId,
    "Label": class_score
})
submission.to_csv("/dbfs/FileStore/submission.csv", index=False)
display(submission.sort_values(by=['ImageId']).head(3))
display(submission.tail(300))

ImageId,Label
9728,6
9729,6
973,2
9730,9
9731,8
9732,9
9733,7
9734,4
9735,6
9736,1


In [20]:
#Sprawdźmy Rezultaty
#poniżej wpiszmy numer obrazka, aby go wyświetlić 
image_df = spark.read.format("image").load("/FileStore/test/973.jpg")
display(image_df)

image
"List(dbfs:/FileStore/test/973.jpg, 28, 28, 1, 0, Binary image data placeholder. Access the image data field directly to view raw binary data., iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAAAAABXZoBIAAABw0lEQVR42sVSTYsTQRB9/TE9mUlmIzkbLx72ukcRLyq4IHjxoCzkoCgiCh5XEQ9C8Cf4PxQUPMiS07IaRdG9JLCgSwy4G4PBWROTScrqng/iL7BgmqKr6r1XrwdaAvAMn9IoTg04DT1koXyRp37oc5NwPTbK/ElpIu6xg5B8odJrl1ggaECUlAxcIk0GpUuWVElUz95/dE4wZiUf5C5XA+7tJVOa7DdP29sQhQwG0a+GlPwkSmbP64xcqNUaqtGKp+P9ndbuIKHbIkxl8JiT9JGofXGN6W5Of5PQTp9TyqDyRTw5A6WqQGsyWOYMuOvSw3g93folzbk9W0VCMKlT4PNWx3epbxfJOZmgDOVJhqrKJ/GPW7xfjiulpa8wUGSw2acuImiR2a0ClZlcU6f26GsTVYhiTzt4jM8awvmCHmDF2Yt/wuDauxF9qLM2Y5V4Tk4WJfGWqHMyfSSxUrb+F7XoDY2enkBJQUU+co/SqG8t6FfjxsbVC1eub3/63uscekFRPE80jumA34WG9GexSMZLk6tb34gryYyOxt3u+y+PN6Jl3LuvR58nvfazO5ftz1HOHEwFWbMj5zG/jTYIEOE/xF/CZX8X0mx+kAAAAABJRU5ErkJggg==)"
