In [7]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image
from io import BytesIO
import base64

from IPython.display import HTML

def pil_to_base64(img:'Image.Image')->str:
    """Serialise an image to JPEG in memory and return it as base64 text.

    Parameters
    ----------
    img : PIL.Image.Image
        Image to encode; only its ``save(file, format)`` method is used.

    Returns
    -------
    str
        Base64 encoding of the JPEG bytes, decoded to ``str`` so it can be
        embedded directly in text such as an HTML attribute.
    """
    # The annotation refers to the Image *class*; the bare name ``Image`` is
    # the PIL module. It is quoted so it is not evaluated at definition time.
    with BytesIO() as buffer:
        img.save(buffer, 'jpeg')
        encoded = base64.b64encode(buffer.getvalue())
    return encoded.decode()

def format_image(img:np.ndarray, shape:tuple=(28, 28))->np.ndarray:
    """Turn a flat vector of pixel intensities into an 8-bit grayscale array.

    Parameters
    ----------
    img : np.ndarray
        Pixel intensities, expected to lie in [0, 1]; must hold exactly
        ``shape[0] * shape[1]`` values.
    shape : tuple, optional
        Target ``(height, width)``. Defaults to ``(28, 28)``.

    Returns
    -------
    np.ndarray
        Array of dtype ``uint8`` with the requested shape and values 0..255.
    """
    scaled = img.reshape(shape) * 255
    # A bare ``astype(np.uint8)`` truncates toward zero, so a float such as
    # 254.99998 would become 254. Round to the nearest integer first, and clip
    # so that out-of-range inputs saturate instead of wrapping around.
    return np.clip(np.rint(scaled), 0, 255).astype(np.uint8)
    
from tensorflow.examples.tutorials.mnist import input_data

In [3]:
# Read the Fashion-MNIST archives with the TensorFlow MNIST tutorial reader,
# using 'data/fashion' as the local directory and the URL below as the source.
data = input_data.read_data_sets(
    'data/fashion',
    source_url='http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/',
)

Extracting data/fashion\train-images-idx3-ubyte.gz
Extracting data/fashion\train-labels-idx1-ubyte.gz
Extracting data/fashion\t10k-images-idx3-ubyte.gz
Extracting data/fashion\t10k-labels-idx1-ubyte.gz


In [41]:
# Display names for the ten integer class ids (0..9), in id order.
label_dict = dict(enumerate([
    'Top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Boot',
]))

# One row per training image: the uint8 pixel array, its integer label,
# a PIL image built from the array, and the readable class name.
df = (
    pd.DataFrame({
        'image_array': list(map(format_image, data.train.images)),
        'label': data.train.labels,
    })
    .assign(
        image_pil=lambda frame: frame['image_array'].apply(Image.fromarray),
        target=lambda frame: frame['label'].map(label_dict),
    )
)

In [42]:
# Preview the first rows with the default repr: the pixel arrays and PIL
# objects appear only as truncated text.
df.head()

Unnamed: 0,image_array,label,image_pil,target
0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",4,<PIL.Image.Image image mode=L size=28x28 at 0x...,Coat
1,"[[0, 0, 0, 0, 0, 0, 0, 44, 55, 96, 201, 35, 0,...",0,<PIL.Image.Image image mode=L size=28x28 at 0x...,Top
2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",7,<PIL.Image.Image image mode=L size=28x28 at 0x...,Sneaker
3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",9,<PIL.Image.Image image mode=L size=28x28 at 0x...,Boot
4,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",9,<PIL.Image.Image image mode=L size=28x28 at 0x...,Boot


In [43]:
def display_dataframe(df, original_col_width=50):
    """Render ``df`` as an HTML table with its PIL images embedded inline.

    The ``image_array`` column is summarised by its shape, and each value in
    the ``image_pil`` column is emitted as an ``<img>`` tag whose source is a
    base64 JPEG data URI.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to render.
    original_col_width : int, optional
        Value written back to the ``display.max_colwidth`` option once the
        table has been rendered. Defaults to 50.

    Returns
    -------
    IPython.display.HTML
        Displayable wrapper around the generated table markup.
    """
    formatters = {
        'image_array': lambda x: f'{x.shape} numpy array',
        # Canonical data-URI form: no whitespace around ';' or ','.
        'image_pil': lambda x: f'<img src="data:image/jpeg;base64,{pil_to_base64(x)}" alt="Black box">',
    }
    # None disables column truncation so the long base64 payload is kept whole
    # (a negative width is deprecated in pandas).
    pd.set_option('display.max_colwidth', None)
    try:
        table_html = df.to_html(formatters=formatters, escape=False)
    finally:
        # Put the option back even if rendering raised.
        pd.set_option('display.max_colwidth', original_col_width)
    return HTML(table_html)

# Show the first five rows with the image column rendered as inline <img> tags.
display_dataframe(df.head(5))

Unnamed: 0,image_array,label,image_pil,target
0,"(28, 28) numpy array",4,,Coat
1,"(28, 28) numpy array",0,,Top
2,"(28, 28) numpy array",7,,Sneaker
3,"(28, 28) numpy array",9,,Boot
4,"(28, 28) numpy array",9,,Boot


In [44]:
# Default preview again; the output matches the earlier df.head() cell.
df.head()

Unnamed: 0,image_array,label,image_pil,target
0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",4,<PIL.Image.Image image mode=L size=28x28 at 0x...,Coat
1,"[[0, 0, 0, 0, 0, 0, 0, 44, 55, 96, 201, 35, 0,...",0,<PIL.Image.Image image mode=L size=28x28 at 0x...,Top
2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",7,<PIL.Image.Image image mode=L size=28x28 at 0x...,Sneaker
3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",9,<PIL.Image.Image image mode=L size=28x28 at 0x...,Boot
4,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",9,<PIL.Image.Image image mode=L size=28x28 at 0x...,Boot


In [72]:
tmp = df['label'].value_counts()