In [1]:
from fastai.vision.utils import untar_data, URLs
# The PASCAL 2007 dataset can have more than one label per image (multi-label).
# `path` is the root folder of the dataset: later cells read the label table
# from path/'train.csv' and the images from path/'train'/<fname>.
path = untar_data(URLs.PASCAL_2007)

In [2]:
import pandas as pd

# The labels for each image come from a CSV file, as opposed to the file name
# and folder structure conventions used by the previous single-label datasets
# TODO : Is it possible to manage multiple labels per image with file/folder?
# Answer : Likely not worth it, since an explicit CSV list is easier to check
# than a super fancy naming convention (experience from managing Pokemon assets lol)
df = pd.read_csv(path/'train.csv')
# Inspect the CSV file by reading it into a Pandas DataFrame.
# Columns shown by head(): fname (image file name), labels (the categories in
# the image as one space-separated string, e.g. 'horse person') and
# is_valid (a True/False flag per row)
df.head()

Unnamed: 0,fname,labels,is_valid
0,000005.jpg,chair,True
1,000007.jpg,car,True
2,000009.jpg,horse person,True
3,000012.jpg,car,False
4,000016.jpg,bicycle,True


In [21]:
# PANDAS and DATAFRAMES
# Pandas is a Python library to edit and analyze tabular and time series data
# A DataFrame is a table of rows and columns : the main data structure in Pandas

# Trailing :s are optional (in numpy, pytorch, pandas, etc)
# so both of these first row variants work
# first_row = df.iloc[0,:]
first_row = df.iloc[0]  # positional row 0 -> one value per column
first_column = df.iloc[:, 0]  # every row of positional column 0 ('fname' here)
print(f'--[first_row]--\n{first_row}\n--[first_column]--\n{first_column}')

# can also index by column name instead of by position
fname_column = df['fname']
#print(f'--[fname_column]--\n{fname_column}')


--[first_row]--
fname       000005.jpg
labels           chair
is_valid          True
Name: 0, dtype: object
--[first_column]--
0       000005.jpg
1       000007.jpg
2       000009.jpg
3       000012.jpg
4       000016.jpg
           ...    
5006    009954.jpg
5007    009955.jpg
5008    009958.jpg
5009    009959.jpg
5010    009961.jpg
Name: fname, Length: 5011, dtype: object


In [52]:
from fastai.vision.data import DataBlock

# Create new columns and use them to do calculations
df1 = pd.DataFrame()
df1['a'] = [1,2,3, 4]
df1['b'] = [10, 20, 30, 40]
# adding two columns is element-wise: row i of the result is a[i] + b[i]
sum_column = df1['a'] + df1['b']
print(f'--[a_column]--\n{df1["a"]}\n--[b_column]--\n{df1["b"]}\n--[sum_column]--\n{sum_column}')


# A DataBlock created with no arguments, applied to the 4-row toy DataFrame
dblock = DataBlock()
dsets = dblock.datasets(df1)
dsets.train[0]
# TODO : Why isn't there a valid set? Set up one
# NOTE(review): presumably the default split is random and holds out 20% of
# the rows, and 20% of 4 rows rounds down to 0 items -- confirm against the
# fastai DataBlock / RandomSplitter docs. If so, a larger frame or an explicit
# splitter would give a non-empty validation set.
#dsets.valid[0]

--[a_column]--
0    1
1    2
2    3
3    4
Name: a, dtype: int64
--[b_column]--
0    10
1    20
2    30
3    40
Name: b, dtype: int64
--[sum_column]--
0    11
1    22
2    33
3    44
dtype: int64


In [58]:
# Same argument-less DataBlock, this time applied to the PASCAL DataFrame
dblock = DataBlock()
dsets = dblock.datasets(df)

# a row of the DataFrame is returned TWICE ... once as the input and once as
# the target ... because the DataBlock assumes we have :
# - input
# - target
# and nothing here tells it how to derive either one from a row
# NOTE(review): the output saved with this cell has no --[train]-- / --[valid]--
# headers and shows 'a'/'b' columns, so it does not come from this code --
# it looks stale; re-run the cell
print(f'--[train]--\n{dsets.train[0]}\n--[valid]--\n{dsets.valid[0]}')


(a          2
 b         20
 fname      2
 labels    20
 Name: 1, dtype: int64,
 a          2
 b         20
 fname      2
 labels    20
 Name: 1, dtype: int64)

In [72]:
# We will need to capture explicitly from the DataFrame :
# - 'fname' the image file name
def get_image_path(row):
    """Build the location of the training image described by ``row``.

    ``row['fname']`` on its own is only a bare file name; opening the image
    needs the full path, so the name is resolved inside the ``train`` folder
    under the notebook-level ``path``.
    """
    image_name = row['fname']
    return path / 'train' / image_name
# - 'labels' the list of labels
def get_labels(row):
    """Return the category names of one row as a list of strings.

    ``row['labels']`` holds every category of the image in a single
    space-separated string (e.g. ``'horse person'``), so it has to be split
    before it can be used as a multi-label target.

    Args:
        row: any object supporting ``row['labels']`` that yields a string
            (a DataFrame row, a dict, ...).

    Returns:
        list[str]: one entry per category; an empty list for an empty or
        whitespace-only string.
    """
    # split() with no separator splits on runs of whitespace and drops leading
    # and trailing blanks, so a stray double space never produces an empty ''
    # label (which split(' ') would); well-formed input gives the same result
    return row['labels'].split()

# Wire the two getters into the DataBlock: get_x supplies the input (the image
# path) and get_y the target (the list of label names) for every DataFrame row
dblock = DataBlock(
    get_x=get_image_path,
    get_y=get_labels,
)
dsets = dblock.datasets(df)

print(f'--[train]--\n{dsets.train[0]}\n--[valid]--\n{dsets.valid[0]}')

--[train]--
(Path('/Users/mton/.fastai/data/pascal_2007/train/000162.jpg'), ['tvmonitor', 'person'])
--[valid]--
(Path('/Users/mton/.fastai/data/pascal_2007/train/002342.jpg'), ['person'])


In [62]:
# lambda is a keyword shortcut for defining an anonymous function inline.
# However, lambdas aren't compatible with serialization (saving and loading),
# i.e. if you want to export your Learner after training you can't use lambda.
# Lambdas are not serializable because:
# - pickle (Python's standard serialization) stores a function as a reference
#   to its importable name, not as a copy of its code
# - a lambda is anonymous (its name is just '<lambda>'), so there is nothing
#   to look up again at load time; a function defined with `def` at the top
#   level of a module does not have this problem
# The lambda-based variant is kept below for reference only; it is wrapped in
# a string literal so that it is not executed.
'''
dblock = DataBlock(get_x=lambda r:r['fname'], get_y=lambda r:r['labels'])
dsets = dblock.datasets(df)
'''

In [83]:
from fastai.vision.data import ImageBlock, MultiCategoryBlock

# Actually open the images and turn inputs and targets into tensors
dblock = DataBlock(
    get_x=get_image_path,
    get_y=get_labels,
    # ImageBlock is enough for the input because get_x yields a file path.
    # The target needs MultiCategoryBlock because a row can carry several labels
    blocks=(ImageBlock, MultiCategoryBlock),  # type:ignore
)
dsets = dblock.datasets(df)

# Unlike CategoryBlock, where the target is a single int, MultiCategoryBlock
# gives a vector of 0s with a 1 at the position of every category present.
# TODO : What is one-hot encoding?
# Answer : https://en.wikipedia.org/wiki/One-hot
# In digital circuits and machine learning, a one-hot is a group of bits among
# which the legal combinations of values are only those with a
# single high (1) bit and all the others low (0).[1] A similar implementation
# in which all bits are '1' except one '0' is sometimes called one-cold
# (with several labels per image, more than one position can be 1 here)
# TODO : How is the list of 0s and 1s created?  How is the length determined?
# Pytorch requires tensors, and everything has to be the same size/length.
# In the recorded output the vector has 20 entries, the same as the vocab, and
# the 1s sit at the vocab positions of the image's labels -- so presumably there
# is one slot per vocab entry, in vocab order.

print(f'--[train]--\n{dsets.train[0]}\n--[valid]--\n{dsets.valid[0]}')
print(f'--[train {len(dsets.train.vocab)}]--\n{dsets.train.vocab}\n--[valid {len(dsets.valid.vocab)}]--\n{dsets.valid.vocab}')

--[train]--
(PILImage mode=RGB size=500x346, TensorMultiCategory([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1.,
                     0., 0., 0., 0., 0.]))
--[valid]--
(PILImage mode=RGB size=500x333, TensorMultiCategory([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.,
                     0., 0., 0., 0., 0.]))
--[train 20]--
['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
--[valid 20]--
['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']


In [79]:
import torch

# positions of the 1s in the encoded target of the first training item
idxs = torch.where(dsets.train[0][1]==1.0)[0] # type: ignore
# look those positions up in the vocab to get the label names back
# NOTE(review): the AttributeError saved as this cell's output complains about
# a `len` attribute, which nothing in this cell accesses -- the output looks
# stale (left over from an earlier version of the cell); re-run to confirm
dsets.train.vocab[idxs]

AttributeError: 'CategoryMap' object has no attribute 'len'