In [1]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import PIL
from PIL import Image

### Pillow resource:

https://machinelearningmastery.com/how-to-load-and-manipulate-images-for-deep-learning-in-python-with-pil-pillow/

You may need to install Pillow, can be done via pip:

In [2]:
!pip install Pillow



Let's look at one image and its size:

In [2]:

# load the image
image = Image.open('./attachments/SET0007.jpg')

# summarize some details about the image
print(image.format)
print(image.mode)
print(image.size)

# show the image
image.show()

JPEG
RGB
(2544, 3296)


Let's crop it so that all shapes can still be seen on each card, but we cut out some white space common to all cards. Doing the cropping so that the dimensions are same as bee image set, in case I want to try new images on neural nets that were trained on a different set of images entirely.

In [3]:
# create a cropped image
cropped = image.crop((60, 20, 950, 643))
# show cropped image
cropped.show()

Now we resize to save computational space:

In [4]:
basewidth = 50

wpercent = (basewidth / float(cropped.size[0]))
hsize = int((float(cropped.size[1]) * float(wpercent)))
img = cropped.resize((basewidth, hsize), PIL.Image.ANTIALIAS)
img.show()

In [9]:
img.size

(50, 35)

In [25]:
cropped.size

(890, 623)

In [5]:
img_vec = np.array(img)
img_vec.shape

(35, 50, 3)

In [8]:
img_vec

array([[[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [254, 254, 254]],

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       ...,

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]]

In [6]:
img_again = Image.fromarray(img_vec)

In [7]:
img_again.show()

In [52]:
practice = np.array([[1, 'a'],[2,'b'],[3,'c']])

In [53]:
practice[:,1]

array(['a', 'b', 'c'], dtype='<U21')

In [54]:
practice

array([['1', 'a'],
       ['2', 'b'],
       ['3', 'c']], dtype='<U21')

https://note.nkmk.me/en/python-numpy-image-processing/

## Iterating through a folder

In [161]:
image_df = pd.DataFrame(img_vec_flat)

# creating color column
image_df.insert(loc=0, column='color', value='red')

# creating shape column
image_df.insert(loc=0, column='shape', value='squiggle')

# creating number column
image_df.insert(loc=0, column='number', value=3)

# creating fill column
image_df.insert(loc=0, column='fill', value='striped')

# creating fill column
image_df.insert(loc=0, column='card_filename', value='SET0007')

In [162]:
image_df

Unnamed: 0,card_filename,fill,number,shape,color,0,1,2,3,4,...,5240,5241,5242,5243,5244,5245,5246,5247,5248,5249
0,SET0007,striped,3,squiggle,red,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255


In [163]:
35*50*3

5250

In [164]:
df = image_df.copy()

In [168]:
# filenames= []
i = 0

for filename in os.listdir('./attachments'):
    # get the image
    image = Image.open('./attachments/' + filename)
    
    #crop out white space
    cropped = image.crop((60, 20, 950, 643))
    
    #rescale to 50x35
    basewidth = 50
    wpercent = (basewidth / float(cropped.size[0]))
    hsize = int((float(cropped.size[1]) * float(wpercent)))
    img = cropped.resize((basewidth, hsize), PIL.Image.ANTIALIAS)
    
    img.show()
    
    #reshape to vector to be used as row in dataframe
    img_vec = np.array(img)
    img_vec_flat = img_vec.reshape(1,-1)
    
    # creating dataframe
    image_df = pd.DataFrame(img_vec_flat)

    # creating color column
    color = input('color?')
    image_df.insert(loc=0, column='color', value=color)

    # creating shape column
    shape = input('shape?')
    image_df.insert(loc=0, column='shape', value=shape)

    # creating number column
    number = input('number?')
    image_df.insert(loc=0, column='number', value=number)

    # creating fill column
    fill = input('fill?')
    image_df.insert(loc=0, column='fill', value=fill)

    # creating fill column
    image_df.insert(loc=0, column='card_filename', value=filename)
    
    # concatonate with final dataframe
    df = pd.concat([df,image_df], axis = 0)
    
    
    i = i+1
    print(f'done with {i}')
    

color?purple
shape?oval
number?1
fill?outlined
done with 1
color?purple
shape?diamond
number?2
fill?outlined
done with 2
color?purple
shape?diamond
number?2
fill?striped
done with 3
color?purple
shape?diamond
number?3
fill?outlined
done with 4
color?red
shape?oval
number?2
fill?solid
done with 5
color?purple
shape?squiggle
number?2
fill?solid
done with 6
color?red
shape?oval
number?3
fill?striped
done with 7
color?green
shape?oval
number?2
fill?striped
done with 8
color?red
shape?diamond
number?3
fill?solid
done with 9
color?purple
shape?oval
number?2
fill?solid
done with 10
color?green
shape?oval
number?1
fill?solid
done with 11
color?green
shape?squiggle
number?outlined
fill?solid
done with 12
color?purple
shape?squiggle
number?1
fill?striped
done with 13
color?purple
shape?squiggle
number?2
fill?outlined
done with 14
color?green
shape?squiggle
number?1
fill?striped
done with 15
color?green
shape?diamond
number?3
fill?striped
done with 16
color?red
shape?diamond
number?2
fill?solid
d

IsADirectoryError: [Errno 21] Is a directory: './attachments/.ipynb_checkpoints'

In [172]:
# filenames= []
i = 60

for filename in os.listdir('./attachments/attachments61to81'):
    # get the image
    image = Image.open('./attachments/attachments61to81/' + filename)
    
    #crop out white space
    cropped = image.crop((60, 20, 950, 643))
    
    #rescale to 50x35
    basewidth = 50
    wpercent = (basewidth / float(cropped.size[0]))
    hsize = int((float(cropped.size[1]) * float(wpercent)))
    img = cropped.resize((basewidth, hsize), PIL.Image.ANTIALIAS)
    
    img.show()
    
    #reshape to vector to be used as row in dataframe
    img_vec = np.array(img)
    img_vec_flat = img_vec.reshape(1,-1)
    
    # creating dataframe
    image_df = pd.DataFrame(img_vec_flat)

    # creating color column
    color = input('color?')
    image_df.insert(loc=0, column='color', value=color)

    # creating shape column
    shape = input('shape?')
    image_df.insert(loc=0, column='shape', value=shape)

    # creating number column
    number = input('number?')
    image_df.insert(loc=0, column='number', value=number)

    # creating fill column
    fill = input('fill?')
    image_df.insert(loc=0, column='fill', value=fill)

    # creating fill column
    image_df.insert(loc=0, column='card_filename', value=filename)
    
    # concatonate with final dataframe
    df = pd.concat([df,image_df], axis = 0)
    
    
    i = i+1
    print(f'done with {i}')
    

color?purple
shape?oval
number?1
fill?outlined
done with 61
color?purple
shape?diamond
number?2
fill?outlined
done with 62
color?purple
shape?diamond
number?2
fill?striped
done with 63
color?purple
shape?diamond
number?3
fill?outlined
done with 64
color?red
shape?oval
number?2
fill?solid
done with 65
color?red
shape?diamond
number?3
fill?solid
done with 66
color?purple
shape?oval
number?2
fill?solid
done with 67
color?green
shape?squiggle
number?1
fill?outlined
done with 68
color?purple
shape?squiggle
number?1
fill?striped
done with 69
color?purple
shape?squiggle
number?2
fill?outlined
done with 70
color?green
shape?squiggle
number?1
fill?striped
done with 71
color?red
shape?diamond
number?2
fill?solid
done with 72
color?green
shape?squiggle
number?2
fill?outlined
done with 73
color?green
shape?oval
number?2
fill?solid
done with 74
color?green
shape?oval
number?3
fill?outlined
done with 75
color?green
shape?squiggle
number?2
fill?striped
done with 76
color?green
shape?squiggle
number?3

In [200]:
df.head(10)

Unnamed: 0,card_filename,fill,number,shape,color,0,1,2,3,4,...,5240,5241,5242,5243,5244,5245,5246,5247,5248,5249
0,SET0007,striped,3,squiggle,red,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0063.jpg,outlined,1,oval,purple,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0077.jpg,outlined,2,diamond,purple,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0076.jpg,striped,2,diamond,purple,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0062.jpg,outlined,3,diamond,purple,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0074.jpg,solid,2,oval,red,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0060.jpg,solid,2,squiggle,purple,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0048.jpg,striped,3,oval,red,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0049.jpg,striped,2,oval,green,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0061.jpg,solid,3,diamond,red,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255


In [201]:
df.tail(10)

Unnamed: 0,card_filename,fill,number,shape,color,0,1,2,3,4,...,5240,5241,5242,5243,5244,5245,5246,5247,5248,5249
0,SeT0064.jpg,outlined,2,squiggle,purple,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0070.jpg,striped,1,squiggle,green,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0066.jpg,solid,2,diamond,red,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0072.jpg,outlined,2,squiggle,green,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0073.jpg,solid,2,oval,green,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0067.jpg,outlined,3,oval,green,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0069.jpg,striped,2,squiggle,green,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0068.jpg,striped,3,squiggle,green,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0078.jpg,solid,2,diamond,purple,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0079.jpg,striped,2,oval,red,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255


In [204]:
df['card_filename'].value_counts()

SeT0073.jpg    2
SeT0074.jpg    2
SeT0072.jpg    2
SeT0066.jpg    2
SeT0075.jpg    2
              ..
SeT0034.jpg    1
SeT0024.jpg    1
SET0007.jpg    1
SeT0079.jpg    1
SET0009.jpg    1
Name: card_filename, Length: 65, dtype: int64

In [205]:
df[df['card_filename']== 'SeT0073.jpg']

Unnamed: 0,card_filename,fill,number,shape,color,0,1,2,3,4,...,5240,5241,5242,5243,5244,5245,5246,5247,5248,5249
0,SeT0073.jpg,solid,2,oval,green,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SeT0073.jpg,solid,2,oval,green,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255


In [206]:
df['color'].value_counts()

purple    29
green     26
red       23
red*       1
greem      1
Name: color, dtype: int64

In [207]:
df['shape'].value_counts()

diamond     30
squiggle    25
oval        25
Name: shape, dtype: int64

In [208]:
df['number'].value_counts()

2           33
3           23
1           20
1*           2
outlined     1
3            1
Name: number, dtype: int64

In [22]:
len(filenames)

81

In [174]:
df.shape

(80, 5255)

In [176]:
df.to_csv('SET_data.csv')

In [209]:
filenames = []
for filename in os.listdir('./attachments'):
    filenames.append(filename)
    
for filename in os.listdir('./attachments/attachments61to81'):
    filenames.append(filename)

In [192]:
filenames_set = set(filenames)

In [193]:
df_set = set(df['card_filename'])

In [194]:
diff = np.setdiff1d(filenames_set, df_set)

In [195]:
len(diff[0])

83

In [198]:
len(df['card_filename'])

80

In [211]:
df.drop_duplicates(inplace = True)

In [212]:
df.shape

(66, 5255)

In [222]:
df_set = set(df['card_filename'])
filenames_set = set(filenames)

In [224]:
diff = filenames_set.difference(df_set)

In [227]:
diff.remove('.DS_Store')
diff.remove('.ipynb_checkpoints')
diff.remove('attachments61to81')


In [228]:
diff

{'SeT0040.jpg',
 'SeT0041.jpg',
 'SeT0042.jpg',
 'SeT0043.jpg',
 'SeT0044.jpg',
 'SeT0045.jpg',
 'SeT0046.jpg',
 'SeT0047.jpg',
 'SeT0050.jpg',
 'SeT0051.jpg',
 'SeT0052.jpg',
 'SeT0053.jpg',
 'SeT0054.jpg',
 'SeT0055.jpg',
 'SeT0056.jpg',
 'SeT0057.jpg'}

In [230]:
# filenames= []
i = 0

for filename in diff:
    # get the image
    image = Image.open('./attachments/' + filename)
    
    #crop out white space
    cropped = image.crop((60, 20, 950, 643))
    
    #rescale to 50x35
    basewidth = 50
    wpercent = (basewidth / float(cropped.size[0]))
    hsize = int((float(cropped.size[1]) * float(wpercent)))
    img = cropped.resize((basewidth, hsize), PIL.Image.ANTIALIAS)
    
    img.show()
    
    #reshape to vector to be used as row in dataframe
    img_vec = np.array(img)
    img_vec_flat = img_vec.reshape(1,-1)
    
    # creating dataframe
    image_df = pd.DataFrame(img_vec_flat)

    # creating color column
    color = input('color?')
    image_df.insert(loc=0, column='color', value=color)

    # creating shape column
    shape = input('shape?')
    image_df.insert(loc=0, column='shape', value=shape)

    # creating number column
    number = input('number?')
    image_df.insert(loc=0, column='number', value=number)

    # creating fill column
    fill = input('fill?')
    image_df.insert(loc=0, column='fill', value=fill)

    # creating fill column
    image_df.insert(loc=0, column='card_filename', value=filename)
    
    # concatonate with final dataframe
    df = pd.concat([df,image_df], axis = 0)
    
    
    i = i+1
    print(f'done with {i}')
    

color?red
shape?squiggle
number?1
fill?outlined
done with 1
color?green
shape?oval
number?1
fill?striped
done with 2
color?purple
shape?oval
number?1
fill?solid
done with 3
color?red
shape?squiggle
number?1
fill?solid
done with 4
color?purple
shape?oval
number?1
fill?striped
done with 5
color?purple
shape?oval
number?3
fill?striped
done with 6
color?red
shape?squiggle
number?3
fill?outlined
done with 7
color?red
shape?squiggle
number?2
fill?outlined
done with 8
color?green
shape?oval
number?3
fill?striped
done with 9
color?red
shape?squiggle
number?3
fill?striped
done with 10
color?purple
shape?squiggle
number?1
fill?solid
done with 11
color?purple
shape?diamond
number?3
fill?solid
done with 12
color?green
shape?squiggle
number?1
fill?solid
done with 13
color?red
shape?oval
number?1
fill?outlined
done with 14
color?red
shape?oval
number?3
fill?solid
done with 15
color?green
shape?diamond
number?2
fill?striped
done with 16


In [231]:
df.shape

(82, 5255)

# Cleanup

In [255]:
df['color'].value_counts()

green     27
red       27
purple    27
red*       1
Name: color, dtype: int64

In [239]:
df[df['color']=='greem']

Unnamed: 0,card_filename,fill,number,shape,color,0,1,2,3,4,...,5240,5241,5242,5243,5244,5245,5246,5247,5248,5249
0,SeT0034.jpg,solid,2,squiggle,greem,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255


In [241]:
df.replace({'greem':'green'},inplace = True)

In [243]:
df['shape'].value_counts()

squiggle    28
oval        27
diamond     27
Name: shape, dtype: int64

In [None]:
# there is an extra squiggle card

In [253]:
df.to_csv('SET_data2.csv')

In [254]:
df['number'].value_counts()

2           27
3           26
1           25
1*           2
outlined     1
3            1
Name: number, dtype: int64

In [256]:
df[df['number']=='1*']

Unnamed: 0,card_filename,fill,number,shape,color,0,1,2,3,4,...,5240,5241,5242,5243,5244,5245,5246,5247,5248,5249
0,SET0002.jpg,solid,1*,oval,red*,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
0,SET.jpg,striped,1*,diamond,green,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255


In [257]:
df.replace({'1*':1, 'red*':'red'}, inplace = True)

In [266]:
df[df['number']=='outlined']

Unnamed: 0_level_0,fill,number,shape,color,0,1,2,3,4,5,...,5240,5241,5242,5243,5244,5245,5246,5247,5248,5249
card_filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SeT0071.jpg,solid,outlined,squiggle,green,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255


In [270]:
df.loc['SeT0071.jpg']

Unnamed: 0_level_0,fill,number,shape,color,0,1,2,3,4,5,...,5240,5241,5242,5243,5244,5245,5246,5247,5248,5249
card_filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SeT0071.jpg,solid,outlined,squiggle,green,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
SeT0071.jpg,outlined,1,squiggle,green,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255


In [272]:
df = df[df.number != 'outlined']

In [263]:
df.set_index('card_filename', inplace = True)

In [273]:
df['number'].value_counts()

2    27
3    26
1    25
1     2
3     1
Name: number, dtype: int64

In [274]:
set(df['number'])

{1, '1', '2', 3, '3'}

In [275]:
df['number'] = df['number'].astype(int)

In [276]:
df['number'].value_counts()

3    27
2    27
1    27
Name: number, dtype: int64

In [277]:
df['shape'].value_counts()

squiggle    27
oval        27
diamond     27
Name: shape, dtype: int64

In [278]:
df['color'].value_counts()

red       28
purple    27
green     26
Name: color, dtype: int64

In [279]:
df['fill'].value_counts()

striped     28
solid       27
outlined    24
outliend     1
             1
Name: fill, dtype: int64

In [280]:
df.replace({'outliend':'outlined'}, inplace = True)

In [281]:
df['fill'].value_counts()

striped     28
solid       27
outlined    25
             1
Name: fill, dtype: int64

In [282]:
set(df['fill'])

{'', 'outlined', 'solid', 'striped'}

In [283]:
df[df['fill']=='']

Unnamed: 0_level_0,fill,number,shape,color,0,1,2,3,4,5,...,5240,5241,5242,5243,5244,5245,5246,5247,5248,5249
card_filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SET0017.jpg,,1,diamond,green,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255


In [285]:
df.replace({'':'outlined'}, inplace = True)

In [286]:
df['fill'].value_counts()

striped     28
solid       27
outlined    26
Name: fill, dtype: int64

In [287]:
df[df['fill']=='striped']

Unnamed: 0_level_0,fill,number,shape,color,0,1,2,3,4,5,...,5240,5241,5242,5243,5244,5245,5246,5247,5248,5249
card_filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SET0007,striped,3,squiggle,red,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
SeT0076.jpg,striped,2,diamond,purple,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
SeT0048.jpg,striped,3,oval,red,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
SeT0049.jpg,striped,2,oval,green,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
SeT0065.jpg,striped,1,squiggle,purple,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
SeT0070.jpg,striped,1,squiggle,green,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
SeT0058.jpg,striped,3,diamond,green,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
SET0015.jpg,striped,3,diamond,red,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
SET0001.jpg,striped,3,squiggle,purple,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
SET0003.jpg,striped,2,squiggle,red,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255


In [289]:
df.duplicated().value_counts()

False    81
dtype: int64

In [291]:
df.to_csv('final_SET.csv')