# Resize Images

In [1]:
%load_ext lab_black

In [2]:
%load_ext google.cloud.bigquery

In [3]:
%load_ext line_profiler

In [54]:
import os
from tqdm.notebook import tqdm
from PIL import Image, ImageFile

In [55]:
ImageFile.LOAD_TRUNCATED_IMAGES = True

## Load Data

In [28]:
%%bigquery df --project zenscr-seefood-dev

SELECT image_path
FROM `zenscr-seefood-dev.sparkrecipes.base_filtered`
INNER JOIN `zenscr-seefood-dev.sparkrecipes.image_path`
USING (recipe_id)
WHERE RAND() < 0.0001

In [29]:
df

Unnamed: 0,title,image_path,total_calories
0,Fruit Dazzle,../../data/images/347283/000012,194.5
1,grilled salmon,../../data/images/94743/000002,240.6
2,Aj's Taco Salad,../../data/images/37574/000013,192.7
3,Overnight Oatmeal,../../data/images/348596/000013,202.2
4,No noodle Tuna Casserole,../../data/images/344032/000010,260.4
...,...,...,...
120,Broccoli Pasta,../../data/images/4646/000007,126.2
121,Honey whole grain lunch buns,../../data/images/89043/000001,206.5
122,Carissa's Low Fat Veggie Soup,../../data/images/413047/000004,141.7
123,Herb Rolls,../../data/images/334314/000008,147.3


## Analyze Loading of Images

In [30]:
def load_image(path):
    with Image.open(path) as f:
        image = f.convert("RGB")
    return image

In [31]:
def load_all_images(df, col):
    for path in df[col]:
        image = load_image(path)
        del image

In [32]:
def get_resolution(image_path):
    image = load_image(image_path)
    ret = f"{image.width}x{image.height}"
    del image
    return ret

In [33]:
%lprun  -f load_image load_all_images(df, col="image_path")

Timer unit: 1e-06 s

Total time: 1.88546 s
File: <ipython-input-30-dc32862bcd6f>
Function: load_image at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def load_image(path):
     2       125     582512.0   4660.1     30.9      with Image.open(path) as f:
     3       125    1302548.0  10420.4     69.1          image = f.convert("RGB")
     4       125        401.0      3.2      0.0      return image

In [34]:
df["resolution"] = df.image_path.map(get_resolution)

In [35]:
df["filesize"] = df.image_path.map(lambda path: os.stat(path).st_size)

In [36]:
df.sample(10)

Unnamed: 0,title,image_path,total_calories,resolution,filesize
33,Sukhothai Pad Thai,../../data/images/412667/000005,474.1,800x800,112341
80,Chick Pea Salad,../../data/images/204015/000004,131.4,680x1020,142080
47,Peanut Noodles with Shrimp,../../data/images/18936/000005,235.7,680x1020,255657
81,Stramboli,../../data/images/256472/000010,331.8,800x450,67276
55,"Chicken, Corn, and Black Bean Stoup",../../data/images/402579/000013,403.1,656x438,125174
57,Faux Puff Pastry,../../data/images/419701/000004,66.4,600x495,404620
59,Chicken Taco Meat,../../data/images/178986/000005,67.9,640x424,232237
50,Buttermilk Pancakes,../../data/images/274660/000003,221.7,440x293,28356
68,L. R. lemon chicken,../../data/images/285431/000011,112.3,600x900,107536
77,Basic Chili over rice (no meat),../../data/images/48379/000001,207.7,720x1080,85092


## Resize Images and Store as BMP

In [37]:
df["resized_image_path"] = df.image_path.map(lambda path: f"{path}_resized")

In [56]:
def resize_images(
    df, size=(224, 224), source_col="image_path", target_col="resized_image_path"
):
    for _, row in tqdm(df.iterrows(), total=df.shape[0]):
        with Image.open(row[source_col]) as im:
            im = im.convert("RGB")
            im.thumbnail(size, Image.ANTIALIAS)
            im.save(row[target_col], "BMP")

In [49]:
resize_images(df)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [18]:
%lprun  -f load_image load_all_images(df, col="resized_image_path")

Timer unit: 1e-06 s

Total time: 24.4156 s
File: <ipython-input-8-dc32862bcd6f>
Function: load_image at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def load_image(path):
     2     13092    9260519.0    707.3     37.9      with Image.open(path) as f:
     3     13092   15137841.0   1156.3     62.0          image = f.convert("RGB")
     4     13092      17270.0      1.3      0.1      return image

In [41]:
df["resized_resolution"] = df.resized_image_path.map(get_resolution)

In [42]:
df["resized_filesize"] = df.resized_image_path.map(lambda path: os.stat(path).st_size)

In [43]:
df["filesize_diff"] = df["filesize"] - df["resized_filesize"]

In [44]:
df

Unnamed: 0,title,image_path,total_calories,resolution,filesize,resized_image_path,resized_resolution,resized_filesize,filesize_diff
0,Fruit Dazzle,../../data/images/347283/000012,194.5,360x360,46228,../../data/images/347283/000012_resized,224x224,150582,-104354
1,grilled salmon,../../data/images/94743/000002,240.6,800x800,186403,../../data/images/94743/000002_resized,224x224,150582,35821
2,Aj's Taco Salad,../../data/images/37574/000013,192.7,640x960,31682,../../data/images/37574/000013_resized,149x224,100406,-68724
3,Overnight Oatmeal,../../data/images/348596/000013,202.2,800x450,68275,../../data/images/348596/000013_resized,224x126,84726,-16451
4,No noodle Tuna Casserole,../../data/images/344032/000010,260.4,735x490,300086,../../data/images/344032/000010_resized,224x149,100182,199904
...,...,...,...,...,...,...,...,...,...
120,Broccoli Pasta,../../data/images/4646/000007,126.2,600x900,273375,../../data/images/4646/000007_resized,149x224,100406,172969
121,Honey whole grain lunch buns,../../data/images/89043/000001,206.5,450x338,55710,../../data/images/89043/000001_resized,224x168,112950,-57240
122,Carissa's Low Fat Veggie Soup,../../data/images/413047/000004,141.7,700x1050,120295,../../data/images/413047/000004_resized,149x224,100406,19889
123,Herb Rolls,../../data/images/334314/000008,147.3,570x855,127025,../../data/images/334314/000008_resized,149x224,100406,26619


In [45]:
df.filesize.sum()

14214868

In [46]:
df.resized_filesize.sum()

13207294

In [47]:
df.describe()

Unnamed: 0,total_calories,filesize,resized_filesize,filesize_diff
count,125.0,125.0,125.0,125.0
mean,213.4992,113718.944,105658.352,8060.592
std,111.300372,87980.28743,19687.977127,90734.799435
min,31.2,21067.0,51126.0,-104354.0
25%,141.7,57224.0,100182.0,-42867.0
50%,199.1,85152.0,100406.0,-17296.0
75%,265.4,127910.0,112950.0,25692.0
max,505.3,492033.0,150582.0,416715.0


## Resize all

In [50]:
%%bigquery df_all --project zenscr-seefood-dev

SELECT image_path
FROM `zenscr-seefood-dev.sparkrecipes.base_filtered`
INNER JOIN `zenscr-seefood-dev.sparkrecipes.image_path`
USING (recipe_id)

In [52]:
df_all

Unnamed: 0,image_path
0,../../data/images/19/000001
1,../../data/images/19/000010
2,../../data/images/19/000011
3,../../data/images/19/000012
4,../../data/images/19/000013
...,...
1311732,../../data/images/435170/000005
1311733,../../data/images/435170/000006
1311734,../../data/images/435170/000007
1311735,../../data/images/435170/000008


In [57]:
resize_images(df_all, source_col="image_path", target_col="image_path")

HBox(children=(FloatProgress(value=0.0, max=1311737.0), HTML(value='')))

  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)



