In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input mount.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filepath in (os.path.join(dirname, name) for name in filenames):
        print(filepath)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [5]:
# Fetch the photo archive. -nc (no-clobber) skips the download when the file already
# exists, so re-running the cell does not leave "<name>.zip.1" duplicates behind.
# NOTE(review): plain HTTP from a bare IP with no checksum -- the payload is unverified.
!wget -nc http://94.228.125.103:8000/20220410-minvody_photos-512px-00001.zip

In [None]:
# Extract the archive. -n never overwrites existing files (avoids the interactive
# "replace?" prompt on a re-run); -q keeps the per-file listing out of the cell output.
!unzip -n -q 20220410-minvody_photos-512px-00001.zip

In [11]:
# Fetch the trained weights. -nc (no-clobber) skips the download when the file already
# exists, so re-running the cell does not create a ".h5.1" copy that WEIGHTS ignores.
# NOTE(review): plain HTTP from a bare IP with no checksum -- the payload is unverified.
!wget -nc http://94.228.125.103:8000/vklike-efficientnetb3-epoch25.h5

In [12]:
import tensorflow as tf

from typing import List, Dict, Tuple, Optional
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
from tqdm import tqdm
from pathlib import Path
from PIL import Image

In [17]:
# --- Input artefacts -------------------------------------------------------
# Weights file; the name matches the .h5 fetched by the wget cell.
WEIGHTS = Path('vklike-efficientnetb3-epoch25.h5')
# Directory that is globbed for *.jpg files when scoring in bulk.
# NOTE(review): presumably the folder produced by unzipping the archive -- confirm.
DATA = Path('20220410-minvody_photos/00001')

# --- Settings --------------------------------------------------------------
# Spatial size used for the model input shape and for resizing images.
IMAGE_SIZE = (300, 300)
BATCH_SIZE = 32
# NOTE(review): the two values below look like training-time settings -- confirm.
VALIDATION_SPLIT = 0.2
SEED = 1337

In [18]:
# Augmentation stack applied to the model input inside build_model():
# horizontal flip, rotation, translation and contrast jitter, in that order.
img_augmentation = tf.keras.Sequential(
    layers=[
        tf.keras.layers.RandomFlip(mode="horizontal"),
        tf.keras.layers.RandomRotation(factor=0.1),
        tf.keras.layers.RandomTranslation(
            height_factor=0.1,
            width_factor=0.1,
        ),
        tf.keras.layers.RandomContrast(factor=0.1, seed=42),
    ]
)

In [19]:
def build_model(
    input_shape,
    num_classes: int,
    dropout_rate: float,
    weights: Optional[str] = "imagenet",
):
    """Build an EfficientNetB3-based classifier with a fresh classification head.

    The input is passed through ``img_augmentation`` and then a frozen
    EfficientNetB3 backbone (without its top). The head is global average
    pooling -> batch normalisation -> dropout -> dense prediction layer.

    Args:
        input_shape: Shape of a single input image, passed to ``Input(shape=...)``.
        num_classes: Number of target classes. Two classes produce a single
            sigmoid unit; more than two produce a softmax over ``num_classes`` units.
        dropout_rate: Rate of the dropout layer placed before the prediction layer.
        weights: Forwarded to ``EfficientNetB3(weights=...)``. Defaults to
            ``"imagenet"`` (the previous hard-coded value). Pass ``None`` to skip
            the pretrained download when the weights are loaded from a file
            right after building.

    Returns:
        A ``tf.keras.Model`` named ``"EfficientNet"``.

    Raises:
        ValueError: If ``num_classes`` is smaller than 2.
    """
    # A one-unit softmax always outputs 1.0, so fewer than two classes is an error.
    if num_classes < 2:
        raise ValueError(f"num_classes must be at least 2, got {num_classes}")

    inputs = tf.keras.layers.Input(shape=input_shape)
    x = img_augmentation(inputs)
    cnn = tf.keras.applications.EfficientNetB3(
        input_tensor=x,
        include_top=False,
        weights=weights,
    )

    # Freeze the backbone weights
    cnn.trainable = False

    # Rebuild top
    x = tf.keras.layers.GlobalAveragePooling2D(name="avg_pool")(cnn.output)
    x = tf.keras.layers.BatchNormalization()(x)

    if num_classes == 2:
        activation = "sigmoid"
        units = 1
    else:
        activation = "softmax"
        units = num_classes
    x = tf.keras.layers.Dropout(dropout_rate, name="top_dropout")(x)
    outputs = tf.keras.layers.Dense(units, activation=activation, name="pred")(x)

    return tf.keras.Model(inputs, outputs, name="EfficientNet")

In [20]:
# Instantiate the two-class classifier for IMAGE_SIZE inputs with 3 channels.
model = build_model(
    input_shape=(*IMAGE_SIZE, 3),
    num_classes=2,
    dropout_rate=0.5,
)

In [21]:
# Restore the trained parameters from the Keras HDF5 (.h5) weights file.
model.load_weights(filepath=WEIGHTS)

In [22]:
def predict(mode, img: "Image.Image") -> float:
    """Score a single image with the given model.

    Args:
        mode: Object exposing ``predict(batch)``; this is the model to run.
            The parameter name is kept for backward compatibility. It was
            previously ignored in favour of the global ``model``.
        img: Image to score. It is converted to an array as-is, so it must
            already have the size the model expects.

    Returns:
        The first output value of the first (only) batch element, as a Python float.
    """
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    # Add a leading batch axis: the model consumes batches, not single images.
    batch = tf.expand_dims(img_array, 0)
    predictions = mode.predict(batch)
    return float(predictions[0][0])

In [23]:
def load_image(url: str, size: Tuple[int, int]) -> "Image.Image":
    """Download an image over HTTP and resize it.

    Args:
        url: Address of the image to download.
        size: Target size handed to ``Image.resize``. Previously this argument
            was ignored and the global ``IMAGE_SIZE`` was used instead.

    Returns:
        The downloaded image resized to ``size``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    from io import BytesIO  # local import keeps the block self-contained

    # A timeout prevents the cell from hanging forever on an unresponsive host.
    res = requests.get(url, timeout=30)
    # Fail loudly instead of handing an HTML error page to the image decoder.
    res.raise_for_status()
    # res.content is the fully read, content-decoded body (unlike res.raw).
    img = Image.open(BytesIO(res.content))
    return img.resize(size)

In [24]:
# Download one example photo, score it, and display it with its label.
img = load_image(
    size=IMAGE_SIZE,
    url='https://sun9-77.userapi.com/impg/Xv8dJh4OnSgY2ZAc04-_L7SokiJCxmwDKv2PrQ/_htEJkgpA0M.jpg?size=811x1080&quality=96&sign=1641ab517a74fa2e23ef2b0d2acf9ecf&c_uniq_tag=xhbHQL3zHtK59Dfuyz47cQPIIrBtDgIkP_VVRCPXoR0&type=album',
)
score = predict(model, img)

# Only scores strictly above 0.99 are labelled LIKE.
if score > 0.99:
    label = "LIKE"
else:
    label = "DISLIKE"

plt.imshow(img)
plt.title(f"{label}")

print(f"Image is LIKE with {score} confidence")

In [44]:
class VklikePredictor:
    """Scores image files in batches with a callable model.

    Calling an instance with a list of paths loads each file at ``image_size``,
    stacks the images into batches of at most ``batch_size`` and runs the model
    on every batch.
    """

    def __init__(self, model, image_size: Tuple[int, int], batch_size: int) -> None:
        """Store the model and batching parameters.

        Args:
            model: Callable model; invoked as ``model(batch, training=False)``.
            image_size: ``target_size`` passed to ``load_img`` for every file.
            batch_size: Maximum number of images per model call.

        Raises:
            ValueError: If ``batch_size`` is not positive (batching could never
                make progress).
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
        self.model = model
        self.image_size = image_size
        self.batch_size = batch_size

    def __call__(self, files: List[Path]):
        """Score every file and return one record per file.

        Args:
            files: Paths of the images to score.

        Returns:
            A list of ``{"filename": <file name>, "score": <first model output>}``
            dicts, in the same order as ``files``.
        """
        total = self.calc_batches(files)
        result = []
        for chunk in tqdm(self.iter_batch(iter(files)), total=total):
            images = [
                tf.keras.preprocessing.image.img_to_array(
                    tf.keras.preprocessing.image.load_img(
                        f, target_size=self.image_size,
                    )
                )
                for f in chunk
            ]
            batch = tf.stack(images, axis=0)
            # Use the model given to the constructor (not a global) and run it
            # explicitly in inference mode.
            predictions = self.model(batch, training=False)
            # One device-to-host transfer per batch instead of one per image.
            scores = predictions.numpy()[:, 0]
            for f, score in zip(chunk, scores):
                result.append({
                    "filename": f.name,
                    "score": score,
                })
        return result

    def calc_batches(self, items: List) -> int:
        """Return how many batches ``iter_batch`` yields for ``items``."""
        # Integer ceiling division; no float round-trip.
        return -(-len(items) // self.batch_size)

    def iter_batch(self, items):
        """Yield consecutive lists of at most ``batch_size`` elements.

        The last list may be shorter; an empty list is never yielded, so the
        number of yielded chunks always equals ``calc_batches``.
        """
        chunk = []
        for item in items:
            chunk.append(item)
            if len(chunk) == self.batch_size:
                yield chunk
                chunk = []
        if chunk:
            yield chunk

In [None]:
from random import shuffle
# Collect every JPEG in the data directory. glob() order depends on the
# filesystem, so sort to make the prediction order reproducible.
xs = sorted(DATA.glob('*.jpg'))
# Uncomment for a quick run on a random subset:
# shuffle(xs)
# xs = xs[:100]

# Bound to `predictor`, not `predict`, so the single-image predict() function
# defined earlier is not overwritten and its demo cell can still be re-run.
predictor = VklikePredictor(
    model=model,
    image_size=IMAGE_SIZE,
    batch_size=128,
)
# with tf.device('/GPU:0'):
prediction = predictor(xs)

In [49]:
# One row per scored file; then plot how the scores are distributed.
df = pd.DataFrame(data=prediction)
df.loc[:, 'score'].hist(bins=25)

In [30]:
def resize_image(image: "Image.Image", size: Tuple[int, int]) -> "Image.Image":
    """Center-crop an image to a square, then resize it to ``size``.

    Args:
        image: Source image; it is not modified.
        size: Requested ``(width, height)`` of the result.

    Returns:
        ``image`` itself when it is already square and already has the
        requested size, otherwise a new cropped and/or resized image.
    """
    width, height = image.size
    if width != height:
        square_size = min(width, height)
        # Integer offsets: float box coordinates are rounded by Pillow, which
        # could make the crop one pixel off from square (or empty for tiny images).
        left = (width - square_size) // 2
        top = (height - square_size) // 2
        # Crop the center of the image
        image = image.crop((left, top, left + square_size, top + square_size))
    # now the image is square, resize it
    w, h = size
    if image.width != w or image.height != h:
        return image.resize(size)
    return image

In [33]:
def show_predicted(
    df,
    base: Path,
    ncols: int = 3,
    image_size: Tuple[int, int] = (300, 300),
):
    """Plot the images listed in ``df`` in a grid, titled with name and score.

    Args:
        df: Frame with a ``filename`` and a ``score`` column; one tile per row.
        base: Directory the file names are resolved against.
        ncols: Number of grid columns (default 3, the previous fixed value).
        image_size: Size every image is cropped/resized to before display
            (default ``(300, 300)``, the previous fixed value).

    The number of grid rows is derived from ``len(df)``, so any number of rows
    can be shown. Twelve rows give the former 4x3 grid on a 9x14 inch figure.
    """
    pad = 2
    # Ceiling division, with at least one row so that an empty frame still
    # produces a valid (blank) figure.
    nrows = max(1, -(-len(df) // ncols))
    plt.figure(figsize=(3 * ncols, 3 * nrows + pad))
    for i, (_, row) in enumerate(df.iterrows()):
        img = base / row['filename']
        # The context manager closes the underlying file once the tile is drawn.
        with Image.open(img) as image:
            thumbnail = resize_image(image, image_size)
            plt.subplot(nrows, ncols, i + 1)
            plt.imshow(thumbnail)
        # Fixed-point formatting: slicing str(score) would cut the exponent off
        # values printed in scientific notation (e.g. "1.2345e-05" -> "1.2345e").
        s = f"{row['score']:.5f}"
        title = f"{img.name}\n({s})"
        plt.title(title)
        plt.axis("off")

In [51]:
# Show up to 12 randomly chosen images scored above 0.999.
# - min(...) avoids the ValueError that sample() raises when fewer than 12 rows match.
# - random_state=SEED makes the selection identical on every run.
top_scored = df[df['score'] > 0.999]
show_predicted(
    top_scored.sample(n=min(12, len(top_scored)), random_state=SEED),
    base=DATA,
)

In [52]:
# (rows, columns) of the predictions frame.
df.shape

In [55]:
# Report how many GPU devices TensorFlow can see.
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"Num GPUs Available: {len(gpu_devices)}")

In [None]:
# Persist the filename/score table as CSV, without the index column.
df.to_csv(
    path_or_buf='20220410-minvody_photos-512px-00001.csv',
    index=False,
)