In [21]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from itertools import product, cycle
from functools import reduce
from operator import mul

from tqdm.notebook import tqdm

import requests
import shutil

import image_recovery.linalg as irl
import image_recovery.imglib as iri

import glob
import os

import cv2

SEED = 42

## Downloading images

In [18]:
CSV_PATH = "google_train.csv"
SAVE_PATH = "google_landmarks"
SAVED_URLS_FILE = "google_landmarks.txt"

max_resolution = (640, 480)
imgs_to_download = 200

with open(CSV_PATH, "r") as fh:
    urls = [x.split(",")[1].strip("\'\"") for x in fh.readlines()[1:]]
    
# ================
# CAREFUL! THIS CODE ERASES ALL THE FILES IN DIRECTORY!

for ximg in glob.glob(os.path.join(SAVE_PATH, "*")):
    os.remove(ximg)
    
# ================

np.random.seed(SEED)
    
with open(SAVED_URLS_FILE, "w") as fh:
    ix = 0
    idxs = iter(np.random.permutation(len(urls)))
    
    while ix < imgs_to_download:
        idx = next(idxs)
        
        try:
            req = requests.get(urls[idx], stream=True)
            
            if req.status_code == 200:
                print(f"{ix + 1}. Successfully downloaded {urls[idx]}")

                if urls[idx].endswith("jpg"):
                    filepath = os.path.join(SAVE_PATH, f"img{idx}.jpg")
                else:
                    filepath = os.path.join(SAVE_PATH, f"img{idx}.png")

                with open(filepath, "wb") as fimg:
                    req.raw.decode_content = True
                    shutil.copyfileobj(req.raw, fimg)

                # ================

                img = cv2.imread(filepath)

                if np.prod(img.shape[:2]) > np.prod(max_resolution):
                    cft = np.sqrt(np.prod(max_resolution)/np.prod(img.shape[:2]))
                    new_shape = (int(img.shape[0]*cft), int(img.shape[1]*cft))

                    print(f"Image has resolution {img.shape[0]} x {img.shape[1]}. Shrinked to {new_shape[0]} x {new_shape[1]}")

                    new_img = cv2.resize(img, (0, 0), fx=cft, fy=cft)
                    cv2.imwrite(filepath, new_img)

                fh.write(", ".join((filepath, urls[idx])) + "\n")
                ix += 1
            else:
                print(f"Failed to download {urls[idx]}")
            
        except:
            print(f"Failed to get URL {urls[idx]}")

1. Successfully downloaded https://lh3.googleusercontent.com/-VahX3jqoUFU/TyV_dOBpkJI/AAAAAAAAAZA/y_KzLanV28M/s1600/
Image has resolution 1600 x 1067. Shrinked to 678 x 452
2. Successfully downloaded https://lh3.googleusercontent.com/-SJMDqfwNHPI/UHNOORpGLDI/AAAAAAAAAGk/W8Q9ZNKsyWo/s0-d/
Failed to download https://lh5.googleusercontent.com/-4O9sMpIES6g/UCzh5J-cC4I/AAAAAAAAACI/aggQj2Tru-0/s1600/
3. Successfully downloaded http://lh6.ggpht.com/-nzFxxt4sjUw/UC7kFgyVpgI/AAAAAAAACtE/1Uzz4ag-LQY/s1600/
Image has resolution 1600 x 1200. Shrinked to 640 x 480
4. Successfully downloaded https://lh4.googleusercontent.com/-t0VADskyAa8/TAxl6bauAcI/AAAAAAAAAog/QRkOrtUPmwg/s1600/
Image has resolution 1200 x 1600. Shrinked to 480 x 640
5. Successfully downloaded https://lh6.googleusercontent.com/-yXVp75i5xUk/Ue6SPeGaDLI/AAAAAAAAhDY/qQfAzI-vWGY/s1600/
Image has resolution 1600 x 1067. Shrinked to 678 x 452
6. Successfully downloaded http://lh3.ggpht.com/-LEvAsRLnwlE/RqKpfqhW5PI/AAAAAAAAAS0/oWBz7PBE_Ec

Image has resolution 1600 x 1068. Shrinked to 678 x 452
Failed to download http://mw2.google.com/mw-panoramio/photos/medium/21222578.jpg
50. Successfully downloaded http://lh5.ggpht.com/aleksander.blasiak/R4TZxDVYkLI/AAAAAAAAAL0/jNoUMsX_TEw/s1024/
Image has resolution 681 x 1024. Shrinked to 451 x 679
51. Successfully downloaded http://lh3.ggpht.com/-L3mp62yYC2U/SKwpteyF2vI/AAAAAAAACa0/X7CiSRd2mdw/s1600/
Image has resolution 1200 x 1600. Shrinked to 480 x 640
52. Successfully downloaded https://lh4.googleusercontent.com/-xdzzrN9acCc/SC5nUFfByCI/AAAAAAAAAWk/JJHl8tJaz_c/s1600/
Image has resolution 1200 x 1600. Shrinked to 480 x 640
53. Successfully downloaded http://lh6.ggpht.com/-E80ZyW-AtgI/SZwnkhS76FI/AAAAAAAAFQc/1wSBetG4t-Y/s1600/
Image has resolution 1200 x 1600. Shrinked to 480 x 640
54. Successfully downloaded https://lh3.googleusercontent.com/-5mZsDPt5PcA/Sj6vDTdbZuI/AAAAAAAADD4/8zhXGFkvrzk/s1600/
Image has resolution 1600 x 1200. Shrinked to 640 x 480
55. Successfully downloaded

Failed to download http://mw2.google.com/mw-panoramio/photos/medium/73858989.jpg
98. Successfully downloaded https://lh6.googleusercontent.com/-6nHbk4OtVdk/Rn__c9t_NWI/AAAAAAAAA-8/NWmbCQJmNog/s1600/
Image has resolution 1200 x 1600. Shrinked to 480 x 640
99. Successfully downloaded https://lh5.googleusercontent.com/-ohgOppknm-Q/Sc5tQ0f3LJI/AAAAAAAACBc/mDMdROEdFAQ/s1600/
Image has resolution 768 x 1024. Shrinked to 480 x 640
100. Successfully downloaded https://lh5.googleusercontent.com/-7UstUBPYn3I/T9Wg9j61p9I/AAAAAAAAIBE/98JZn0jdtCM/s1600/
Image has resolution 540 x 720. Shrinked to 480 x 640
101. Successfully downloaded http://lh6.ggpht.com/-1s24b36yGBA/SJ2vzgE22xI/AAAAAAAAQCo/0IUSuZZVF4Y/s1600/
102. Successfully downloaded https://lh4.googleusercontent.com/-V4772_FFk8o/StJkEribRDI/AAAAAAAAGTI/KgY3bQ6nPO8/s1600/
Image has resolution 1200 x 1600. Shrinked to 480 x 640
103. Successfully downloaded http://lh4.ggpht.com/-WlmWatSLrks/SPNoiriCwCI/AAAAAAAAAMg/cCn64AasFNQ/s1600/
104. Success

Image has resolution 1200 x 1600. Shrinked to 480 x 640
145. Successfully downloaded https://lh6.googleusercontent.com/--HrJMl1xEqI/S9XzK93B9MI/AAAAAAAACFY/0LAiSyKceRA/s1600/
Image has resolution 1200 x 1600. Shrinked to 480 x 640
146. Successfully downloaded https://lh3.googleusercontent.com/-2FHHIGxIcY4/S54Hu1a1h9I/AAAAAAAAFmE/sy6TWZEfHIc/s1600/
Image has resolution 857 x 1280. Shrinked to 453 x 677
147. Successfully downloaded https://lh6.googleusercontent.com/-4dNUmJkoVzE/SWl6IH_MdRI/AAAAAAAACLU/9Cwuh_xpbtw/s1600/
Image has resolution 1200 x 1600. Shrinked to 480 x 640
148. Successfully downloaded https://lh3.googleusercontent.com/-w8QSqxelDlY/S_H68fikIZI/AAAAAAAAAPc/XLLVKQIQ9T8/s1600/
Image has resolution 1200 x 1600. Shrinked to 480 x 640
149. Successfully downloaded http://lh5.ggpht.com/-p85O_qCwLKM/So1hNwgoaRI/AAAAAAAAEWU/gNGCpsMI7bY/s0/
Image has resolution 1600 x 1200. Shrinked to 640 x 480
150. Successfully downloaded https://lh6.googleusercontent.com/-4QBUk_meL4w/TJ-LldRBow

190. Successfully downloaded http://lh3.ggpht.com/-w1iJvwZv8zY/SJRqjYbZOII/AAAAAAAABO4/ckTepcfsElM/s1600/
Image has resolution 1200 x 1600. Shrinked to 480 x 640
191. Successfully downloaded https://lh5.googleusercontent.com/-DAK9NwQo4MM/SNZNoq8IcOI/AAAAAAAABOM/bA7LBDDpok8/rj/
Failed to download http://mw2.google.com/mw-panoramio/photos/medium/61616291.jpg
192. Successfully downloaded https://lh4.googleusercontent.com/-NMB-y10EUpo/Tq12SPbO3dI/AAAAAAAAI_8/337Bci9tP5w/s1600/
Image has resolution 1600 x 1064. Shrinked to 679 x 451
Failed to download http://mw2.google.com/mw-panoramio/photos/medium/13765223.jpg
193. Successfully downloaded http://lh5.ggpht.com/-5AeucSPBo50/Svxq6V1j3yI/AAAAAAAADtI/IbHVNX55-s4/s1600/
Image has resolution 1201 x 1600. Shrinked to 480 x 639
Failed to get URL None
194. Successfully downloaded http://lh6.ggpht.com/-rXlcrlUl718/RnZRJXZmTAI/AAAAAAAAAGs/80caH3javIQ/s1600/
Image has resolution 1200 x 1600. Shrinked to 480 x 640
195. Successfully downloaded http://lh

## Experimenting

In [19]:
configs = {
    "q": [0.75],
    "reg_coef": np.logspace(-1.0, 1.0, 5),
    "rot": [5.0, 10.0, 20.0],
    "hard_rank_reduction": [True, False],
    "rank_mult": [0.8]
}

In [4]:
def rprod(xs):
    return reduce(mul, xs)

In [41]:
RESULTS_FILE = "seed42_n200.txt"

paths = glob.glob(os.path.join(SAVE_PATH, "*.png")) + glob.glob(os.path.join(SAVE_PATH, "*.jpg"))

if os.path.isfile(RESULTS_FILE):
    df = pd.read_table(RESULTS_FILE, sep=",")

for (ix, ximg) in tqdm(enumerate(paths), total=len(paths)):
    if ximg not in df.img.values:
        img = iri.img2qm(ximg)
        tainted = (None, None)
        results = [None for _ in range(rprod(map(len, configs.values())))]

        for (jx, xcfg) in tqdm(enumerate(product(*list(zip(cycle((k, )), v) for (k, v) in configs.items()))), 
                               total=rprod(map(len, configs.values())), leave=False):
            cfg = dict(xcfg)
            xq = cfg.pop("q")

            if xq != tainted[0]:
                tainted = (xq, iri.add_random_missing_pixels(img, q=xq, mode="uniform", random_state=SEED))

            # ================

            rimg, U, _ = irl.lrqmc(mtr=tainted[1][0], mask=tainted[1][1], init_rank=100,
                                   max_iter=100, rel_tol=1e-3, random_state=SEED, progress=False, **cfg)
            results[jx] = {
                **cfg,
                **{
                    "q": xq,
                    "img": ximg,
                    "rank": U.shape[1],
                    "norm": np.sqrt(np.power(rimg - img, 2).sum())
                }
            }

        # ================

        pd.DataFrame(results).to_csv(RESULTS_FILE, mode="a", header=(ix == 0))

HBox(children=(FloatProgress(value=0.0, max=201.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

error: OpenCV(4.1.2) /io/opencv/modules/imgproc/src/color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'
