# Duplicate Image Detector

### Install Dependencies

In [1]:
# uv venv image_duplicate_finder --python 3.11

# #In Windows
# .\image_duplicate_finder\Scripts\activate

# #In MacOS
# source image_duplicate_finder/bin/activate

# uv pip install ipykernel opencv-python pandas matplotlib tqdm

# #In MacOS
# image_duplicate_finder/bin/python -m ipykernel install --user --name=image_duplicate_finder --display-name "image_duplicate_finder"

# #In Windows same as above but instead of 'bin' use 'Scripts'

### Load data

In [None]:
import os

# Everything used by this notebook lives under ~/Desktop/image_folder
desktop_path = os.path.expanduser('~/Desktop')

# Sub-folders, in order:
#   queries      - each image here appears as a duplicate in the source folder
#   sources      - folder with the original images
#   test_queries - test images for which groundtruth is provided
query_img_folder, source_img_folder, test_query_img_folder = (
    os.path.join(desktop_path, "image_folder", subfolder)
    for subfolder in ('queries', 'sources', 'test_queries')
)

# Entry names (not full paths) of each folder, exactly as os.listdir returns them
query_img_files, source_img_files, test_query_img_files = (
    os.listdir(folder)
    for folder in (query_img_folder, source_img_folder, test_query_img_folder)
)

In [None]:
import pandas as pd

# CSV with the groundtruth for the test queries (columns used later: 'query', 'source')
test_query_groundtruth_path = os.path.join(
    desktop_path, "image_folder", 'test_queries_groundtruth.csv'
)
test_query_groundtruth = pd.read_csv(test_query_groundtruth_path)

# Show the first rows as the cell output
test_query_groundtruth.head()

Unnamed: 0,query,source
0,c13c90525541b925ec5d390c9399faee.png,293.png
1,5faef2c5413559dbed0c44220e259f16.png,427.png
2,0f18392f68dcb3bc6d827f9631601a0f.png,819.png
3,35e53a47f4d581232df309469201797e.png,857.png
4,d945350100c6442503a8e138692ba32d.png,235.png


### Create a figure to display each query image next to each source image to manually inspect cases

In [4]:
def save_query_source_pairs_figure(df=test_query_groundtruth, query_img_folder=test_query_img_folder, save_filename='test_query_source_pairs.png', dpi=300):
    """Save a figure showing every query image next to its paired source image.

    One row is drawn per row of ``df``: the query image on the left and the
    source image on the right. The figure is written to ``save_filename`` and
    closed afterwards; nothing is displayed inline.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have the columns 'query' and 'source' holding image file names.
    query_img_folder : str
        Folder containing the query images. Source images are always read from
        the module-level ``source_img_folder``.
    save_filename : str
        Path of the image file to write.
    dpi : int
        Resolution passed to ``savefig``. The figure is 6 inches tall per pair,
        so a long ``df`` produces a very tall image; lower this value if the
        backend refuses to render the figure because of its size.

    Returns
    -------
    None

    Notes
    -----
    - An empty ``df`` produces no file (a message is printed instead).
    - An image that cannot be read is replaced by a text placeholder so that
      the remaining pairs can still be inspected.
    """
    import matplotlib.pyplot as plt
    import cv2

    num_pairs = len(df)
    if num_pairs == 0:
        # plt.subplots cannot create a grid with zero rows
        print(f"No query-source pairs to plot; '{save_filename}' was not written.")
        return

    def _draw_panel(ax, img_path, title):
        """Draw one image (or a placeholder when unreadable) on ``ax``."""
        img = cv2.imread(img_path)  # returns None when the file is missing or not an image
        if img is None:
            ax.text(0.5, 0.5, 'Image could not be read',
                    ha='center', va='center', transform=ax.transAxes)
        else:
            ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # Convert BGR to RGB for matplotlib
        ax.set_title(title)
        ax.axis('off')

    # squeeze=False keeps `axes` two-dimensional even when there is a single pair
    fig, axes = plt.subplots(num_pairs, 2, figsize=(12, 6 * num_pairs), squeeze=False)
    try:
        # enumerate gives the row *position*; the DataFrame index labels may not be 0..n-1
        for row_pos, (query_filename, source_filename) in enumerate(zip(df['query'], df['source'])):
            _draw_panel(axes[row_pos, 0],
                        os.path.join(query_img_folder, query_filename),
                        f'Query: {query_filename}')
            _draw_panel(axes[row_pos, 1],
                        os.path.join(source_img_folder, source_filename),
                        f'Source: {source_filename}')

        fig.tight_layout()
        fig.savefig(save_filename, dpi=dpi, bbox_inches='tight')
    finally:
        # Always release the figure, even when drawing or saving fails
        plt.close(fig)

In [5]:
# save_query_source_pairs_figure()

### Feature Descriptors - Identify keypoints resistant to transformations (rotations, crops, partial deduplication)

#### [SIFT](https://medium.com/@deepanshut041/introduction-to-sift-scale-invariant-feature-transform-65d7f3a72d40) (Scale-Invariant Feature Transform) internally does 2 things at the same time:

1. **Keypoint detection**
    - Image is repeatedly blurred with Gaussians at multiple σ scales. The 3D space of (x, y, scale) is separated into octaves (see image below) and the num of octaves depends on image size. Each octave size is half the previous one. Within each octave images are progressively blurred. 

    # <div style="text-align: center;"><img src="imgs/scale.png" height="600"></div>

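    - For each scale, a difference-of-Gaussians is computed as $$DoG(img, \sigma_i) = \big(G(\sigma_{i+1}) - G(\sigma_i)\big) * img$$ where $G(\sigma)$ is a Gaussian kernel and $*$ denotes convolution, i.e. it is the difference between two consecutive blurred versions of the image. This is done for every octave of the pyramid.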
    # <div style="text-align: center;"><img src="imgs/DoG.png" height="300"></div>
    - In the above 3D space, local maxima/minima are matching candidates. These maxima/minima are calculated by checking for each pixel in the DoG pyramid 26 neighbors:
        - 8 in the same scale level (all pixels around it)
        - 9 in scale below (same pixel in that scale and all around it)
        - 9 in scale above

        If the pixel value is larger than all of its neighbors it is a local maximum, and if it is smaller than all of them it is a local minimum.

        # <div style="text-align: center;"><img src="imgs/local_minima_maxima.png" height="300"></div>
    - We filter out candidates with low contrast (intensity check) and those that lie along edges (similar to Harris Corner detector), since not useful as features.

    The above procedure returns positions of keypoints, their scales and their local orientation. They are scale and rotation invariant.

     <div style="text-align: center;"><img src="imgs/keypoints.png" height="600"></div>
    Stages of keypoint selection. (a) The 233x189 pixel original image. (b) The initial 832 keypoints locations at maxima and minima of the difference-of-Gaussian function. Keypoints are displayed as vectors indicating scale, orientation, and location. (c) After applying a threshold on minimum contrast, 729 keypoints remain. (d) The final 536 keypoints that remain following an additional threshold on ratio of principal curvatures.

# 

2. **Descriptor Computation (for each keypoint)**
    - Takes a region around the keypoint with radius proportional to its scale (higher scales result in larger regions - more pixels). For example, 3*σ.
    - Rotate the chosen region (patch) to its canonical dominant orientation. This is achieved by calculating gradients (dx, dy) around each keypoint and building a 36-bin orientation histogram from them (each pixel's gradient is assigned to a bin according to its direction - e.g. a gradient at 19 degrees goes into the 10-19 degrees bin - and the amount added to that bin is proportional to the gradient magnitude at that pixel). Then, SIFT rotates the patch so that the peak orientation in the histogram becomes angle=0 (any other peak above 80% of the highest one produces an additional keypoint with that orientation). That way we have rotation invariance, since no matter how the image is rotated, the descriptor will always be computed in a normalized reference frame.
    # <div style="text-align: center;"><img src="imgs/histogram.png" height="300"></div>
    - Divide the region into 4*4 subblocks
    - For each subblock, compute an 8-bin gradient orientation histogram (0-360 degrees, each bin covering 45 degrees). For each pixel we compute the gradient magnitude and direction. This contributes a weighted magnitude to the nearest orientation bin(s), with Gaussian spatial weighting (the center contributes more than the edges). That gives the descriptor local smoothness and stability.
    - Concatenate the results to get a vector of length 4 * 4 * 8 = 128 (the descriptor). Raw gradient orientations are not rotation invariant on their own; invariance is achieved by subtracting the keypoint's dominant orientation from each gradient orientation. Moreover, robustness to illumination changes is achieved by normalizing the vector to unit length and thresholding (clipping) large values.

    # <div style="text-align: center;"><img src="imgs/vector.png" height="200"></div>

    Descriptor is stable across moderate lighting changes, noise, and small viewpoint changes. This is why this method is still the preferred one for many cases over DL solutions. 


Keypoints between two images are matched by identifying their nearest neighbor. In some cases, the 2nd closest match might be very near to the first due to noise. In that case, ratio of 1st/2nd distance is taken and if greater than a threshold (e.g. 0.8) the match is rejected. Based on the [paper](https://www.cs.ubc.ca/~lowe/papers/ijcv04.pdf), this eliminates 90% of false matches, with the cost of discarding only 5% of correct ones. 

 # <div style="text-align: center;"><img src="imgs/example.png" width="600"></div>

Overall, rotation invariance is achieved through canonical orientation normalization, and flipping and partial deduplication are achieved since SIFT is local and if part of images overlap, then keypoints will still match. For flipping, although SIFT is not inherently mirror-invariant, in practice it sometimes works since many gradients are symmetric and matcher does nearest neighbor search. That way, mirror invariance is 'incorporated' but not guaranteed. In our implementation, we test manually the flipped image.

#### Calculate source features and save them to pickle file

In [6]:
def calculate_source_features(feature_detector='SIFT', matcher='BF'): #takes ±10min for 2500 imgs for SIFT, 2min for ORB
    """Detect keypoints/descriptors for every source image and cache them to disk.

    Every entry of the module-level ``source_img_files`` is read in grayscale
    from ``source_img_folder`` and passed through the chosen detector. The
    results are pickled to ``source_features_<feature_detector>_<matcher>.pkl``
    in the current working directory.

    Parameters
    ----------
    feature_detector : str
        'SIFT' or 'ORB' (ORB is created with nfeatures=2000).
    matcher : str
        'BF' or 'FLANN'. Only used to build the cache file name here; no
        matching is performed in this function.

    Returns
    -------
    dict
        ``{source_file_name: (keypoints, descriptors)}`` where ``keypoints`` are
        the cv2.KeyPoint objects returned by the detector and ``descriptors`` is
        the descriptor array (one row per keypoint; 128-D for SIFT), or None
        when the detector found nothing.

    Raises
    ------
    ValueError
        If ``feature_detector`` or ``matcher`` is not one of the supported names.

    Notes
    -----
    - Files that OpenCV cannot read (e.g. non-image files picked up by
      ``os.listdir``) are skipped and reported instead of aborting the run.
    - Only the (x, y) position of each keypoint is written to the cache;
      size, angle, response and octave are dropped because cv2.KeyPoint objects
      are converted to a plain coordinate array before pickling.
    - A failure while writing the cache is reported but does not raise, so the
      computed features are still returned.
    """
    import cv2
    from tqdm import tqdm
    import pickle

    save_name = 'source_features_'+feature_detector+'_'+matcher+'.pkl'

    # Initialize local feature detector
    if feature_detector == 'SIFT':
        detector = cv2.SIFT_create() # SIFT is great for robustness
    elif feature_detector == 'ORB':
        detector = cv2.ORB_create(nfeatures=2000) # ORB is faster
    else:
        raise ValueError(f"Invalid feature detector: {feature_detector}. Choose between 'SIFT' and 'ORB'.")

    # The matcher name only ends up in the file name, but it is still validated
    # so that a typo does not silently create a mislabeled cache.
    if matcher not in ('BF', 'FLANN'):
        raise ValueError(f"Invalid matcher: {matcher}. Choose between 'BF' and 'FLANN'.")

    # Pre-compute source features (to save time - optimization step)
    source_features = {}
    unreadable_files = []
    for source_path in tqdm(source_img_files, desc="Pre-computing source features"):
        # Read as grayscale: the feature detectors work on single-channel images
        img = cv2.imread(os.path.join(source_img_folder, source_path), cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread returns None instead of raising for unreadable/non-image files
            unreadable_files.append(source_path)
            continue

        # Second argument is an optional mask restricting where detection is allowed
        # (None = detect in the entire image).
        kp, des = detector.detectAndCompute(img, None)
        source_features[source_path] = (kp, des)

    if unreadable_files:
        print(f"Skipped {len(unreadable_files)} file(s) that could not be read as images: {unreadable_files}")

    # Convert to a serializable format: cv2.KeyPoint objects cannot be pickled,
    # so keep only their (x, y) coordinates as a NumPy array.
    print(f"Converting {len(source_features)} image feature sets...")
    saved_source_features = {
        source_path: (cv2.KeyPoint.convert(kp), des)
        for source_path, (kp, des) in source_features.items()
    }
    print("Conversion complete.")

    # Best effort: a failed save should not throw away the features just computed
    try:
        with open(save_name, 'wb') as f:
            pickle.dump(saved_source_features, f)
        print(f"Saved source features to cache: {save_name}")
    except Exception as e:
        print(f"Error saving source features to cache: {e}")

    return source_features

In [None]:
# source_features_SIFT_BF = calculate_source_features()

Pre-computing source features: 100%|██████████| 2500/2500 [09:19<00:00,  4.47it/s]


Converting 2500 image feature sets...
Conversion complete.
Saved source features to cache: source_features_SIFT_BF.pkl


### Load pre-computed features and convert them back to opencv compatible format

In [8]:
def load_source_features(feature_detector='SIFT', matcher='BF'):
    """Load cached source features and rebuild OpenCV keypoints from them.

    Reads ``source_features_<feature_detector>_<matcher>.pkl`` from the current
    working directory (the file written by ``calculate_source_features``) and
    converts each stored coordinate array back into cv2.KeyPoint objects.

    Parameters
    ----------
    feature_detector : str
        Detector name used in the cache file name (e.g. 'SIFT', 'ORB').
    matcher : str
        Matcher name used in the cache file name (e.g. 'BF', 'FLANN').

    Returns
    -------
    dict
        ``{source_file_name: (keypoints, descriptors)}``. Keypoints rebuilt here
        carry only their (x, y) position, because that is all the cache stores.

    Raises
    ------
    FileNotFoundError
        If the cache file does not exist.
    """
    import pickle
    import cv2

    cache_file = 'source_features_'+feature_detector+'_'+matcher+'.pkl'

    # Load from cache.
    # SECURITY: pickle.load can execute arbitrary code; only load cache files
    # that were produced locally by calculate_source_features.
    try:
        with open(cache_file, 'rb') as f:
            loaded_source_features = pickle.load(f)
    except FileNotFoundError as exc:
        raise FileNotFoundError(
            f"Feature cache '{cache_file}' not found. Run "
            f"calculate_source_features(feature_detector={feature_detector!r}, matcher={matcher!r}) first."
        ) from exc
    print(f"Loaded source features from cache: {cache_file}")

    # Create a new dictionary to hold the OpenCV-compatible features
    source_features = {}
    for source_path, (kp_array, des) in loaded_source_features.items():
        if des is None or kp_array is None or len(kp_array) == 0:
            # Nothing was detected for this image, so there is nothing to convert
            kp_objects = ()
        else:
            # Convert the coordinate array back to cv2.KeyPoint objects
            kp_objects = cv2.KeyPoint.convert(kp_array)

        source_features[source_path] = (kp_objects, des)

    print("Keypoints successfully converted back to cv2.KeyPoint objects.")

    return source_features

In [9]:
# Load the cached SIFT features (same values as the function defaults, spelled out)
source_features_SIFT_BF = load_source_features(feature_detector='SIFT', matcher='BF')

Loaded source features from cache: source_features_SIFT_BF.pkl
Keypoints successfully converted back to cv2.KeyPoint objects.


### Find matches of queries to source imgs

In [10]:
def _ratio_test_filter(knn_matches, lowe_ratio):
    """Return the best match of every k=2 neighbour pair that passes Lowe's ratio test."""
    good_matches = []
    for pair in knn_matches:
        # knnMatch returns fewer than k neighbours when the other image has too
        # few descriptors; without a second neighbour the test cannot be applied.
        if len(pair) < 2:
            continue
        best, second_best = pair[0], pair[1]
        # Best much closer than second best => likely real; similar distances => ambiguous, reject.
        if best.distance < lowe_ratio * second_best.distance:
            good_matches.append(best)
    return good_matches


def find_matches(query_img_files=test_query_img_files, 
                 query_img_folder=test_query_img_folder, 
                 source_features=source_features_SIFT_BF,
                 feature_detector='SIFT', 
                 matcher='BF', 
                 ransac_threshold=5, #Has to be at least 4 for findHomography to work
                 acceptance_threshold=6,
                 pixel_tolerance=5.0,
                 lowe_ratio=0.75,
                 min_inlier_ratio=0.40,
                 output_tag=''): #Takes 75mins for 50imgs
    """Find, for every query image, the source image it most likely duplicates.

    Each query is read in grayscale and tried both as-is and flipped
    horizontally. For every variant, its descriptors are matched (k=2) against
    every entry of ``source_features``; matches are filtered with Lowe's ratio
    test and then verified geometrically with a RANSAC homography. The source
    with the highest inlier count (subject to ``min_inlier_ratio``) is the best
    candidate, and it is accepted when that count reaches
    ``acceptance_threshold``.

    Parameters
    ----------
    query_img_files : list of str
        File names of the query images.
    query_img_folder : str
        Folder the query files are read from.
    source_features : dict
        ``{source_file_name: (keypoints, descriptors)}`` as returned by
        ``load_source_features`` / ``calculate_source_features``. Must have been
        computed with the same ``feature_detector``.
    feature_detector : str
        'SIFT' or 'ORB'.
    matcher : str
        'BF' or 'FLANN'. With ORB a brute-force matcher is always used.
    ransac_threshold : int
        Minimum number of ratio-test survivors required before running RANSAC.
        Values below 4 are raised to 4, the minimum findHomography needs.
    acceptance_threshold : int
        Minimum number of RANSAC inliers for the best candidate to be accepted.
    pixel_tolerance : float
        RANSAC reprojection threshold in pixels. Lower = stricter (fewer false
        matches); higher = more permissive (fewer missed duplicates).
    lowe_ratio : float
        Ratio used in Lowe's test (best distance < ratio * second-best distance).
    min_inlier_ratio : float
        Minimum fraction of ratio-test survivors that must be RANSAC inliers
        for a candidate to be considered.
    output_tag : str
        Optional suffix appended to the result file names. The default names
        depend only on detector, matcher and the two thresholds, so runs on
        different query sets with the same settings overwrite each other
        unless a tag is given.

    Returns
    -------
    (final_matches, non_matched_queries) : tuple of lists
        ``final_matches`` holds ``(query_file, source_file)`` for accepted
        matches. ``non_matched_queries`` holds ``(query_file, best_source)`` for
        rejected ones, where ``best_source`` is None when no candidate survived
        verification (or the query could not be read).

    Raises
    ------
    ValueError
        If ``feature_detector`` is unknown, or ``matcher`` is unknown while
        using SIFT.

    Side effects
    ------------
    Writes ``results_<...>.pkl`` and ``non_matched_queries_<...>.pkl`` to the
    current working directory.
    """
    import cv2
    import numpy as np
    import pickle
    from tqdm import tqdm

    # Initialize feature detector
    if feature_detector == 'SIFT':
        keypoint_detector = cv2.SIFT_create() # SIFT is great for robustness
    elif feature_detector == 'ORB':
        keypoint_detector = cv2.ORB_create(nfeatures=2000) # ORB is faster
    else:
        raise ValueError(f"Invalid feature detector: {feature_detector}. Choose between 'SIFT' and 'ORB'.")

    # Initialize matcher - finds most similar descriptors between query-source imgs
    if feature_detector == 'ORB':
        # ORB cannot be directly used with FLANN, so brute force is always used.
        # ORB descriptors are binary strings: compare them with the Hamming
        # distance rather than BFMatcher's default L2 norm.
        keypoint_matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
    elif matcher == 'BF':
        keypoint_matcher = cv2.BFMatcher() # default L2 norm, appropriate for SIFT
    elif matcher == 'FLANN':
        keypoint_matcher = cv2.FlannBasedMatcher()
    else:
        raise ValueError(f"Invalid matcher: {matcher}. Choose between 'BF' and 'FLANN'.")

    # Don't run expensive RANSAC unless we have at least this many 'good' matches.
    # A homography needs at least 4 correspondences, so never go below that.
    PRE_RANSAC_THRESHOLD = max(ransac_threshold, 4)

    # If the *best* match we find still has fewer inliers than this, we consider it a "no match".
    ACCEPTANCE_THRESHOLD = acceptance_threshold

    # Main loop
    final_matches = []
    non_matched_queries = []
    for query_path in tqdm(query_img_files, desc="Matching queries"): #Loop over all query images to check for matches/duplicates
        query_img = cv2.imread(os.path.join(query_img_folder, query_path), cv2.IMREAD_GRAYSCALE)
        if query_img is None:
            # cv2.imread returns None for missing/non-image files; report it as unmatched
            tqdm.write(f"Could not read query image, skipping: {query_path}")
            non_matched_queries.append((query_path, None))
            continue

        best_source_path, best_score = None, 0 # best candidate so far and its inlier count

        # Variants to check. Horizontal flip only: a vertical flip is a horizontal
        # flip plus a 180° rotation, and the rotation is already handled by the descriptor.
        variants_to_try = [
            ("original", query_img),
            ("flipped horizontally", cv2.flip(query_img, 1)),
        ]

        for variant_name, img_to_match in variants_to_try:

            # Get keypoints for the current variant (original or flipped)
            q_kp, q_des = keypoint_detector.detectAndCompute(img_to_match, None)
            if q_des is None:
                continue  # This variant has no features/descriptors, try next one

            # Loop over ALL sources
            for source_path, (s_kp, s_des) in tqdm(source_features.items(), desc=f"Matching {query_path} ({variant_name})", leave=False): #leave=False to avoid extra prints
                if s_des is None:
                    continue

                # k=2: the second nearest neighbour is needed for Lowe's ratio test
                matches = keypoint_matcher.knnMatch(q_des, s_des, k=2)
                good_matches = _ratio_test_filter(matches, lowe_ratio)

                # Geometric Verification (RANSAC) - Only run if we have enough good matches
                if len(good_matches) < PRE_RANSAC_THRESHOLD:
                    continue

                # (x, y) locations of the matched keypoints in the query and source images
                q_pts = np.float32([q_kp[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
                s_pts = np.float32([s_kp[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)

                # RANSAC fits a homography mapping query -> source from random minimal
                # subsets and counts how many correspondences agree with it within
                # `pixel_tolerance` pixels. `mask` flags each match as inlier (1) or outlier (0).
                M, mask = cv2.findHomography(q_pts, s_pts, cv2.RANSAC, pixel_tolerance) #(srcPoints, dstPoints, method, ransacReprojThreshold)
                if M is None: # no consistent 3x3 homography was found
                    continue

                num_inliers = int(mask.sum()) #Score - number of inliers
                inlier_ratio = num_inliers / len(good_matches)

                # Keep the strongest candidate seen so far (across original and flipped variants)
                if num_inliers > best_score and inlier_ratio >= min_inlier_ratio:
                    best_source_path, best_score = source_path, num_inliers

        # All sources and all variants checked: apply the final acceptance filter
        if best_score >= ACCEPTANCE_THRESHOLD:
            final_matches.append((query_path, best_source_path))
        else:
            # The best match we found was still too weak (only few inliers).
            non_matched_queries.append((query_path, best_source_path))

    # File names keep the original scheme; the optional tag is appended at the end
    run_id = feature_detector+'_'+matcher+'_ransac'+str(ransac_threshold)+'_acc'+str(acceptance_threshold)
    if output_tag:
        run_id += '_'+str(output_tag)

    # Save results to pickle file
    results_pickle_path = 'results_'+run_id+'.pkl'
    with open(results_pickle_path, 'wb') as f:
        pickle.dump(final_matches, f)
    print(f"Results saved to {results_pickle_path}")

    # Save non-matched queries to pickle file
    non_matched_pickle_path = 'non_matched_queries_'+run_id+'.pkl'
    with open(non_matched_pickle_path, 'wb') as f:
        pickle.dump(non_matched_queries, f)
    print(f"Non-matched queries saved to {non_matched_pickle_path}")

    return final_matches, non_matched_queries

In [11]:
def print_results_validation(results, groundtruth=None):
    """Print how many predicted (query, source) pairs agree with the groundtruth.

    For every result the matching groundtruth row and the result itself are
    printed, followed by the number of correct matches and the groundtruth rows
    whose query does not appear in ``results`` at all.

    Parameters
    ----------
    results : iterable of (query_file, source_file)
        Predicted pairs, e.g. the first value returned by ``find_matches``.
    groundtruth : pandas.DataFrame, optional
        Frame with 'query' and 'source' columns. Defaults to the module-level
        ``test_query_groundtruth``.

    Returns
    -------
    None
        Everything is reported through ``print``.

    Notes
    -----
    A result whose query is absent from the groundtruth is reported and counted
    as not correct instead of raising an IndexError.
    """
    if groundtruth is None:
        groundtruth = test_query_groundtruth

    correct_matches = []
    for result in results:
        query_name, predicted_source = result[0], result[1]
        row = groundtruth[groundtruth['query'] == query_name]
        if row.empty:
            # No groundtruth for this query: it cannot be verified
            print(f"Query not found in groundtruth: {query_name}")
            continue

        print(row)
        print(result)

        if row['source'].values[0] == predicted_source:
            correct_matches.append(result)

    print("Number of correct matches: ", len(correct_matches), " out of ", len(results))

    # Print groundtruth rows whose query is not in results
    results_unique = {result[0] for result in results}
    missing_queries = groundtruth[~groundtruth['query'].isin(results_unique)]
    print("Queries not in results_best:")
    print(missing_queries)

### Validation Data

In [17]:
# Match the test queries (find_matches defaults) against the cached SIFT source features
results_SIFT_BF_ransac5_acc6, non_matched_queries_SIFT_BF_ransac5_acc6 = find_matches(
    source_features=source_features_SIFT_BF,
    feature_detector='SIFT',
    matcher='BF',
    ransac_threshold=5,
    acceptance_threshold=6,
)

# Compare the accepted matches with the groundtruth
print_results_validation(results_SIFT_BF_ransac5_acc6)

Matching queries: 100%|██████████| 50/50 [1:53:07<00:00, 135.76s/it]


Results saved to results_SIFT_BF_ransac5_acc6.pkl
Non-matched queries saved to non_matched_queries_SIFT_BF_ransac5_acc6.pkl
                                   query   source
18  b023abd14c6600cfe66f684fdff137e6.png  240.png
('b023abd14c6600cfe66f684fdff137e6.png', '240.png')
                                   query   source
28  e007132501dd130409defc6ba087c79e.png  779.png
('e007132501dd130409defc6ba087c79e.png', '779.png')
                                   query    source
30  d2cb060508cd8364794f2cf0019d7b9c.png  1855.png
('d2cb060508cd8364794f2cf0019d7b9c.png', '1855.png')
                                   query    source
41  7d9617443abc48dbc118d941fe8823e9.png  1935.png
('7d9617443abc48dbc118d941fe8823e9.png', '1935.png')
                                   query    source
37  4ddd6b4e1310441d8dc0c396a223029c.png  1726.png
('4ddd6b4e1310441d8dc0c396a223029c.png', '1726.png')
                                   query   source
29  771a413aa5dcc20125534b346e3c6fbb.png  266.png
('771a4

In [18]:
# Test queries whose best candidate did not reach the acceptance threshold;
# the second element is None when no source survived geometric verification.
non_matched_queries_SIFT_BF_ransac5_acc6

[('d3e3e6cb46086e23ed4e9140e99ad228.png', None),
 ('dc214392894266f271b4081bb9c7b67f.png', None)]

### Run Predictions

In [14]:
# Match the real (unlabeled) queries against the cached SIFT source features.
# NOTE: find_matches names its pickle files from detector/matcher/thresholds only,
# so with these settings it writes to the same files as the validation run.
predictions_SIFT_BF_ransac5_acc6, non_matched_queries_predictions_SIFT_BF_ransac5_acc6 = find_matches(
    query_img_files=query_img_files,
    query_img_folder=query_img_folder,
    source_features=source_features_SIFT_BF,
    feature_detector='SIFT',
    matcher='BF',
    ransac_threshold=5,
    acceptance_threshold=6,
)

Matching queries: 100%|██████████| 68/68 [26:44<00:00, 23.60s/it]  

Results saved to results_SIFT_BF_ransac5_acc6.pkl
Non-matched queries saved to non_matched_queries_SIFT_BF_ransac5_acc6.pkl





In [15]:
# Raw tuples: accepted (query, source) matches first, then the rejected queries
print(predictions_SIFT_BF_ransac5_acc6)
print(non_matched_queries_predictions_SIFT_BF_ransac5_acc6)

# Create predictions dataframe and persist it as CSV
predictions_df = pd.DataFrame(
    predictions_SIFT_BF_ransac5_acc6,
    columns=['query', 'source'],
)
predictions_df.to_csv('predictions_SIFT_BF_ransac5_acc6.csv', index=False)

# Side-by-side figure of every predicted pair for manual inspection
save_query_source_pairs_figure(
    df=predictions_df,
    query_img_folder=query_img_folder,
    save_filename='predictions_source_pairs_SIFT_BF_ransac5_acc6.png',
)

[('61930a6c0bda1d7e39c250d980d0049a.png', '418.png'), ('09cdbb77dbfe80e1d3d5f516be8ef384.png', '954.png'), ('adfb62461764b18d39e679c576dab3d5.png', '1763.png'), ('deca309278f75dbd0e59908e8d4d0890.png', '807.png'), ('ac2293afb6315d376fe6c36936a03ace.png', '412.png'), ('72fa6a858675fe14d52da5953eeb4c4a.png', '1889.png'), ('7e8480b7431196acc9736ddd906fd867.png', '2143.png'), ('3946aa20504869d539cd0aaa73b85bcf.png', '561.png'), ('796ebc5612b058916c9563e69b8063dd.png', '1977.png'), ('b70eb89d5a3da01223a2f27f1c59ca2e.png', '1850.png'), ('fa17f8c1ab5566ded40692b0c7c62905.png', '1092.png'), ('62842a52e63081d4586aae865d537c09.png', '1972.png'), ('ac8b81c6cbea588c86b4d9c8604abace.png', '646.png'), ('ccb02781b6994092ef369b522f0f868c.png', '1812.png'), ('c533a992cf1bc1c00c51fe6004f7b1f5.png', '1506.png'), ('9763a4d7e70ca501140f35b342f1db30.png', '2194.png'), ('667cda67202350486984fc87645b0f31.png', '2232.png'), ('b50adb1f5f0e8aa5302b70ec2567fc47.png', '329.png'), ('8a1b27877911694b44cf514b598ce59d