In [1]:
import os

from matplotlib import pyplot as plt
from PIL import Image
import cv2
import numpy as np

import sqlite3

from utils import *

In [2]:
color_points = np.load("apple_points3d.npy")
pts3d = color_points[:,:3]
rgb3d = color_points[:,-3:]

with open("./0/images.txt", "r") as f:
    temp_images = f.readlines()[4:][::2]
img_ids = np.array([x.split(" ")[0] for x in temp_images], dtype=np.int16)
poses = np.array([x.split(" ")[1:8] for x in temp_images], dtype=np.float32)

with open("./0/cameras.txt", "r") as f:
    temp_cameras = f.readlines()[-1]
cam_params = np.array(temp_cameras.split(" ")[-4:], dtype=np.float32)

con = sqlite3.connect("./database.db")
cur = con.cursor()
img_dict = dict()
for row in cur.execute("SELECT * FROM IMAGES"):
    img_dict[row[0]] = row[1]
con.close()

In [3]:
filenames = [img_dict[idx] for idx in img_ids]
images_path = "./images/"

In [4]:
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
scaled_points = StandardScaler().fit_transform(pts3d)
model = DBSCAN(eps=0.1, min_samples=5)
labels = model.fit_predict(scaled_points)

In [5]:
clusters = []
for i in range(labels.max()):
    curr_indices = (labels == i)
    cluster = pts3d[curr_indices]
    clusters.append(cluster)

In [6]:
from tqdm import tqdm

cluster_images = []

# for each cluster
for cluster in tqdm(clusters):
    # calculate position of cluster in each pose
    cluster_instances = []
    for i in range(len(filenames)):
        extrinsic = get_extrinsic_matrix(poses[i])
        K = get_K_matrix(cam_params)

        cam_pts = get_camera_view(cluster, extrinsic)
        pixel_pts = get_image_view(cam_pts, K)
        inliers = get_in_view_points(pixel_pts)

        if inliers.all():
            rmin, cmin = pixel_pts[:,:2].min(axis=0)
            rmax, cmax = pixel_pts[:,:2].max(axis=0)
            bbox = np.int32(np.round(np.array([rmin, cmin, rmax, cmax])))

            filename = filenames[i]

            cluster_instances.append([filename, bbox])
    
    cluster_images.append(cluster_instances)

100%|██████████| 114/114 [00:00<00:00, 282.01it/s]


In [7]:
import torch
from torchvision.models import resnet50, ResNet50_Weights
device = torch.device("cuda")

# Using pretrained weights:
weights = ResNet50_Weights.IMAGENET1K_V2
model = resnet50(weights=weights).to(device)
model.fc = torch.nn.Linear(2048, 7, bias=True).to(device)

model.load_state_dict(torch.load("resnet.pt"))
model.eval()

print("Loaded updated model")

Loaded updated model


In [8]:
from torchvision.io import read_image
from torchvision import transforms as T

transform = T.Compose([
    T.Resize((224,224), antialias=True),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

cluster_counts = []

for cluster in tqdm(cluster_images):
    counts = []
    for img_name, bbox in cluster:
        img_path = os.path.join(images_path, img_name)
        image = read_image(img_path) / 255.0
        image = image[:,bbox[1]:bbox[3], bbox[0]:bbox[2]]
        image = transform(image)
        image = image.unsqueeze(0)

        with torch.no_grad():
            pred = model(image.to(device))
            counts.append(pred.argmax().item())
    cluster_counts.append(counts)

100%|██████████| 114/114 [00:38<00:00,  2.93it/s]


In [9]:
per_cluster_count = [np.median(sorted(counts)[-3:]) for counts in cluster_counts if len(counts) > 0]

In [12]:
total = sum(per_cluster_count)
print(total)

303.0
