In [None]:
import os
import cv2
import torch
from torchvision import transforms


# Directories that hold the two labelled sets of reference images.
train_chart_folder = "train/chart"
train_nonchart_folder = "train/nonchart"

# Shared preprocessing pipeline applied to every image in this notebook:
# ToTensor runs first, then the result is resized to 224x224.
to_tensor = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize(size=(224, 224)),
    ]
)

def load_images_as_tensors(folder):
    """Load every PNG/JPEG image directly inside ``folder`` as a tensor.

    Each file is read with OpenCV, converted from BGR to RGB channel order,
    and passed through the module-level ``to_tensor`` pipeline.

    Parameters
    ----------
    folder : str
        Directory to scan. Only its immediate entries are considered;
        sub-directories are not traversed.

    Returns
    -------
    list
        One ``to_tensor`` result per readable image, in sorted filename
        order. Empty if the folder contains no matching, readable images.

    Warns
    -----
    UserWarning
        For each file with an image extension that OpenCV could not decode;
        such files are skipped instead of aborting the whole load.
    """
    import warnings  # local import keeps this cell self-contained

    tensors = []
    # os.listdir returns entries in arbitrary order; sort for reproducible output.
    for filename in sorted(os.listdir(folder)):
        # Case-insensitive match so "IMG.PNG" / "photo.JPG" are not silently dropped.
        if not filename.lower().endswith((".png", ".jpg", ".jpeg")):
            continue
        path = os.path.join(folder, filename)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising;
            # passing None on to cvtColor would fail with an opaque OpenCV error.
            warnings.warn(f"Skipping unreadable image: {path}")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        tensors.append(to_tensor(img))
    return tensors

# Load both reference sets with the same loader, chart folder first.
chart_tensors, nonchart_tensors = (
    load_images_as_tensors(image_dir)
    for image_dir in (train_chart_folder, train_nonchart_folder)
)

# Report how many images ended up in each set.
print(
    f"Loaded {len(chart_tensors)} chart images and {len(nonchart_tensors)} non-chart images."
)


Loaded 15 chart images and 15 non-chart images.


Used pandas for data analysis.

In [3]:
def preprocess_input_image(img_path):
    """Read one image from disk and run it through the ``to_tensor`` pipeline.

    The image is loaded with OpenCV, converted from BGR to RGB channel order,
    and then transformed by the module-level ``to_tensor``.

    Parameters
    ----------
    img_path : str
        Path of the image file to load.

    Returns
    -------
    The value produced by ``to_tensor`` for the RGB image.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file (missing path or undecodable image).
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None on failure instead of raising; fail here with
        # the offending path rather than with a cryptic error inside cvtColor.
        raise FileNotFoundError(
            f"Could not read image (missing or not a decodable image): {img_path!r}"
        )
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return to_tensor(img)

# Load and preprocess the query image that is later scored against the reference tensors.
input_tensor = preprocess_input_image("page.png")


In [9]:
from torch.nn.functional import cosine_similarity
from skimage.metrics import structural_similarity as ssim
import numpy as np
from skimage.transform import resize

def compute_similarity(input_tensor, train_tensors):
    """Score ``input_tensor`` against each reference tensor with cosine similarity and SSIM.

    Parameters
    ----------
    input_tensor : torch.Tensor
        3-D image tensor. It is permuted with ``(1, 2, 0)`` before SSIM and the
        last axis is then treated as the channel axis, so the channel axis is
        expected to come first.
        NOTE(review): ``data_range=1.0`` assumes pixel values lie in [0, 1] —
        confirm against the preprocessing pipeline.
    train_tensors : iterable of torch.Tensor
        Reference tensors in the same layout as ``input_tensor``.

    Returns
    -------
    tuple of (list of float, list of float)
        ``(cos_sims, ssim_scores)`` with one entry per reference tensor, in
        iteration order. Both lists are empty when ``train_tensors`` is empty.
    """
    # Views of the query image that do not change across iterations.
    input_flat = input_tensor.flatten().unsqueeze(0)
    input_np = input_tensor.permute(1, 2, 0).numpy()

    cos_sims = []
    ssim_scores = []

    for t in train_tensors:
        t_np = t.permute(1, 2, 0).numpy()
        if t_np.shape != input_np.shape:
            # Bring a mismatched reference to the query's shape *before* either
            # metric runs. Cosine similarity on the flattened tensors cannot
            # handle differing lengths, so resizing only for SSIM (after the
            # cosine call) would never be reached.
            t_np = resize(
                t_np, input_np.shape, preserve_range=True, anti_aliasing=True
            ).astype(input_np.dtype)
            t = torch.from_numpy(t_np).permute(2, 0, 1)

        cos_sim = cosine_similarity(input_flat, t.flatten().unsqueeze(0)).item()
        cos_sims.append(cos_sim)

        # Only the scalar score is used, so the full SSIM map is not requested.
        ssim_score = ssim(t_np, input_np, channel_axis=2, data_range=1.0)
        # Plain float keeps both returned lists the same element type.
        ssim_scores.append(float(ssim_score))

    return cos_sims, ssim_scores

# Score the query image against each reference set, chart set first.
(cos_chart, ssim_chart), (cos_nonchart, ssim_nonchart) = (
    compute_similarity(input_tensor, reference_set)
    for reference_set in (chart_tensors, nonchart_tensors)
)

# Best match per metric and per class.
print(
    "Max Cosine with chart images:",
    max(cos_chart),
)
print(
    "Max Cosine with non-chart images:",
    max(cos_nonchart),
)
print(
    "Max SSIM with chart images:",
    max(ssim_chart),
)
print(
    "Max SSIM with non-chart images:",
    max(ssim_nonchart),
)


Max Cosine with chart images: 0.9999929070472717
Max Cosine with non-chart images: 0.9954390525817871
Max SSIM with chart images: 0.9975357
Max SSIM with non-chart images: 0.5917752


In [24]:
# Cosine-based verdict: "chart" only if the best chart match strictly beats
# the best non-chart match.
print(
    "Input image is likely a chart"
    if max(cos_chart) > max(cos_nonchart)
    else "Input image is likely non-chart"
)


Input image is likely a chart


In [25]:
# SSIM-based verdict: "chart" only if the best chart match strictly beats
# the best non-chart match.
print(
    "Input image is likely a chart"
    if max(ssim_chart) > max(ssim_nonchart)
    else "Input image is likely non-chart"
)

Input image is likely a chart
