In [None]:
!pip install pdf2image
!apt-get install -y poppler-utils

In [None]:
import torch
import torch.nn as nn
import numpy as np
from torchvision import transforms
from PIL import Image
from pdf2image import convert_from_path
import cv2
from google.colab import files

In [None]:
def softmax(x):
    """Return the softmax of ``x`` taken along axis 0.

    The maximum along axis 0 is subtracted before exponentiating. This leaves
    the result mathematically unchanged but keeps ``np.exp`` from overflowing
    (and producing NaN) when the inputs are large.

    Args:
        x: Array-like of scores; a 1-D vector of logits in this notebook.

    Returns:
        A NumPy array with the same shape as ``x`` whose entries along axis 0
        are non-negative and sum to 1.
    """
    x = np.asarray(x, dtype=float)
    # Shifting by the max makes the largest exponent exactly 0, so exp() <= 1.
    exps = np.exp(x - np.max(x, axis=0))
    return exps / np.sum(exps, axis=0)

In [None]:
def paper_to_image(pdf_file_name: str) -> str:
    """Render the first pages of a PDF into a single 4x2 thumbnail sheet (JPEG).

    Up to eight pages are rasterised at 200 dpi and pasted left-to-right,
    top-to-bottom onto a white canvas of four columns by two rows. The sheet
    is resized to 3400x2200, written to disk, then re-read with OpenCV,
    shrunk to 680x440, has its top-left corner whitened, and is written again
    to the same path.

    Args:
        pdf_file_name: Path to the input PDF.

    Returns:
        Path of the JPEG that was written: the input path with its extension
        replaced by ``.jpg``.

    Raises:
        ValueError: If the PDF has fewer than 7 pages.
    """
    import os  # local import: the notebook's import cell does not provide os

    grid_cols, grid_rows = 4, 2
    max_pages = grid_cols * grid_rows

    # Only the first eight pages can appear on the sheet, so do not rasterise
    # the remainder of a long document.
    images = convert_from_path(pdf_file_name, 200, fmt='jpg', last_page=max_pages)
    num_page = len(images)

    if num_page < 7:
        raise ValueError('최소 7페이지 이상')

    # Every cell of the grid uses the first page's dimensions.
    page_w, page_h = images[0].width, images[0].height
    new_im = Image.new('RGB', (page_w * grid_cols, page_h * grid_rows), (255, 255, 255))

    # Paste whatever pages exist (7 or 8). With exactly 7 pages the last cell
    # stays white instead of indexing past the end of the list.
    for i, page in enumerate(images[:max_pages]):
        row, col = divmod(i, grid_cols)
        new_im.paste(page, (page_w * col, page_h * row))

    # splitext handles any extension length, not just 4-character ones.
    img_file_name = os.path.splitext(pdf_file_name)[0] + '.jpg'
    new_im = new_im.resize((3400, 2200))
    new_im.save(img_file_name)

    # The save / re-read / resize round trip is kept exactly as it was so the
    # pixels fed to the model do not change.
    img = cv2.imread(img_file_name)
    img = cv2.resize(img, (680, 440))
    # Whiten a 150x15 px strip in the top-left corner.
    # NOTE(review): presumably masks a header or stamp on page 1 — confirm.
    img[0:15, 0:150] = 255
    cv2.imwrite(img_file_name, img)

    return img_file_name

In [None]:
class CNN(nn.Module):
    """Small two-class convolutional classifier.

    Two conv/ReLU/max-pool stages (3 -> 16 -> 32 channels) feed a two-layer
    fully connected head that emits 2 logits. Each pool halves the spatial
    size and the 3x3 convolutions use padding 1, so the hard-coded flatten
    size of ``32 * 56 * 56`` requires input of shape ``(N, 3, 224, 224)``.
    """

    def __init__(self):
        # Zero-argument super() cannot go stale when the class is renamed;
        # the previous call named a class that does not exist in this file.
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            # 224 / 2 / 2 = 56 pixels per side with 32 channels after self.conv.
            nn.Linear(32 * 56 * 56, 128),
            nn.ReLU(),
            nn.Linear(128, 2)
        )

    def forward(self, x):
        """Map a ``(N, 3, 224, 224)`` batch to ``(N, 2)`` unnormalised logits."""
        x = self.conv(x)
        x = self.fc(x)
        return x

In [None]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# NOTE(review): no checkpoint is loaded in the visible cells, so the network
# keeps its random initial weights unless a state dict is restored elsewhere
# — confirm before trusting the predicted probabilities.
model = CNN().to(device)

# Switch to inference mode; as the cell's last expression this also displays
# the module summary.
model.eval()

In [None]:
# Input pipeline for the classifier: resize to 224x224, convert to a float
# tensor, then normalise each channel. The mean/std values are the widely
# used ImageNet channel statistics.
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Opens the Colab upload widget; the result maps file names to file contents.
uploaded = files.upload()
if not uploaded:
    # Without this guard a cancelled upload surfaces later as a NameError on
    # file_name. The message reads "no file was uploaded".
    raise RuntimeError('업로드된 파일이 없습니다')

for file_name in uploaded.keys():
    print(f"업로드: {file_name}")

# Only one PDF is converted: file_name still holds the last uploaded name
# once the loop above has finished.
img_file_name = paper_to_image(file_name)

In [None]:
# Softmax temperature: the logits are divided by T before normalisation, which
# flattens the resulting distribution for T > 1.
T = 5
with torch.no_grad():
    # Close the file handle once the tensor is built, and force three channels
    # so the 3-channel Normalize step always matches the image.
    with Image.open(img_file_name) as im:
        im_tensor = preprocess(im.convert("RGB")).unsqueeze(0).to(device)
    output = model(im_tensor)
    output = output.cpu().numpy() / T
    # output has a leading batch dimension of 1; take the single row of logits.
    prob = softmax(output[0])

    # Index 0 is reported as the "good paper" class.
    # NOTE(review): the class order is not visible in this notebook — confirm.
    print(f"P[Good paper | {file_name}] = {prob[0]*100:.2f}%")