Paper implementation of "Lung Cancer Classification in Histopathology Images Using Multiresolution Efficient Nets"

https://doi.org/10.1155/2023/7282944

In [None]:
!pip install torch torchvision timm tqdm

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import transforms, datasets
import timm
from tqdm import tqdm

In [None]:
# Run on the GPU when CUDA is available; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Preprocessing applied to every image: resize to 224x224 (the input size used
# here for EfficientNetB0), convert to a tensor, then normalise each channel
# with the fixed mean/std values below.
resize_step = transforms.Resize((224, 224))
normalize_step = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
transform = transforms.Compose([resize_step, transforms.ToTensor(), normalize_step])

In [None]:
# Load dataset: every image returned by the ImageFolder is passed through `transform`.
# The path is a raw string so the backslashes of the Windows path are taken
# literally instead of being parsed as (invalid) escape sequences such as `\C`
# and `\d`, which newer Python versions warn about.
dataset = datasets.ImageFolder(root=r'F:\Colon_Classifier\data\data', transform=transform)

In [None]:
# Split the dataset into training (80%) and testing (the remaining 20%) sets.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
# Seed the split so that re-running the notebook assigns the same images to each
# subset; with an unseeded split the reported test accuracy is not reproducible.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

In [None]:
# Wrap each subset in a DataLoader: training batches of 16 are shuffled,
# evaluation batches of 10 keep a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=10, shuffle=False)

In [None]:
# Define the EfficientNet model: a pretrained EfficientNet-B0 from timm whose
# classifier is replaced by a three-layer MLP head sized to the dataset's classes.
model_name = 'efficientnet_b0'
model = timm.create_model(model_name, pretrained=True)
num_features = model.classifier.in_features
model.classifier = nn.Sequential(
    nn.Linear(num_features, 512),
    nn.ReLU(),
    nn.Linear(512, 128),
    nn.ReLU(),
    # The head ends in raw logits on purpose: nn.CrossEntropyLoss applies
    # log-softmax itself, so a trailing nn.Softmax would normalise twice and
    # flatten the gradients. Softmax has no parameters, so the state_dict keys
    # are the same with or without it, and arg-max predictions are unaffected.
    nn.Linear(128, len(dataset.classes)),
)

In [None]:
# Place the network on the selected device before the optimizer is built.
model = model.to(device)

# Cross-entropy objective, optimised with Adam at a learning rate of 1e-4.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [None]:
# Training loop: one full pass over train_loader per epoch, reporting the mean
# per-sample loss at the end of each epoch.
num_epochs = 10                # In the paper all models are trained for 100 epochs
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    for inputs, labels in tqdm(train_loader, desc=f'Epoch {epoch + 1}/{num_epochs}'):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # `loss` is already the mean over the batch (default reduction), so weight
        # it by the batch size to accumulate a sum of per-sample losses. This
        # also stays exact when the last batch is smaller than the others.
        train_loss += loss.item() * inputs.size(0)

    # Calculate average training loss per sample
    avg_train_loss = train_loss / len(train_loader.dataset)
    print(f'Training Loss: {avg_train_loss}')

In [None]:
# Evaluate on the held-out split: count how many predictions match the labels.
model.eval()
correct, total = 0, 0

with torch.no_grad():
    for batch_inputs, batch_labels in tqdm(test_loader, desc='Testing'):
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        # Index of the highest score along the class dimension.
        predicted = model(batch_inputs).argmax(dim=1)

        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()

Testing: 100%|██████████| 81/81 [00:05<00:00, 15.79it/s]


In [None]:
# Fraction of test samples whose predicted class equals the label, shown as a percentage.
accuracy = correct / total
accuracy_report = f'Test Accuracy: {accuracy * 100:.2f}%'
print(accuracy_report)

Test Accuracy: 81.49%


In [None]:
# Persist only the model's state_dict (not the whole module object) to the working directory.
trained_weights = model.state_dict()
torch.save(trained_weights, 'paper_implementation.pth')

In [None]:
################### Inference Script #######################################

In [None]:
import os
import pandas as pd
from torchvision import transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
from PIL import Image
import torch
import timm

# Define transform for test images
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Define the path to the folder containing test images
test_folder = r'F:\Colon_Classifier\Colon-tissue-classification\ism_project_2023\ism_project_2023\test'

# Load the list of test images. Entries are sorted so the CSV row order is
# deterministic (os.listdir returns them in arbitrary order), and anything that
# is not a regular file is skipped because Image.open cannot read a directory.
test_images = [
    candidate
    for candidate in (
        os.path.join(test_folder, entry) for entry in sorted(os.listdir(test_folder))
    )
    if os.path.isfile(candidate)
]

# Initialize EfficientNet model. pretrained=False: load_state_dict below replaces
# every weight, so fetching the pretrained weights first would be wasted work.
model = timm.create_model('efficientnet_b0', pretrained=False)
num_features = model.classifier.in_features
# The head must mirror the layout used for training so the checkpoint keys match.
# NOTE: `dataset` (and `nn`) come from the training cells earlier in the notebook.
model.classifier = nn.Sequential(
    nn.Linear(num_features, 512),
    nn.ReLU(),
    nn.Linear(512, 128),
    nn.ReLU(),
    nn.Linear(128, len(dataset.classes)),
    nn.Softmax(dim=1)  # Softmax layer for classification
)

# Load the trained model weights. map_location='cpu' lets a checkpoint saved from
# a GPU run load on a CPU-only machine; the model stays on the CPU in this cell.
model.load_state_dict(
    torch.load(
        r'C:\Users\simon\AppData\Local\Microsoft\Windows\INetCache\IE\ZVGOQXTB\paper_implementation.pth',
        map_location='cpu',
    )
)

# Set the model to evaluation mode
model.eval()

# Collect one record per image and build the DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the frame on every call.
prediction_records = []

with torch.no_grad():
    for image_path in tqdm(test_images, desc='Making Predictions', unit='image', leave=False):
        # Load and preprocess the image; the context manager closes the file handle.
        with Image.open(image_path) as raw_image:
            img = transform(raw_image.convert('RGB'))
        img = img.unsqueeze(0)  # Add batch dimension

        # Make the prediction and take the index of the highest-scoring class
        model_output = model(img)
        predicted_class = model_output.argmax(dim=1)

        prediction_records.append({
            'Image': os.path.basename(image_path),
            'Prediction': predicted_class.item(),
        })

# Explicit columns keep the CSV header intact even when the folder is empty.
predictions_df = pd.DataFrame(prediction_records, columns=['Image', 'Prediction'])

# Save the predictions to a CSV file
predictions_df.to_csv('paper_predictions.csv', index=False)

# The message now names the file that is actually written.
print("Predictions saved to 'paper_predictions.csv'")


In [None]:
import pandas as pd
import torch
import os
from PIL import Image
import torchvision.transforms as transforms
import timm
from tqdm import tqdm
#from timm.models import hub

# Configuration for the CSV-driven inference run.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
TEST_IMG_DIR = r'F:\Colon_Classifier\Colon-tissue-classification\ism_project_2023\ism_project_2023\test'
MODEL_WEIGHTS_PATH = r'C:\Users\simon\AppData\Local\Microsoft\Windows\INetCache\IE\ZVGOQXTB\paper_implementation.pth'
CSV_FILE = r"F:\Colon_Classifier\Colon-tissue-classification\ism_project_2023\ism_project_2023\test.csv"
model_name = 'efficientnet_b0'

# One source of truth for the number of output classes, so the timm call and
# the custom head cannot disagree.
# NOTE: `dataset` comes from the training cells earlier in the notebook.
num_classes = len(dataset.classes)

model = timm.create_model(model_name, pretrained=False, num_classes=num_classes)
num_features = model.classifier.in_features
# Same head layout as the trained model so the checkpoint keys match. It ends in
# raw logits: `predict` normalises/arg-maxes the output itself, so a Softmax layer
# here would be applied twice. Softmax has no parameters, so leaving it out does
# not change the state_dict keys. `torch.nn` is spelled out because this cell
# imports `torch` but not `nn`.
model.classifier = torch.nn.Sequential(
    torch.nn.Linear(num_features, 512),
    torch.nn.ReLU(),
    torch.nn.Linear(512, 128),
    torch.nn.ReLU(),
    torch.nn.Linear(128, num_classes),
)
# map_location lets a checkpoint saved from a GPU run load on a CPU-only machine.
model.load_state_dict(torch.load(MODEL_WEIGHTS_PATH, map_location=DEVICE))
# model_cfg = dict(labels=['normal tissue', 'serrated lesion', 'adenocarcinoma', 'adenoma'])
# hub.push_to_hf_hub(model, model_name, model_config=model_cfg)
model.to(DEVICE)
model.eval()

print("INFO: MODEL LOADED !!!")

# Preprocessing applied to each test image before it is fed to the model.
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

def predict(image_path, model, transform):
    """Classify a single image file and return the predicted class index.

    Args:
        image_path: Path of the image file to classify.
        model: Network to run. It is called on a batch of one image that has
            been moved to the module-level ``DEVICE``.
        transform: Callable applied to the RGB PIL image. Its result must
            support ``unsqueeze(0)`` and ``.to(DEVICE)``.

    Returns:
        int: Index of the highest-scoring class for the image.
    """
    # The context manager closes the file handle once the pixels are converted.
    with Image.open(image_path) as raw_image:
        image = transform(raw_image.convert('RGB'))
    batch = image.unsqueeze(0).to(DEVICE)  # add batch dimension
    with torch.no_grad():
        output = model(batch)
    # Softmax is monotonic, so the arg-max of the model output is already the
    # predicted class; normalising first (possibly a second time, if the model
    # ends in its own Softmax layer) would not change the answer.
    return int(output[0].argmax().item())

# Read the list of test images, predict a class for each one, and write the
# result into the 'label' column of the same frame.
df = pd.read_csv(CSV_FILE)

for index, image_name in tqdm(df['name'].items(), total=len(df)):
    # The CSV stores file names without an extension; images are .jpg files.
    image_path = os.path.join(TEST_IMG_DIR, image_name + '.jpg')
    df.at[index, 'label'] = predict(image_path, model, transform)

# Store the labels as integers before saving.
df['label'] = df['label'].astype(int)
df.to_csv('predictions.csv', index=False)

In [None]:
import pandas as pd
# Tally how many rows of the saved predictions carry each label.
csv = pd.read_csv('predictions.csv')
label_counts = csv['label'].value_counts()
print(label_counts)

In [None]:
!pip install pandas