In [1]:
import os
import json
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms
from timm import create_model
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from collections import defaultdict
import requests
import nltk
from nltk.stem import WordNetLemmatizer
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from torchvision.transforms import autoaugment, transforms
import logging

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Download required NLTK data
nltk.download('wordnet', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)

# Initialize lemmatizer
lemmatizer = WordNetLemmatizer()

# Disable DecompressionBombWarning
Image.MAX_IMAGE_PIXELS = None

# Set up logging
logging.basicConfig(filename='image_errors.log', level=logging.ERROR, 
                    format='%(asctime)s:%(levelname)s:%(message)s')

In [3]:
def clean_and_normalize_label(label):
    """Clean and normalize a label."""
    label = str(label)
    cleaned = ' '.join(label.lower().split())
    cleaned = ''.join(e for e in cleaned if e.isalnum() or e.isspace())
    return ' '.join(lemmatizer.lemmatize(word) for word in cleaned.split())

In [4]:
class FactoryNetBBoxDataset(Dataset):
    def __init__(self, coco_json, data_dir, transform=None):
        with open(coco_json, 'r') as f:
            self.coco_data = json.load(f)
        self.data_dir = data_dir
        self.transform = transform
        
        self.image_info = {img['id']: img for img in self.coco_data['images']}
        self.category_info = {cat['id']: cat for cat in self.coco_data['categories']}
        
        self.instances = []
        for ann in self.coco_data['annotations']:
            self.instances.append({
                'image_id': ann['image_id'],
                'bbox': ann['bbox'],
                'category_id': ann['category_id']
            })
    
    def __len__(self):
        return len(self.instances)
    
    def __getitem__(self, idx):
        instance = self.instances[idx]
        img_info = self.image_info[instance['image_id']]
        img_path = os.path.join(self.data_dir, img_info['file_name'])
        
        try:
            with Image.open(img_path) as img:
                # Crop the image to the bounding box
                bbox = instance['bbox']
                img = img.crop((bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]))
                img = img.convert('RGB')
        except Exception as e:
            logging.error(f"Error opening image {img_path}: {str(e)}")
            img = Image.new('RGB', (224, 224), color='gray')
        
        if self.transform:
            img = self.transform(img)
        
        label = instance['category_id']
        
        return img, label

In [5]:
def create_coco_dataset(data_dir, excluded_files):
    coco_format = {
        "images": [],
        "annotations": [],
        "categories": []
    }
    
    category_id_map = {}
    annotation_id = 1
    image_id = 0
    
    all_files = [f for f in os.listdir(data_dir) if f.endswith('.jpg') and f not in excluded_files]
    
    for img_file in tqdm(all_files, desc="Creating COCO dataset"):
        csv_file = img_file.replace('.jpg', '.csv')
        csv_path = os.path.join(data_dir, csv_file)
        img_path = os.path.join(data_dir, img_file)
        
        if os.path.exists(csv_path):
            try:
                with Image.open(img_path) as img:
                    width, height = img.size
                
                coco_format["images"].append({
                    "id": image_id,
                    "file_name": img_file,
                    "width": width,
                    "height": height
                })
                
                df = pd.read_csv(csv_path, header=None, names=['label', 'x', 'y', 'height', 'width', 'source'])
                for _, row in df.iterrows():
                    label = clean_and_normalize_label(row['label'])
                    if label not in category_id_map:
                        category_id = len(category_id_map)
                        category_id_map[label] = category_id
                        coco_format["categories"].append({
                            "id": category_id,
                            "name": label
                        })
                    
                    coco_format["annotations"].append({
                        "id": annotation_id,
                        "image_id": image_id,
                        "category_id": category_id_map[label],
                        "bbox": [row['x'], row['y'], row['width'], row['height']],
                        "area": row['width'] * row['height'],
                        "iscrowd": 0
                    })
                    annotation_id += 1
                
                image_id += 1
            except Exception as e:
                print(f"Error processing {img_file}: {str(e)}")
    
    return coco_format, category_id_map

In [6]:
def create_improved_class_hierarchy(category_id_map):
    hierarchy = defaultdict(list)
    qid_map = {}
    
    for label, category_id in tqdm(category_id_map.items(), desc="Creating class hierarchy"):
        qid = get_wikidata_qid(label)
        if qid:
            qid_map[label] = qid
            wiki_hierarchy = get_wikidata_hierarchy(qid)
            for child, parent, _, _ in wiki_hierarchy:
                hierarchy[parent].append(child)
    
    # Improve hierarchy order
    G = nx.DiGraph(hierarchy)
    sorted_hierarchy = nx.topological_sort(G)
    return {node: hierarchy[node] for node in sorted_hierarchy}, qid_map

In [7]:
def get_wikidata_qid(label):
    url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbsearchentities",
        "format": "json",
        "language": "en",
        "search": label
    }
    headers = {
        'User-Agent': 'FactoryNetHackathon/1.0 (https://github.com/yourusername/factorynet-hackathon; youremail@example.com)'
    }
    
    response = requests.get(url, params=params, headers=headers)
    data = response.json()
    
    if data.get('search'):
        return data['search'][0]['id']
    return None

In [8]:
def get_wikidata_hierarchy(qid, max_depth=5):
    query = f"""
    SELECT ?item ?itemLabel ?parent ?parentLabel
    WHERE {{
      wd:{qid} wdt:P279* ?item.
      OPTIONAL {{ ?item wdt:P279 ?parent. }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
    }}
    LIMIT {max_depth}
    """
    
    url = "https://query.wikidata.org/sparql"
    headers = {
        'User-Agent': 'FactoryNetHackathon/1.0 (https://github.com/yourusername/factorynet-hackathon; youremail@example.com)'
    }
    
    response = requests.get(url, params={'query': query, 'format': 'json'}, headers=headers)
    data = response.json()
    
    hierarchy = []
    for item in data['results']['bindings']:
        child_qid = item['item']['value'].split('/')[-1]
        child_label = item['itemLabel']['value']
        parent_qid = item.get('parent', {}).get('value', '').split('/')[-1]
        parent_label = item.get('parentLabel', {}).get('value')
        if parent_qid and parent_label:
            hierarchy.append((child_qid, parent_qid, child_label, parent_label))
    
    return hierarchy

In [9]:
class ConvNeXtClassifier(nn.Module):
    def __init__(self, num_classes, pretrained=True):
        super(ConvNeXtClassifier, self).__init__()
        self.model = create_model('convnext_tiny', pretrained=pretrained, num_classes=num_classes)
        
    def forward(self, x):
        return self.model(x)

In [10]:
def get_class_weights(dataset):
    class_counts = defaultdict(int)
    for instance in dataset.instances:
        class_counts[instance['category_id']] += 1
    
    total_samples = len(dataset)
    class_weights = {class_id: total_samples / count for class_id, count in class_counts.items()}
    return class_weights

In [20]:
def create_weighted_sampler(dataset):
    class_counts = defaultdict(int)
    for instance in dataset.instances:
        class_counts[instance['category_id']] += 1
    
    total_samples = len(dataset)
    class_weights = {class_id: total_samples / count for class_id, count in class_counts.items()}
    
    # Use the actual dataset indices
    sample_weights = [class_weights[dataset.instances[i]['category_id']] for i in range(len(dataset))]
    return WeightedRandomSampler(sample_weights, len(sample_weights), replacement=True)


In [12]:
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=100, patience=20):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    best_val_loss = float('inf')
    best_model_wts = model.state_dict()
    history = {
        'train_loss': [], 'val_loss': [],
        'train_accuracy': [], 'val_accuracy': [],
        'train_f1': [], 'val_f1': []
    }
    no_improve = 0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
                dataloader = train_loader
            else:
                model.eval()
                dataloader = val_loader

            running_loss = 0.0
            all_preds = []
            all_labels = []

            for inputs, labels in tqdm(dataloader, desc=phase):
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

            epoch_loss = running_loss / len(dataloader.dataset)
            epoch_accuracy = accuracy_score(all_labels, all_preds)
            epoch_f1 = f1_score(all_labels, all_preds, average='weighted')
            
            print(f'{phase} Loss: {epoch_loss:.4f} Accuracy: {epoch_accuracy:.4f} F1: {epoch_f1:.4f}')

            # Record history
            history[f'{phase}_loss'].append(epoch_loss)
            history[f'{phase}_accuracy'].append(epoch_accuracy)
            history[f'{phase}_f1'].append(epoch_f1)

            if phase == 'val':
                scheduler.step()
                if epoch_loss < best_val_loss:
                    best_val_loss = epoch_loss
                    best_model_wts = model.state_dict()
                    no_improve = 0
                else:
                    no_improve += 1
                
                if no_improve >= patience:
                    print(f'Early stopping triggered after {epoch+1} epochs')
                    model.load_state_dict(best_model_wts)
                    return model, history

    model.load_state_dict(best_model_wts)
    return model, history

In [13]:
def plot_training_history(history):
    plt.figure(figsize=(15, 5))
    plt.subplot(1, 3, 1)
    plt.plot(history['train_loss'], label='Train')
    plt.plot(history['val_loss'], label='Validation')
    plt.title("Loss Over Time")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    
    plt.subplot(1, 3, 2)
    plt.plot(history['train_accuracy'], label='Train')
    plt.plot(history['val_accuracy'], label='Validation')
    plt.title("Accuracy Over Time")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend()
    
    plt.subplot(1, 3, 3)
    plt.plot(history['train_f1'], label='Train')
    plt.plot(history['val_f1'], label='Validation')
    plt.title("F1 Score Over Time")
    plt.xlabel("Epoch")
    plt.ylabel("F1 Score")
    plt.legend()
    
    plt.tight_layout()
    plt.savefig("training_history.png")
    plt.close()

In [14]:
data_dir = '/scratch/rpushpar/ASME_Hackathon/hackathon/data'
coco_json_path = 'factorynet_coco_bbox.json'

excluded_files = [
    '1711599094686.jpg', '1711596276749.jpg', '1711585942345.jpg',
    '1711568830061.jpg', '1711569373889.jpg', '1711597390150.jpg',
    '1711569098811.jpg', '1711586005189.jpg', '1711577373063.jpg',
    '1711568023828.jpg'
]

# Create COCO format dataset if it doesn't exist
if not os.path.exists(coco_json_path):
    coco_data, category_id_map = create_coco_dataset(data_dir, excluded_files)
    with open(coco_json_path, 'w') as f:
        json.dump(coco_data, f)
else:
    with open(coco_json_path, 'r') as f:
        coco_data = json.load(f)
    category_id_map = {cat['name']: cat['id'] for cat in coco_data['categories']}

# Create class hierarchy
hierarchy, qid_map = create_improved_class_hierarchy(category_id_map)

Creating COCO dataset: 100%|██████████| 6358/6358 [00:36<00:00, 173.34it/s]
Creating class hierarchy: 100%|██████████| 4077/4077 [25:07<00:00,  2.70it/s] 


In [15]:
# Save output files
with open('classes.txt', 'w') as f:
    for qid in qid_map.values():
        f.write(f"{qid}\n")

with open('entities.txt', 'w') as f:
    for label, qid in qid_map.items():
        f.write(f"{qid}\trdfs:label\t{label}\n")
    for parent, children in hierarchy.items():
        for child in children:
            f.write(f"{child}\tsubclassOf\t{parent}\n")

# Print hierarchy statistics
print(f"Number of unique classes: {len(category_id_map)}")
print(f"Number of hierarchical relationships: {sum(len(children) for children in hierarchy.values())}")

Number of unique classes: 4077
Number of hierarchical relationships: 4998


In [21]:
# Create datasets with improved data augmentation
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(30),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
    autoaugment.RandAugment(num_ops=2, magnitude=9),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

full_dataset = FactoryNetBBoxDataset(coco_json_path, data_dir, transform=train_transform)

# Create weighted sampler for the full dataset
full_sampler = create_weighted_sampler(full_dataset)

# Split the dataset
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size])

# Create subset samplers
train_sampler = torch.utils.data.SubsetRandomSampler(train_dataset.indices)
val_sampler = torch.utils.data.SubsetRandomSampler(val_dataset.indices)

# Apply different transforms
train_dataset.dataset.transform = train_transform
val_dataset.dataset.transform = val_transform

# Create data loaders
train_loader = DataLoader(full_dataset, batch_size=32, sampler=train_sampler, num_workers=4)
val_loader = DataLoader(full_dataset, batch_size=32, sampler=val_sampler, num_workers=4)

In [23]:
# Initialize model with pre-trained weights
model = ConvNeXtClassifier(num_classes=len(category_id_map), pretrained=True)

# Training setup with improved hyperparameters
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.AdamW(model.parameters(), lr=0.0005, weight_decay=0.05)
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=1e-6)

# Train the model
model, history = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=50, patience=20)

# Save the model
torch.save(model.state_dict(), 'convnext_factorynet_bbox_model.pth')

print("Training completed. Model saved as 'convnext_factorynet_bbox_model.pth'.")
print("Class hierarchy information saved in 'classes.txt' and 'entities.txt'.")

# Plot training history
plot_training_history(history)

Epoch 1/50
----------


train: 100%|██████████| 328/328 [05:02<00:00,  1.08it/s]


train Loss: 6.1865 Accuracy: 0.0152 F1: 0.0024


val: 100%|██████████| 82/82 [01:13<00:00,  1.11it/s]


val Loss: 1.5225 Accuracy: 0.0252 F1: 0.0043
Epoch 2/50
----------


train: 100%|██████████| 328/328 [04:45<00:00,  1.15it/s]


train Loss: 5.6842 Accuracy: 0.0389 F1: 0.0134


val: 100%|██████████| 82/82 [01:09<00:00,  1.17it/s]


val Loss: 1.4853 Accuracy: 0.0455 F1: 0.0179
Epoch 3/50
----------


train: 100%|██████████| 328/328 [04:41<00:00,  1.17it/s]


train Loss: 4.9182 Accuracy: 0.0871 F1: 0.0498


val: 100%|██████████| 82/82 [01:10<00:00,  1.16it/s]


val Loss: 1.4299 Accuracy: 0.0630 F1: 0.0362
Epoch 4/50
----------


train: 100%|██████████| 328/328 [04:40<00:00,  1.17it/s]


train Loss: 3.7514 Accuracy: 0.2401 F1: 0.1963


val: 100%|██████████| 82/82 [01:10<00:00,  1.17it/s]


val Loss: 1.4335 Accuracy: 0.0673 F1: 0.0460
Epoch 5/50
----------


train: 100%|██████████| 328/328 [04:37<00:00,  1.18it/s]


train Loss: 2.7183 Accuracy: 0.4699 F1: 0.4496


val: 100%|██████████| 82/82 [01:15<00:00,  1.09it/s]


val Loss: 1.4604 Accuracy: 0.0722 F1: 0.0586
Epoch 6/50
----------


train: 100%|██████████| 328/328 [04:44<00:00,  1.15it/s]


train Loss: 2.1519 Accuracy: 0.5692 F1: 0.5564


val: 100%|██████████| 82/82 [01:08<00:00,  1.20it/s]


val Loss: 1.4639 Accuracy: 0.0734 F1: 0.0649
Epoch 7/50
----------


train: 100%|██████████| 328/328 [04:43<00:00,  1.16it/s]


train Loss: 1.8574 Accuracy: 0.6031 F1: 0.5911


val: 100%|██████████| 82/82 [01:10<00:00,  1.17it/s]


val Loss: 1.4712 Accuracy: 0.0757 F1: 0.0680
Epoch 8/50
----------


train: 100%|██████████| 328/328 [04:41<00:00,  1.16it/s]


train Loss: 1.6737 Accuracy: 0.6235 F1: 0.6113


val: 100%|██████████| 82/82 [01:13<00:00,  1.11it/s]


val Loss: 1.4857 Accuracy: 0.0780 F1: 0.0725
Epoch 9/50
----------


train: 100%|██████████| 328/328 [04:39<00:00,  1.17it/s]


train Loss: 1.5505 Accuracy: 0.6478 F1: 0.6316


val: 100%|██████████| 82/82 [01:08<00:00,  1.19it/s]


val Loss: 1.4863 Accuracy: 0.0780 F1: 0.0721
Epoch 10/50
----------


train: 100%|██████████| 328/328 [04:40<00:00,  1.17it/s]


train Loss: 1.4783 Accuracy: 0.6848 F1: 0.6626


val: 100%|██████████| 82/82 [01:14<00:00,  1.10it/s]


val Loss: 1.4898 Accuracy: 0.0787 F1: 0.0730
Epoch 11/50
----------


train: 100%|██████████| 328/328 [05:33<00:00,  1.02s/it]


train Loss: 2.2658 Accuracy: 0.5664 F1: 0.5532


val: 100%|██████████| 82/82 [01:29<00:00,  1.09s/it]


val Loss: 1.5097 Accuracy: 0.0711 F1: 0.0599
Epoch 12/50
----------


train: 100%|██████████| 328/328 [06:04<00:00,  1.11s/it]


train Loss: 2.1924 Accuracy: 0.5870 F1: 0.5739


val: 100%|██████████| 82/82 [01:35<00:00,  1.16s/it]


val Loss: 1.5123 Accuracy: 0.0676 F1: 0.0600
Epoch 13/50
----------


train: 100%|██████████| 328/328 [05:57<00:00,  1.09s/it]


train Loss: 2.0138 Accuracy: 0.6091 F1: 0.5967


val: 100%|██████████| 82/82 [01:20<00:00,  1.01it/s]


val Loss: 1.4965 Accuracy: 0.0764 F1: 0.0688
Epoch 14/50
----------


train: 100%|██████████| 328/328 [05:05<00:00,  1.07it/s]


train Loss: 1.8997 Accuracy: 0.6160 F1: 0.6042


val: 100%|██████████| 82/82 [01:15<00:00,  1.08it/s]


val Loss: 1.4860 Accuracy: 0.0711 F1: 0.0636
Epoch 15/50
----------


train: 100%|██████████| 328/328 [04:39<00:00,  1.18it/s]


train Loss: 1.8022 Accuracy: 0.6299 F1: 0.6196


val: 100%|██████████| 82/82 [01:12<00:00,  1.14it/s]


val Loss: 1.4942 Accuracy: 0.0726 F1: 0.0662
Epoch 16/50
----------


train: 100%|██████████| 328/328 [04:44<00:00,  1.15it/s]


train Loss: 1.7290 Accuracy: 0.6346 F1: 0.6230


val: 100%|██████████| 82/82 [01:11<00:00,  1.14it/s]


val Loss: 1.4819 Accuracy: 0.0791 F1: 0.0727
Epoch 17/50
----------


train: 100%|██████████| 328/328 [04:51<00:00,  1.12it/s]


train Loss: 1.6690 Accuracy: 0.6369 F1: 0.6257


val: 100%|██████████| 82/82 [01:14<00:00,  1.11it/s]


val Loss: 1.4851 Accuracy: 0.0730 F1: 0.0654
Epoch 18/50
----------


train: 100%|██████████| 328/328 [04:42<00:00,  1.16it/s]


train Loss: 1.6198 Accuracy: 0.6413 F1: 0.6304


val: 100%|██████████| 82/82 [01:10<00:00,  1.17it/s]


val Loss: 1.4836 Accuracy: 0.0772 F1: 0.0686
Epoch 19/50
----------


train: 100%|██████████| 328/328 [04:44<00:00,  1.15it/s]


train Loss: 1.5936 Accuracy: 0.6472 F1: 0.6346


val: 100%|██████████| 82/82 [01:13<00:00,  1.12it/s]


val Loss: 1.4869 Accuracy: 0.0734 F1: 0.0685
Epoch 20/50
----------


train: 100%|██████████| 328/328 [04:56<00:00,  1.11it/s]


train Loss: 1.5385 Accuracy: 0.6473 F1: 0.6377


val: 100%|██████████| 82/82 [01:14<00:00,  1.10it/s]


val Loss: 1.4891 Accuracy: 0.0783 F1: 0.0719
Epoch 21/50
----------


train: 100%|██████████| 328/328 [04:54<00:00,  1.11it/s]


train Loss: 1.4981 Accuracy: 0.6521 F1: 0.6415


val: 100%|██████████| 82/82 [01:15<00:00,  1.08it/s]


val Loss: 1.5026 Accuracy: 0.0776 F1: 0.0710
Epoch 22/50
----------


train: 100%|██████████| 328/328 [04:55<00:00,  1.11it/s]


train Loss: 1.4628 Accuracy: 0.6523 F1: 0.6405


val: 100%|██████████| 82/82 [01:15<00:00,  1.09it/s]


val Loss: 1.4922 Accuracy: 0.0760 F1: 0.0681
Epoch 23/50
----------


train: 100%|██████████| 328/328 [04:49<00:00,  1.13it/s]


train Loss: 1.4291 Accuracy: 0.6541 F1: 0.6416


val: 100%|██████████| 82/82 [01:13<00:00,  1.12it/s]


val Loss: 1.5103 Accuracy: 0.0741 F1: 0.0683
Early stopping triggered after 23 epochs
Training completed. Model saved as 'convnext_factorynet_bbox_model.pth'.
Class hierarchy information saved in 'classes.txt' and 'entities.txt'.
