In [6]:
import math
import time
import sys
import numpy as np
from wildlife_tools.features import DeepFeatures
from wildlife_tools.similarity import CosineSimilarity
from wildlife_tools.inference import KnnClassifier

from fedn.utils.helpers.helpers import save_metrics

from torch.optim import SGD
from wildlife_tools.train import ArcFaceLoss, BasicTrainer
import itertools

from fedn.utils.helpers.helpers import save_metadata


from math import floor
import torch
import torchvision
import collections
import timm
from wildlife_tools.data.dataset import WildlifeDataset
import random
from wildlife_datasets.datasets import MacaqueFaces, Cows2021v2, LeopardID2022
import torchvision.transforms as T
from wildlife_datasets import datasets, loader, metrics
from wildlife_datasets import splits
import os
import json
import pandas as pd
from fedn.utils.helpers.helpers import get_helper

HELPER_MODULE = "numpyhelper"
helper = get_helper(HELPER_MODULE)

import torch

import torch._utils
try:
    torch._utils._rebuild_tensor_v2
except AttributeError:
    def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks):
        tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
        tensor.requires_grad = requires_grad
        tensor._backward_hooks = backward_hooks
        return tensor
    torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2

import torch.nn as nn
from torch.nn import init
from torchvision import models
from torch.autograd import Variable
# import pretrainedmodels

######################################################################
def weights_init_kaiming(m):
    classname = m.__class__.__name__
    # print(classname)
    if classname.find('Conv') != -1:
        init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') # For old pytorch, you may use kaiming_normal.
    elif classname.find('Linear') != -1:
        init.kaiming_normal_(m.weight.data, a=0, mode='fan_out')
        init.constant_(m.bias.data, 0.0)
    elif classname.find('BatchNorm1d') != -1:
        init.normal_(m.weight.data, 1.0, 0.02)
        init.constant_(m.bias.data, 0.0)

def weights_init_classifier(m):
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        init.normal_(m.weight.data, std=0.001)
        init.constant_(m.bias.data, 0.0)

# Defines the new fc layer and classification layer
# |--Linear--|--bn--|--relu--|--Linear--|
class ClassBlock(nn.Module):
    def __init__(self, input_dim, class_num, droprate, relu=False, bnorm=True, num_bottleneck=512, linear=True, return_f = False):
        super(ClassBlock, self).__init__()
        self.return_f = return_f
        add_block = []
        if linear:
            add_block += [nn.Linear(input_dim, num_bottleneck)]
        else:
            num_bottleneck = input_dim
        if bnorm:
            add_block += [nn.BatchNorm1d(num_bottleneck)]
        if relu:
            add_block += [nn.LeakyReLU(0.1)]
        if droprate>0:
            add_block += [nn.Dropout(p=droprate)]
        add_block = nn.Sequential(*add_block)
        add_block.apply(weights_init_kaiming)

        classifier = []
        classifier += [nn.Linear(num_bottleneck, class_num)]
        classifier = nn.Sequential(*classifier)
        classifier.apply(weights_init_classifier)

        self.add_block = add_block
        self.classifier = classifier
    def forward(self, x):
        x = self.add_block(x)
        if self.return_f:
            f = x
            x = self.classifier(x)
            return x,f
        else:
            x = self.classifier(x)
            return x

# Define the ResNet50-based Model
class ft_net(nn.Module):

    def __init__(self, class_num, droprate=0.5, stride=2):
        super(ft_net, self).__init__()
       
        model_ft = models.resnet50(pretrained=True)
        # model_ft=torch.load('saved_res50.pkl')
        # avg pooling to global pooling
        if stride == 1:
            model_ft.layer4[0].downsample[0].stride = (1,1)
            model_ft.layer4[0].conv2.stride = (1,1)
        model_ft.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.model = model_ft
        self.classifier = ClassBlock(2048, class_num, droprate)

    def forward(self, x):
        x = self.model.conv1(x)
        x = self.model.bn1(x)
        x = self.model.relu(x)
        x = self.model.maxpool(x)
        x = self.model.layer1(x)
        x = self.model.layer2(x)
        x = self.model.layer3(x)
        x = self.model.layer4(x)
        x = self.model.avgpool(x)
        x = x.view(x.size(0), x.size(1))
        x = self.classifier(x)
        return x




# dir_path = os.path.dirname(os.path.realpath(__file__))
# sys.path.append(os.path.abspath(dir_path))
def full_data(data_path, is_query = True):
    # splitter = splits.ClosedSetSplit(0.8)
    metadata = LeopardID2022('/home/wellvw12/')
    # idx_train, idx_test = splitter.split(metadata.df)[0]
    # df_train = metadata.df.loc[idx_train]
    # df_test = metadata.df.loc[idx_test]

    transform = T.Compose([T.Resize([256, 128]), 
                       T.ToTensor(), 
                    #    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
                    #    T.RandomResizedCrop(size=(224, 224), scale=(0.8, 1.0)),
                    ])
    if is_query:
        df = pd.read_csv(data_path)
        return WildlifeDataset(df,metadata.root, transform=transform)
    else:
        df = pd.read_csv(data_path)
        return WildlifeDataset(df,metadata.root, transform=transform)

def load_data(data_path, is_train=True):
    """Load data from disk.
    :param data_path: Path to data file.
    :type data_path: str
    :param is_train: Whether to load training or test data.
    :type is_train: bool
    :return: Tuple of data and labels.
    :rtype: tuple
    """
    transform = T.Compose([T.Resize([224, 224]), 
                       T.ToTensor(), 
                       T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
                       T.RandomResizedCrop(size=(224, 224), scale=(0.8, 1.0)),
                    ])

    # print(data_path)
    metadata = LeopardID2022('/home/wellvw12/')

    if is_train:
        df = pd.read_csv(data_path)
        return WildlifeDataset(df,metadata.root, transform=transform)
    else:
        df = pd.read_csv(data_path)
        return WildlifeDataset(df,metadata.root, transform=transform)
    
# def compile_model():
#     """Compile the pytorch model.

#     :return: The compiled model.
#     :rtype: torch.nn.Module
#     """
#     # return timm.create_model('hf-hub:BVRA/MegaDescriptor-T-224', num_classes=0, pretrained=False)
#     return timm.create_model('resnet18', num_classes=0, pretrained=True)

def compile_model(class_num=751, droprate=0.5, stride=2):
    """Create an empty ft_net model with the same architecture as used during training
    
    Args:
        class_num (int): Number of classes (must match original training)
        droprate (float): Dropout rate (must match original training)
        stride (int): Stride parameter (must match original training)
        
    Returns:
        ft_net: The uncompiled model with random initialization
    """
    net = ft_net(751, stride=1)

    # model = ft_net(class_num=class_num, droprate=droprate, stride=stride)
    
    # Important: Remove the classifier if your saved parameters don't include it
    # (Uncomment if needed)
    # net.classifier = nn.Sequential() 

    # input = Variable(torch.FloatTensor(8, 3, 256, 128))
    # output = net(input)
    
    return net


def save_parameters(model, out_path):
    """Save model paramters to file.

    :param model: The model to serialize.
    :type model: torch.nn.Module
    :param out_path: The path to save to.
    :type out_path: str
    """
    parameters_np = [val.cpu().numpy() for _, val in model.state_dict().items()]
    helper.save(parameters_np, out_path)


import torch
import torch.nn as nn
from collections import OrderedDict

def load_parameters(model_path, num_classes=430, device='cuda'):
    """
    Load federated model parameters while handling architecture mismatches.
    
    Args:
        model_path (str): Path to saved model file
        num_classes (int): Number of output classes
        device (str): Device to load model onto ('cuda' or 'cpu')
        
    Returns:
        torch.nn.Module: Loaded model in evaluation mode
    """
    # Initialize model with correct architecture
    model = ft_net(num_classes).to(device)
    
    # Load saved state dict
    checkpoint = torch.load(model_path, map_location=device)
    
    # Handle different save formats
    if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
        state_dict = checkpoint['model_state_dict']
    else:
        state_dict = checkpoint
    
    # Create filtered state dict matching current model architecture
    model_state_dict = model.state_dict()
    
    # 1. Handle classifier mismatch (remove classifier if needed)
    if 'classifier.classifier.0.weight' in state_dict:
        if isinstance(model.classifier.classifier, nn.Sequential):
            # Remove classifier weights from loaded state dict
            state_dict = {k: v for k, v in state_dict.items() 
                         if not k.startswith('classifier.classifier')}
    
    # 2. Filter only matching parameters
    filtered_state_dict = OrderedDict()
    for k, v in state_dict.items():
        if k in model_state_dict:
            if v.size() == model_state_dict[k].size():
                filtered_state_dict[k] = v
            else:
                print(f"Size mismatch for {k}: loaded {v.size()}, model {model_state_dict[k].size()}")
        else:
            print(f"Skipping unexpected key: {k}")
    
    # Load filtered state dict
    model.load_state_dict(filtered_state_dict, strict=False)
    
    # Ensure classifier is empty if needed
    if isinstance(model.classifier.classifier, nn.Sequential):
        model.classifier.classifier = nn.Sequential()
    
    model.eval()
    return model
            



def init_seed(out_path="seed.npz"):
    """Initialize seed model and save it to file.

    :param out_path: The path to save the seed model to.
    :type out_path: str
    """
    # Init and save
    model = compile_model()
    save_parameters(model, out_path)
    
import importlib
import json

HELPER_PLUGIN_PATH = "fedn.utils.helpers.plugins.{}"


def get_helper(helper_module_name):
    """Return an instance of the helper class.

    :param helper_module_name: The name of the helper plugin module.
    :type helper_module_name: str
    :return: A helper instance.
    :rtype: class: `fedn.utils.helpers.helpers.HelperBase`
    """
    helper_plugin = HELPER_PLUGIN_PATH.format(helper_module_name)
    helper = importlib.import_module(helper_plugin)
    return helper.Helper()


def save_metadata(metadata, filename):
    """Save metadata to file.

    :param metadata: The metadata to save.
    :type metadata: dict
    :param filename: The name of the file to save to.
    :type filename: str
    """
    with open(filename + "-metadata", "w") as outfile:
        json.dump(metadata, outfile)


def load_metadata(filename):
    """Load metadata from file.

    :param filename: The name of the file to load from.
    :type filename: str
    :return: The loaded metadata.
    :rtype: dict
    """
    with open(filename + "-metadata", "r") as infile:
        metadata = json.load(infile)
    return metadata


def save_metrics(metrics, filename):
    """Save metrics to file.

    :param metrics: The metrics to save.
    :type metrics: dict
    :param filename: The name of the file to save to.
    :type filename: str
    """
    with open(filename, "w") as outfile:
        json.dump(metrics, outfile)
        
def train(in_model_path, out_model_path, data_path=None, batch_size=32, epochs=1):
    """Complete a model update.

    Load model paramters from in_model_path (managed by the FEDn client),
    perform a model update, and write updated paramters
    to out_model_path (picked up by the FEDn client).

    :param in_model_path: The path to the input model.
    :type in_model_path: str
    :param out_model_path: The path to save the output model to.
    :type out_model_path: str
    :param data_path: The path to the data file.
    :type data_path: str
    :param batch_size: The batch size to use.
    :type batch_size: int
    :param epochs: The number of epochs to train.
    :type epochs: int
    :param lr: The learning rate to use.
    :type lr: float
    """
    # Load data to return wilflifedataset
    # data_path: FEDN_DATA_PATH= ./data/clients/1/
    if data_path is None:
        data_path = os.environ.get("FEDN_DATA_PATH")


    # lr = 0.001
    torch.cuda.empty_cache()
    x_train = load_data(data_path + 'train.csv')
    # x_train = full_data(data_path + 'train.csv')
    # Load parmeters and initialize model

    model = load_parameters(in_model_path)
    # Train
    lr =0.0002
    objective = ArcFaceLoss(
        num_classes=x_train.num_classes,
        embedding_size=768,
        margin=0.5,
        scale=64
    )
    # Optimize parameters in backbone and in objective using single optimizer.
    params = itertools.chain(model.parameters(), objective.parameters())
    optimizer = SGD(params=params, lr=lr, momentum=0.9)
    min_lr = optimizer.defaults.get("lr") * 1e-3
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100, eta_min=min_lr)
    
    # set_seed(0)
    torch.manual_seed(42)
    trainer = BasicTrainer(
        dataset=x_train, 
        model=model,
        objective=objective,
        optimizer=optimizer,
        scheduler=None,
        batch_size=batch_size,
        accumulation_steps=2,
        num_workers=2,
        epochs=100,
        device='cuda',
    )

    
    if os.path.exists("/home/wellvw12/fed_wild/checkpoint.pth"):
        trainer.load("checkpoint.pth")
    
    trainer.train()
    trainer.save("./","checkpoint.pth",False)
    
    # Metadata needed for aggregation server side
    
    metadata = {
        # num_examples are mandatory
        "num_examples": len(x_train),
        "batch_size": batch_size,
        "epochs": epochs,
        "lr": lr,
    }

    # Save JSON metadata file (mandatory)
    save_metadata(metadata, out_model_path)

    # Save model update (mandatory)
    save_parameters(model, out_model_path)

def validate(in_model_path, data_path=None):
 
    x_query = full_data(data_path + "query.csv")
    x_gallery  = full_data(data_path + "gallery.csv", is_query=False)

    # Load model
    model = load_parameters(in_model_path)
    model.eval()

    extractor = DeepFeatures(model)
    
    query = extractor(x_query)
    database = extractor(x_gallery)

    similarity_function = CosineSimilarity()
    similarity = similarity_function(query, database)

    classifier = KnnClassifier(k=1, database_labels=x_gallery.labels_string)
    predictions = classifier(similarity['cosine'])
    accuracy = np.mean(x_query.labels_string == predictions)

    print(f'accuracy: {accuracy}')

    # JSON schema
    report = {
        "training_accuracy": accuracy,
    }


In [None]:
#train
train('/home/wellvw12/fed_wild/project/seed.npz','/home/wellvw12/fed_wild/project/seedZ.npz',data_path='/home/wellvw12/data_iid/clients/1/')

/home/wellvw12/data_iid/clients/1/train.csv


Epoch 0: 100%|██████████████████████████████████████████████████████| 17/17 [00:05<00:00,  3.07it/s]
Epoch 1: 100%|██████████████████████████████████████████████████████| 17/17 [00:04<00:00,  4.23it/s]
Epoch 2: 100%|██████████████████████████████████████████████████████| 17/17 [00:04<00:00,  4.20it/s]
Epoch 3: 100%|██████████████████████████████████████████████████████| 17/17 [00:04<00:00,  4.21it/s]
Epoch 4: 100%|██████████████████████████████████████████████████████| 17/17 [00:04<00:00,  4.19it/s]
Epoch 5: 100%|██████████████████████████████████████████████████████| 17/17 [00:04<00:00,  4.19it/s]
Epoch 6: 100%|██████████████████████████████████████████████████████| 17/17 [00:04<00:00,  4.16it/s]
Epoch 7: 100%|██████████████████████████████████████████████████████| 17/17 [00:04<00:00,  4.17it/s]
Epoch 8: 100%|██████████████████████████████████████████████████████| 17/17 [00:04<00:00,  4.16it/s]
Epoch 9: 100%|██████████████████████████████████████████████████████| 17/17 [00:04<00:00,  

In [7]:
validate('/home/wellvw12/resnet50_ft_net.pth','/home/wellvw12/full_leopard_test/')

  checkpoint = torch.load(model_path, map_location=device)
100%|█████████████████████████████████████████████████████████████████| 6/6 [00:46<00:00,  7.79s/it]
100%|███████████████████████████████████████████████████████████████| 34/34 [04:24<00:00,  7.77s/it]

accuracy: 0.041666666666666664



  results = pd.DataFrame(results).T.fillna(method="ffill").T


In [1]:
import os
import csv
import pandas as pd
from wildlife_tools.data.dataset import WildlifeDataset
import torchvision.transforms as T

# Set your root directory
root_dir = '/home/wellvw12/data_iid'
metadata = '/home/wellvw12/Cows'

all_rows = []
columns = None
file_paths = []

# Walk through all directories and files
for dirpath, dirnames, filenames in os.walk(root_dir):
    # print(dirpath,dirnames,filenames)
    for filename in filenames:
        # print(filename)
        if filename.lower() == 'test.csv':
            # print(filename)
            file_path = os.path.join(dirpath, filename)
            file_paths.append(file_path)

li = []
for file_path in file_paths:
    df = pd.read_csv(file_path, index_col=None, header=0)
    li.append(df)
frame = pd.concat(li, axis=0, ignore_index=True)
transform = T.Compose([T.Resize([224, 224]), 
                       T.ToTensor(), 
                       T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
                       T.RandomResizedCrop(size=(224, 224), scale=(0.8, 1.0)),
                    ])
d = WildlifeDataset(frame,metadata.root, transform=transform)
# Create DataFrame manually
# df = pd.DataFrame(all_rows, columns=columns)
frame


  from .autonotebook import tqdm as notebook_tqdm


ValueError: No objects to concatenate

In [15]:
query_df , gallery_df = load_data('/home/wellvw12/data_iid/clients/1/' + "test.csv")

/home/wellvw12/data_iid/clients/1/test.csv


500
131 367
