In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

In [2]:
from glob import glob
from tqdm import tqdm
from os.path import expanduser, join, basename, dirname
import xarray as xr
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from shutil import copy
from sklearn.model_selection import StratifiedKFold
import torch
from tempfile import TemporaryDirectory

from albk.data.utils import idx_to_locate
use_disjoint_files = False
from torch.utils.data import TensorDataset, DataLoader


import torch
import torch.nn as nn

from glob import glob
from os.path import expanduser, join, basename, dirname
import xarray as xr
import numpy as np
from tqdm import tqdm
import pandas as pd
from joblib import Parallel, delayed
from itertools import product
from astra.torch.models import EfficientNetClassifier,EfficientNet_B0_Weights   
from astra.torch.utils import train_fn

import torchvision.models as models
from astra.torch.metrics import accuracy_score, f1_score, precision_score, recall_score

In [3]:
# Load the saved tensors
loaded_data = torch.load("/home/rishabh.mondal/Brick-Kilns-project/albk_rishabh/tensor_data/data.pt")

# Access the tensors
index = loaded_data['index']
images = loaded_data['images']
labels = loaded_data['labels']


In [4]:
import torchvision.transforms as transforms

images=images[:2000]
labels=labels[:2000]
images = images / 255
    # mean normalize
images = (images - images.mean(dim=(0, 2, 3), keepdim=True)) / images.std(dim=(0, 2, 3), keepdim=True)
images.shape, labels.shape
aug = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomResizedCrop(224,scale=(0.2,1.0)),
    transforms.RandomGrayscale(p=0.2),
    transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
    transforms.GaussianBlur(kernel_size=23, sigma=(0.1, 2.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=50),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [5]:
images = aug(images)




In [6]:
from collections import Counter

fold_data = []  # List to store data from each fold

seed = 42  # Use your desired random seed
splitter = StratifiedKFold(n_splits=4, shuffle=True, random_state=seed)
images = images / 255
    # mean normalize
images = (images - images.mean(dim=(0, 2, 3), keepdim=True)) / images.std(dim=(0, 2, 3), keepdim=True)
for fold, (train_idx, test_idx) in enumerate(splitter.split(images, labels)):
    X_train, X_test = images[train_idx], images[test_idx]
    y_train, y_test = labels[train_idx], labels[test_idx]

    # Count occurrences of each class in train and test sets
    train_counter = Counter(y_train.numpy())
    test_counter = Counter(y_test.numpy())
    print(train_counter)
    print(test_counter)
    print(f"Fold {fold + 1} - Train: {train_counter}, Test: {test_counter}")

    fold_data.append({
        'fold': fold + 1,
        'X_train': X_train,
        'X_test': X_test,
        'y_train': y_train,
        'y_test': y_test,
        'train_counter': train_counter,
        'test_counter': test_counter
    })

Counter({0: 1369, 1: 131})
Counter({0: 457, 1: 43})
Fold 1 - Train: Counter({0: 1369, 1: 131}), Test: Counter({0: 457, 1: 43})
Counter({0: 1369, 1: 131})
Counter({0: 457, 1: 43})
Fold 2 - Train: Counter({0: 1369, 1: 131}), Test: Counter({0: 457, 1: 43})
Counter({0: 1370, 1: 130})
Counter({0: 456, 1: 44})
Fold 3 - Train: Counter({0: 1370, 1: 130}), Test: Counter({0: 456, 1: 44})
Counter({0: 1370, 1: 130})
Counter({0: 456, 1: 44})
Fold 4 - Train: Counter({0: 1370, 1: 130}), Test: Counter({0: 456, 1: 44})


In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size=512
# Lists to store metrics for each fold
accuracy_list = []
precision_list = []
recall_list = []
f1_list = []

for fold_info in fold_data:
    fold = fold_info['fold']
    print("Fold: ", fold)
    X_train = fold_info['X_train']
    y_train = fold_info['y_train']
    X_test = fold_info['X_test']
    y_test = fold_info['y_test']
    train_dataset = TensorDataset(X_train, y_train)
    test_dataset = TensorDataset(X_test, y_test)

    # Create DataLoader for training and testing
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    # Create and train the model
    #print datatype of trainloader
    print("trainloader datatype: ", train_loader.dataset.tensors[1].dtype)
    print("testloader datatype: ", test_loader.dataset.tensors[0].dtype)
    train_model = EfficientNetClassifier    (
        models.efficientnet_b0, EfficientNet_B0_Weights, n_classes=2, activation=nn.ReLU(), dropout=0.1
    ).to(device)

    iter_losses, epoch_losses = train_fn(
        train_model,
        nn.CrossEntropyLoss(),
        dataloader=train_loader,
        lr=3e-4,
        epochs=100,
        verbose=True,
        wandb_log=False,
    )

    # Evaluate the model on the test set
    with torch.no_grad():
        pred_classes = train_model.predict_class(
            dataloader=test_loader, batch_size=batch_size, verbose=True
        ).to(device)

    test_labels = y_test.to(device)
    # Calculate and print metrics for each fold
    
    accuracy = accuracy_score(pred_classes,test_labels)
    precision = precision_score( pred_classes,test_labels)
    recall = recall_score( pred_classes,test_labels)
    f1 = f1_score( pred_classes,test_labels)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\n")
    # Calculate and store metrics for each fold
    accuracy_list.append(accuracy_score( pred_classes,test_labels))
    precision_list.append(precision_score( pred_classes,test_labels))
    recall_list.append(recall_score( pred_classes,test_labels))
    f1_list.append(f1_score( pred_classes,test_labels))

# Calculate and print the mean of metrics across all folds
print("Mean Accuracy: ", sum(accuracy_list) / len(accuracy_list))
print("Mean Precision: ", sum(precision_list) / len(precision_list))
print("Mean Recall: ", sum(recall_list) / len(recall_list))
print("Mean F1: ", sum(f1_list) / len(f1_list))

Fold:  1
trainloader datatype:  torch.uint8
testloader datatype:  torch.float32


Loss: 0.00018147: 100%|██████████| 100/100 [03:55<00:00,  2.36s/it]
100%|██████████| 1/1 [00:00<00:00,  4.83it/s]


Accuracy: 0.9440
Precision: 0.7778
Recall: 0.4884
F1 Score: 0.6000


Fold:  2
trainloader datatype:  torch.uint8
testloader datatype:  torch.float32


Loss: 0.00088380: 100%|██████████| 100/100 [03:33<00:00,  2.14s/it]
100%|██████████| 1/1 [00:00<00:00,  6.93it/s]


Accuracy: 0.9460
Precision: 0.7667
Recall: 0.5349
F1 Score: 0.6301


Fold:  3
trainloader datatype:  torch.uint8
testloader datatype:  torch.float32


Loss: 0.00172171: 100%|██████████| 100/100 [04:31<00:00,  2.71s/it]
100%|██████████| 1/1 [00:00<00:00,  1.39it/s]


Accuracy: 0.9420
Precision: 0.7586
Recall: 0.5000
F1 Score: 0.6027


Fold:  4
trainloader datatype:  torch.uint8
testloader datatype:  torch.float32


Loss: 0.00771242: 100%|██████████| 100/100 [05:28<00:00,  3.28s/it]
100%|██████████| 1/1 [00:00<00:00,  5.47it/s]


Accuracy: 0.9360
Precision: 0.7727
Recall: 0.3864
F1 Score: 0.5152


Mean Accuracy:  tensor(0.9420, device='cuda:0')
Mean Precision:  tensor(0.7689, device='cuda:0')
Mean Recall:  tensor(0.4774, device='cuda:0')
Mean F1:  tensor(0.5870, device='cuda:0')


In [8]:
# Load the saved tensors
loaded_data = torch.load("/home/rishabh.mondal/Brick-Kilns-project/albk_rishabh/tensor_data/test_data.pt")

# Access the tensors
index1 = loaded_data['index']
images1 = loaded_data['images']
labels1= loaded_data['labels']

In [13]:
images1=images1[:2000]
labels1=labels1[:2000]
images1.shape, labels1.shape
images1 = images1 / 255
    # mean normalize
images1 = (images1 - images1.mean(dim=(0, 2, 3), keepdim=True)) / images1.std(dim=(0, 2, 3), keepdim=True)
images1=aug(images1)



In [14]:
from collections import Counter

fold_data1 = []  # List to store data from each fold

seed = 42  # Use your desired random seed
splitter = StratifiedKFold(n_splits=4, shuffle=True, random_state=seed)
# images1 = images1 / 255
#     # mean normalize
# images1 = (images1 - images1.mean(dim=(0, 2, 3), keepdim=True)) / images1.std(dim=(0, 2, 3), keepdim=True)
for fold, (train_idx, test_idx) in enumerate(splitter.split(images1, labels1)):
    X_train, X_test = images1[train_idx], images1[test_idx]
    y_train, y_test = labels1[train_idx], labels1[test_idx]

    # Count occurrences of each class in train and test sets
    train_counter = Counter(y_train.numpy())
    test_counter = Counter(y_test.numpy())
    print(train_counter)
    print(test_counter)
    print(f"Fold {fold + 1} - Train: {train_counter}, Test: {test_counter}")

    fold_data1.append({
        'fold': fold + 1,
        'X_train': X_train,
        'X_test': X_test,
        'y_train': y_train,
        'y_test': y_test,
        'train_counter': train_counter,
        'test_counter': test_counter
    })

Counter({0: 1308, 1: 192})
Counter({0: 437, 1: 63})
Fold 1 - Train: Counter({0: 1308, 1: 192}), Test: Counter({0: 437, 1: 63})
Counter({0: 1309, 1: 191})
Counter({0: 436, 1: 64})
Fold 2 - Train: Counter({0: 1309, 1: 191}), Test: Counter({0: 436, 1: 64})
Counter({0: 1309, 1: 191})
Counter({0: 436, 1: 64})
Fold 3 - Train: Counter({0: 1309, 1: 191}), Test: Counter({0: 436, 1: 64})
Counter({0: 1309, 1: 191})
Counter({0: 436, 1: 64})
Fold 4 - Train: Counter({0: 1309, 1: 191}), Test: Counter({0: 436, 1: 64})


In [15]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size=512
# Lists to store metrics for each fold
accuracy_list = []
precision_list = []
recall_list = []
f1_list = []

for fold_info in fold_data1:
    fold = fold_info['fold']
    print("Fold: ", fold)
    X_train = fold_info['X_train']
    y_train = fold_info['y_train']
    X_test = fold_info['X_test']
    y_test = fold_info['y_test']
    train_dataset = TensorDataset(X_train, y_train)
    test_dataset = TensorDataset(X_test, y_test)

    # Create DataLoader for training and testing
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    # Create and train the model
    #print datatype of trainloader
    print("trainloader datatype: ", train_loader.dataset.tensors[1].dtype)
    print("testloader datatype: ", test_loader.dataset.tensors[0].dtype)
    train_model = EfficientNetClassifier    (
        models.efficientnet_b0, EfficientNet_B0_Weights, n_classes=2, activation=nn.ReLU(), dropout=0.1
    ).to(device)

    iter_losses, epoch_losses = train_fn(
        train_model,
        nn.CrossEntropyLoss(),
        dataloader=train_loader,
        lr=3e-4,
        epochs=100,
        verbose=True,
        wandb_log=False,
    )

    # Evaluate the model on the test set
    with torch.no_grad():
        pred_classes = train_model.predict_class(
            dataloader=test_loader, batch_size=batch_size, verbose=True
        ).to(device)

    test_labels = y_test.to(device)
    # Calculate and print metrics for each fold
    
    accuracy = accuracy_score(pred_classes,test_labels)
    precision = precision_score( pred_classes,test_labels)
    recall = recall_score( pred_classes,test_labels)
    f1 = f1_score( pred_classes,test_labels)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\n")
    # Calculate and store metrics for each fold
    accuracy_list.append(accuracy_score( pred_classes,test_labels))
    precision_list.append(precision_score( pred_classes,test_labels))
    recall_list.append(recall_score( pred_classes,test_labels))
    f1_list.append(f1_score( pred_classes,test_labels))

# Calculate and print the mean of metrics across all folds
print("Mean Accuracy: ", sum(accuracy_list) / len(accuracy_list))
print("Mean Precision: ", sum(precision_list) / len(precision_list))
print("Mean Recall: ", sum(recall_list) / len(recall_list))
print("Mean F1: ", sum(f1_list) / len(f1_list))



Fold:  1
trainloader datatype:  torch.uint8
testloader datatype:  torch.float32


Loss: 0.00283062: 100%|██████████| 100/100 [05:38<00:00,  3.39s/it]
100%|██████████| 1/1 [00:00<00:00,  6.33it/s]


Accuracy: 0.8800
Precision: 0.5517
Recall: 0.2540
F1 Score: 0.3478


Fold:  2
trainloader datatype:  torch.uint8
testloader datatype:  torch.float32


Loss: 0.00087110: 100%|██████████| 100/100 [05:06<00:00,  3.06s/it]
100%|██████████| 1/1 [00:00<00:00,  7.12it/s]


Accuracy: 0.8920
Precision: 0.6923
Recall: 0.2812
F1 Score: 0.4000


Fold:  3
trainloader datatype:  torch.uint8
testloader datatype:  torch.float32


Loss: 0.00034782: 100%|██████████| 100/100 [05:38<00:00,  3.38s/it]
100%|██████████| 1/1 [00:00<00:00,  7.39it/s]


Accuracy: 0.8940
Precision: 0.7895
Recall: 0.2344
F1 Score: 0.3614


Fold:  4
trainloader datatype:  torch.uint8
testloader datatype:  torch.float32


Loss: 0.00173927: 100%|██████████| 100/100 [05:24<00:00,  3.24s/it]
100%|██████████| 1/1 [00:00<00:00,  7.01it/s]


Accuracy: 0.8740
Precision: 0.5128
Recall: 0.3125
F1 Score: 0.3883


Mean Accuracy:  tensor(0.8850, device='cuda:0')
Mean Precision:  tensor(0.6366, device='cuda:0')
Mean Recall:  tensor(0.2705, device='cuda:0')
Mean F1:  tensor(0.3744, device='cuda:0')


In [16]:
print(images.shape, labels.shape)
print(images1.shape, labels1.shape)  
images = images / 255
    # mean normalize
images = (images - images.mean(dim=(0, 2, 3), keepdim=True)) / images.std(dim=(0, 2, 3), keepdim=True)
images1 = images1 / 255
    # mean normalize
images1 = (images1 - images1.mean(dim=(0, 2, 3), keepdim=True)) / images1.std(dim=(0, 2, 3), keepdim=True)
train_dataset = TensorDataset(images, labels)
train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True, num_workers=8)
test_dataset = TensorDataset(images1, labels1)
test_loaderr = DataLoader(test_dataset, batch_size=512, shuffle=False, num_workers=8)

torch.Size([2000, 3, 224, 224]) torch.Size([2000])
torch.Size([2000, 3, 224, 224]) torch.Size([2000])


In [17]:
train_model = EfficientNetClassifier(
    models.efficientnet_b0,EfficientNet_B0_Weights, n_classes=2, activation=nn.ReLU(), dropout=0.1
).to(device)

iter_losses, epoch_losses = train_fn(
    train_model,
    nn.CrossEntropyLoss(),
    dataloader=train_loader,
    lr=3e-4,
    epochs=100,
    verbose=True,
    wandb_log=False,
)

Loss: 0.00016192: 100%|██████████| 100/100 [10:45<00:00,  6.45s/it]


In [18]:
print(len(labels1))
print(len(images1))
test_dataset = TensorDataset(images1, labels1)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False, num_workers=8)
with torch.no_grad():
    pred_classes =train_model.predict_class(
        dataloader=test_loader, batch_size=254, verbose=True
    ).to(device)
print(len(pred_classes))
test_labels = labels1.to(device)
print("Accuracy: ", accuracy_score(pred_classes,test_labels))
print("Precision: ", precision_score(pred_classes,test_labels))
print("Recall: ", recall_score(pred_classes,test_labels))
print("F1: ", f1_score(pred_classes,test_labels))

2000
2000


100%|██████████| 4/4 [00:04<00:00,  1.21s/it]

2000
Accuracy:  tensor(0.8695, device='cuda:0')
Precision:  tensor(0., device='cuda:0')
Recall:  tensor(0., device='cuda:0')
F1:  tensor(0., device='cuda:0')





: 