In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("/data/tim/heronWorkspace/src")

In [2]:

from AEHeronModelV1 import AEHeronModel
from lightning.pytorch.callbacks import ModelSummary
from torchsummary import summary
import HeronImageLoader
from torch.utils.data import DataLoader, BatchSampler
from matplotlib import pyplot as plt
import lightning.pytorch as pl
from lightning.pytorch.tuner import Tuner
import pandas as pd
from lightning.pytorch.loggers import CSVLogger
from MLPV1 import MLP
from models import MLPBasic, CAEBigBottleneck
import numpy as np
import torch.nn.functional as F
import torch


In [None]:
# Instantiate the AEHeronModel and print a per-layer summary for one
# 3 x 215 x 323 input; the summary pass is run on the CPU.
cae = AEHeronModel(
    num_workers_loader=4,
    batch_size=16,
)
summary(cae, input_size=(3, 215, 323), device="cpu")

In [None]:
# Learning-rate search: run Lightning's LR finder on `cae`, plot the sweep
# with the suggested point marked, and print the suggested value.
trainer = pl.Trainer(
    accelerator='cuda',
    devices=[1],  # list of GPU indices to use (here: only GPU 1)
    max_epochs=5,
)
tuner = Tuner(trainer)
lr_finder = tuner.lr_find(cae)
lr_finder.plot(suggest=True, show=True)
suggested_lr = lr_finder.suggestion()
print(suggested_lr)

In [None]:
# Fit `cae` for a single epoch on the GPU. Metrics are logged on every step
# and written by a CSVLogger below logs/my-model.
csv_logger = CSVLogger(save_dir="logs/", name="my-model")
trainer = pl.Trainer(
    accelerator='cuda',
    max_epochs=1,
    log_every_n_steps=1,
    logger=csv_logger,
)
trainer.fit(cae)


In [None]:
### Plot
# Loss curves for the basicMLPV1 run: average every logged metric per epoch
# and plot train/validation loss against the epoch number.

metrics = pd.read_csv("/data/tim/heronWorkspace/logs/basicMLPV1/version_0/metrics.csv")

agg_col = "epoch"
# One row per epoch holding the mean of each metric column (NaN entries are
# skipped by mean()). as_index=False keeps "epoch" available as a column.
df_metrics = metrics.groupby(agg_col, as_index=False).mean()

# Plot against the actual epoch values rather than the row position so the
# "Epoch" axis label is always accurate.
ax = df_metrics.plot(
    x=agg_col,
    y=["train_loss", "val_loss"],
    grid=True,
    legend=True,
    xlabel="Epoch",
    ylabel="Loss",
)

ax.figure.savefig("loss_over_epochs.jpg")

# df_metrics[["train_acc", "val_acc"]].plot(
#     grid=True, legend=True, xlabel="Epoch", ylabel="ACC"
# )

# plt.savefig("suggest_acc.pdf")

plt.show()


In [None]:
# Restore a trained AEHeronModel from a checkpoint and run prediction with it.
# NOTE: `trainer` is not created here; the cell reuses whichever Trainer an
# earlier cell left in the kernel.
checkpointPath = "/data/tim/heronWorkspace/lightning_logs/version_32/checkpoints/epoch=0-step=2.ckpt"
caeLoaded = AEHeronModel.load_from_checkpoint(checkpointPath)
trainer.predict(caeLoaded)

In [None]:
# play around with different loader settings
# Shows the first two batches: every image is plotted once as it comes out of
# the loader (normalized) and once after UnNormalize.
data = HeronImageLoader.HeronDataset()
loader = DataLoader(data, batch_size=2, num_workers=1, shuffle=False) # batch_size=64, num_workers=3
unnorm = HeronImageLoader.UnNormalize()
for i, (imArr, _, _) in enumerate(loader):
    # Un-normalize once per batch instead of once per image. The clone keeps
    # `imArr` untouched in case UnNormalize modifies its argument in place
    # (its implementation is not visible here), so the "Normalized" plots
    # below always show the loader output.
    unnormArr = unnorm(imArr.clone())
    for j in range(len(imArr)):
        print("Normalized:")
        # Mean over the two trailing axes -> one value per channel.
        print("Mean: " + str(imArr[j].mean(axis=(1, 2))))
        # imshow expects channels last, hence the permute.
        plt.imshow(imArr[j].permute(1, 2, 0))
        plt.show()

        print("UnNormalized:")
        plt.imshow(unnormArr[j].permute(1, 2, 0))
        plt.show()
    # Stop after the second batch.
    if i > 0:
        break

In [None]:
# Loss curves for the basicCAE run: average every logged metric per epoch and
# plot train/validation loss against the epoch number.
metrics = pd.read_csv("/data/tim/heronWorkspace/logs/basicCAE/version_0/metrics.csv")

agg_col = "epoch"
# One row per epoch holding the mean of each metric column (NaN entries are
# skipped by mean()). as_index=False keeps "epoch" available as a column.
df_metrics = metrics.groupby(agg_col, as_index=False).mean()

# Plot against the actual epoch values rather than the row position so the
# "Epoch" axis label is always accurate.
ax = df_metrics.plot(
    x=agg_col,
    y=["train_loss", "val_loss"],
    grid=True,
    legend=True,
    xlabel="Epoch",
    ylabel="Loss",
)
ax.figure.savefig("suggest_loss.jpg")

In [None]:
# Restore the basicCAE checkpoint and run prediction with a default Trainer.
checkpointPath = "/data/tim/heronWorkspace/logs/basicCAE/version_0/checkpoints/epoch=9-step=630.ckpt"
caeLoaded = AEHeronModel.load_from_checkpoint(checkpointPath)
trainer = pl.Trainer()
trainer.predict(caeLoaded)

In [None]:
# Big-bottleneck model, checkpoint saved at epoch=149: predict on the
# "onlyPos" split, 16 images per batch, in dataset order.
checkpointPath = "/data/tim/heronWorkspace/logs/basicCAEBigBottleneck/version_0/checkpoints/epoch=149-step=35400.ckpt"
caeLoaded = AEHeronModel.load_from_checkpoint(checkpointPath)

onlyPosDataset = HeronImageLoader.HeronDataset(set="onlyPos", resize_to=(215, 323))
dataLoader = DataLoader(
    onlyPosDataset,
    batch_size=16,
    num_workers=4,
    shuffle=False,
)

trainer = pl.Trainer()
res = trainer.predict(caeLoaded, dataloaders=dataLoader)

In [None]:
# Checkpoint saved at epoch=9 of the basicCAE run: predict on the "onlyPos"
# split, 16 images per batch, in dataset order.
# NOTE(review): this cell was headed "big bottleneck", but the checkpoint path
# points to logs/basicCAE - confirm which model is intended.
checkpointPath = "/data/tim/heronWorkspace/logs/basicCAE/version_0/checkpoints/epoch=9-step=630.ckpt"
caeLoaded = AEHeronModel.load_from_checkpoint(checkpointPath)

onlyPosDataset = HeronImageLoader.HeronDataset(set="onlyPos", resize_to=(215, 323))
dataLoader = DataLoader(
    onlyPosDataset,
    batch_size=16,
    num_workers=4,
    shuffle=False,
)

trainer = pl.Trainer()
res = trainer.predict(caeLoaded, dataloaders=dataLoader)

In [None]:
# Report how many samples the "testMLP" split contains.
dataset = HeronImageLoader.HeronDataset(
    set="testMLP",
    resize_to=(215, 323),
)
nSamples = len(dataset)
print(nSamples)

In [None]:
# Train the MLP for one epoch on top of the frozen basicCAE checkpoint.
checkpointPath = "/data/tim/heronWorkspace/logs/basicCAE/version_0/checkpoints/epoch=9-step=630.ckpt"
caeLoaded = AEHeronModel.load_from_checkpoint(checkpointPath)
caeLoaded.freeze()

mlp = MLP(
    mlpModel=MLPBasic(),
    cae=caeLoaded,
    batch_size=16,
    num_workers_loader=4,
)

trainer = pl.Trainer(accelerator='cuda', max_epochs=1, log_every_n_steps=1)
trainer.fit(mlp)

In [None]:
# Restore the frozen basicCAE checkpoint and a trained MLP checkpoint, then
# run prediction with the restored MLP.
caeCheckpointPath = "/data/tim/heronWorkspace/logs/basicCAE/version_0/checkpoints/epoch=9-step=630.ckpt"
caeLoaded = AEHeronModel.load_from_checkpoint(caeCheckpointPath)
caeLoaded.freeze()

# The encoder and the bare MLP network are handed to load_from_checkpoint as
# extra keyword arguments.
mlpCheckpointPath = "/data/tim/heronWorkspace/lightning_logs/version_58/checkpoints/epoch=0-step=155.ckpt"
mlpLoaded = MLP.load_from_checkpoint(
    mlpCheckpointPath,
    cae=caeLoaded,
    mlpModel=MLPBasic(),
)

trainer = pl.Trainer(accelerator='cuda', max_epochs=1, log_every_n_steps=1)
trainer.predict(mlpLoaded)

In [None]:
# Attach the camera annotations (matched on fotocode) to every row of the
# image-properties table (matched on ImagePath). The right join keeps all
# image rows, including those without an annotation. Sorted by image path.
df1 = pd.read_csv(
    "/data/shared/herons/TinaDubach_data/CameraData_2017_july.csv",
    sep=";",
    encoding='unicode_escape',
    on_bad_lines="warn",
)
df2 = pd.read_csv(
    "/data/tim/heronWorkspace/ImageData/imagePropsSBU4.csv",
    on_bad_lines="warn",
)
df = (
    df1
    .merge(df2, how="right", left_on="fotocode", right_on="ImagePath")
    .sort_values(by=["ImagePath"])
)
df.head(10)

In [None]:
# test distance from last prediction to current

def heatMap(before: torch.Tensor, after: torch.Tensor, stepY, stepX):
    """Tile-wise mean squared error between two tensors.

    The last two axes of ``before`` are cut into non-overlapping tiles of
    ``stepY`` x ``stepX`` elements. For every tile the MSE between ``before``
    and ``after`` is computed over the tile and over all leading axes.

    Args:
        before: Tensor whose last two axes are tiled.
        after: Tensor compared against ``before``, sliced with the same
            indices.
        stepY: Tile size (and stride) along the second-to-last axis.
        stepX: Tile size (and stride) along the last axis.

    Returns:
        A 2-D tensor with one entry per complete tile, ordered like the tiles,
        converted to the type of ``before`` via ``type_as``. Incomplete tiles
        at the bottom/right border are dropped.
    """
    height, width = before.shape[-2], before.shape[-1]
    tileErrors = []
    for top in range(0, height - stepY + 1, stepY):
        rowErrors = []
        for left in range(0, width - stepX + 1, stepX):
            # The ellipsis pins the slices to the last two axes, the same axes
            # the loop bounds come from. Slicing with a leading ":" only hit
            # those axes for 3-D input and mis-sliced any other rank.
            tileBefore = before[..., top:top + stepY, left:left + stepX]
            tileAfter = after[..., top:top + stepY, left:left + stepX]
            # .item() turns the per-tile loss into a plain Python float.
            rowErrors.append(F.mse_loss(tileBefore, tileAfter).item())
        tileErrors.append(rowErrors)
    return torch.tensor(tileErrors).type_as(before)

# Run the frozen basicCAE checkpoint over the sorted "test" split, one image
# per batch. For each frame, plot the input, the model output and tile-wise
# error maps, including how the error map changed since the previous frame.
caeLoaded = AEHeronModel.load_from_checkpoint("/data/tim/heronWorkspace/logs/basicCAE/version_0/checkpoints/epoch=9-step=630.ckpt")
caeLoaded.freeze()
dataLoader = DataLoader(HeronImageLoader.HeronDataset(set="test", resize_to=(215, 323), sorted=True), batch_size=1, shuffle=False, num_workers=4)
print(len(dataLoader.dataset.imagePaths))
unnorm = HeronImageLoader.UnNormalize()

# Tile size of the error heat map (rows, columns).
stepY = 5
stepX = 5

firstBatch = 200              # batches before this index are skipped
lastBatch = firstBatch + 101  # last batch that is plotted (102 frames in total)

panelTitles = [
    "input",
    "model output",
    "tile MSE",
    "|tile MSE - previous frame|",
    "increase of tile MSE vs. previous frame",
]

# Error map of the previous frame. It starts as zeros, so the two "previous
# frame" panels of the first plotted image just repeat its own error map.
lastImd = np.zeros((215 // stepY, 323 // stepX))

# Stream the loader rather than building list(dataLoader), which would hold
# every batch of the split in memory just to reach index 200.
for batchIdx, img in enumerate(dataLoader):
    if batchIdx < firstBatch:
        continue

    # First element of the batch is the image tensor; move it to the model's device.
    img = img[0].to(caeLoaded.device)
    pred = caeLoaded(img)

    # batch_size is 1: take the single image and output, undo the normalization.
    img, pred = [unnorm(x) for x in [img[0], pred[0]]]
    imd = heatMap(img, pred, stepY, stepX)

    # Channels last + numpy for matplotlib.
    img, pred = [x.permute(1, 2, 0).cpu().numpy() for x in [img, pred]]
    imd = imd.cpu().numpy()

    f, a = plt.subplots(1, 5, figsize=(50, 10))

    a[0].imshow(img)
    a[1].imshow(pred)
    ma = a[2].imshow(np.abs(imd), cmap="hot", interpolation='none')
    a[3].imshow(np.abs(imd - lastImd), cmap="hot", interpolation='none')

    # Keep only the tiles whose error grew compared to the previous frame.
    diff = imd - lastImd
    a[4].imshow(np.where(diff < 0, 0, diff), cmap="hot", interpolation='none')

    for axis, title in zip(a, panelTitles):
        axis.set_title(title)

    plt.show()
    # Release the figure; around a hundred large figures are created in this loop.
    plt.close(f)

    lastImd = imd

    if batchIdx >= lastBatch:
        break

# Camera evaluation
Evaluate which camera is best suited for training.

In [29]:
# Show the annotation rows that have no value in the "code" column.
cameraDataDF = pd.read_csv(
    "/data/shared/herons/TinaDubach_data/CameraData_2017_july.csv",
    sep=";",
    encoding='unicode_escape',
    on_bad_lines="warn",
)
codeMissing = cameraDataDF["code"].isna()
cameraDataDF = cameraDataDF[codeMissing]
cameraDataDF

Unnamed: 0,camera,mode,date,time,code,code1,temperatur,infrared,species,stream,stream section,grey heron,fotocode,forname,year
434,KBU4,,,,,,,,,KBU,4,0,2017_KBU4_NA,_,2017
1509,SBU1,,,,,,,,"06120841-06140999 camera turned 90°, only bank...",SBU,1,0,2017_SBU1_NA,_,2017
1510,SBU1,,,,,,,,06140001-6140073; 0615007406150128; only bank 90°,SBU,1,0,2017_SBU1_NA,_,2017
1511,SBU1,,,,,,,,06150129-06230999 same,SBU,1,0,2017_SBU1_NA,_,2017
1512,SBU1,,,,,,,,06230001-06270348 total 348,SBU,1,0,2017_SBU1_NA,_,2017
4055,SGN1,,,,,,,,Red-crested pochard (Netta rufina),SGN,1,0,2017_SGN1_NA,_,2017
4170,SGN2,,,,,,,,,SGN,2,0,2017_SGN2_NA,_,2017


In [32]:
# Hand-written notes on the camera positions, keyed by camera id.
description = {
    "SBU4": "house in the background with a lot of anomalies, lots of herons in the test set",
    "SBU1": "house and street beside the river with lots of anomalies, cam wrong position for some photos",
    "NEN1": "good camera, not many anomalies in the background, change of camera position in the end of the summer",
    "SGN1": "camera in reed plants for ~1/2 of the samples, could possibly be used for the first 1/2 part",
    "KBU1": "sreet in the background, many cars",
    "SGN3": "street with many people in the background",
    "SBU3": "good camera, cable in front of it, not much going on in the background",
    "SGN4": "street with many people in the background",
    "NEN3": "street in the background with cars, could be cut out, otherwise calm background",
    "SBU2": "path in the background, medium many people",
}

# Check whether a different camera trap would be a better choice: reload the
# full annotation table and print the number of rows per camera.
cameraDataDF = pd.read_csv(
    "/data/shared/herons/TinaDubach_data/CameraData_2017_july.csv",
    sep=";",
    encoding='unicode_escape',
    on_bad_lines="warn",
)
folders = cameraDataDF.groupby(["camera"]).size().sort_values(ascending=False)
print(folders)

# Per camera, over rows that have a "code": count of non-null "grey heron"
# entries (nrAnomalies) and their sum (nrHerons), largest count first.
rowsWithCode = cameraDataDF[cameraDataDF["code"].notna()]
foldersDF = (
    rowsWithCode
    .groupby(["camera"])
    .agg(nrAnomalies=("grey heron", "count"), nrHerons=("grey heron", "sum"))
    .sort_values(by=["nrAnomalies"], ascending=False)
)
# Cameras without a note get None.
foldersDF["description"] = [description.get(cam) for cam in foldersDF.index]
foldersDF.to_csv("camEval.csv")

camera
SBU4    1932
SBU1     313
NEN1     261
SGN1     239
KBU1     233
SGN3     231
SBU3     214
SGN4     213
NEN3     177
SBU2     156
NEN2     131
SGN2     115
KBU2     101
PSU1      70
NEN4      64
PSU3      42
GBU4      39
GBU1      29
PSU2      20
KBU3      18
KBU4      15
dtype: int64


In [16]:
# First 20 SBU1 rows that have a "code" but are not marked as grey heron.
isSBU1 = cameraDataDF["camera"] == "SBU1"
noHeron = cameraDataDF["grey heron"] == 0
hasCode = cameraDataDF["code"].notna()
cameraDataDF[isSBU1 & noHeron & hasCode].head(20)

Unnamed: 0,camera,mode,date,time,code,code1,temperatur,infrared,species,stream,stream section,grey heron,fotocode,forname,year
1233,SBU1,m,2017-01-30,17:01:30,1300388.0,1300388.0,4.0,n,Mallard (Anas platyrhynchos),SBU,1,0,2017_SBU1_01300388,_,2017
1475,SBU1,t,,,5250019.0,5250019.0,,n,Common Merganser (Mergus merganser),SBU,1,0,2017_SBU1_05250019,_,2017
1476,SBU1,t,,,6010712.0,6010712.0,,,Yellow-legged Gull (Larus michahellis),SBU,1,0,2017_SBU1_06010712,_,2017
1484,SBU1,t,,,6060269.0,6060269.0,,,Common Merganser (Mergus merganser),SBU,1,0,2017_SBU1_06060269,_,2017
1497,SBU1,,,,6290617.0,6290617.0,,,Common Merganser (Mergus merganser),SBU,1,0,2017_SBU1_06290617,_,2017
1504,SBU1,,,,7060317.0,7060317.0,,,Eurasian Sparrowhawk (Accipiter nisus),SBU,1,0,2017_SBU1_07060317,_,2017
