In [2]:
# Mount Google Drive at /content/drive; the google.colab module is Colab-specific.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
from pathlib import Path

# Folder on the mounted Drive that holds the per-site image sub-folders.
DATA_ROOT = Path("/content/drive/MyDrive/Research/Independent study/temp_rerun/larch_casebearer/Data_Set_Larch_Casebearer")
# Destination of the image_path/label table written further down in this notebook.
OUT_CSV = Path("/content/drive/MyDrive/Research/Independent study/temp_rerun/larch_casebearer/larch_preprocessing/larch_images_labelled.csv")

In [None]:
# image extensions to include (matched case-insensitively, see images_under_images_dir)
EXTS = {".jpg", ".jpeg", ".png", ".JPG", ".JPEG", ".PNG"}

# def is_ignored(p: Path) -> bool:
#     return any(part == "CNN_classifier_old" for part in p.parts)

def images_under_images_dir(site_dir: Path):
    """Return every image file found anywhere below ``site_dir / "Images"``.

    Args:
        site_dir: A site folder that is expected to contain an ``Images``
            sub-directory.

    Returns:
        A list of file paths whose extension matches one of ``EXTS`` regardless
        of letter case (so ``.Jpg`` is accepted as well as ``.jpg``/``.JPG``).
        The list is empty when ``Images`` is missing or is not a directory.
        The order is whatever ``rglob`` yields; callers sort if they need to.
    """
    images_dir = site_dir / "Images"
    if not images_dir.is_dir():
        return []
    # Normalise once so mixed-case suffixes are not silently dropped.
    allowed = {ext.lower() for ext in EXTS}
    return [p for p in images_dir.rglob("*")
            if p.is_file() and p.suffix.lower() in allowed]

# Each immediate sub-directory of DATA_ROOT is treated as one site folder.
sub_folders = sorted(entry for entry in DATA_ROOT.iterdir() if entry.is_dir())

# Gather the images of every site, then de-duplicate and order them by their
# string form so the listing is stable between runs.
image_paths = [
    img
    for site in sub_folders
    for img in images_under_images_dir(site)
]
image_paths = sorted(set(image_paths), key=str)

print(f"Found {len(image_paths)} images.")


Found 1537 images.


In [None]:
# Spot-check a few of the discovered paths (the slice covers indices 1 through 4).
image_paths[1:5]

[PosixPath('/content/drive/MyDrive/Research/Independent study/temp_rerun/larch_casebearer/Data_Set_Larch_Casebearer/Bebehojd_20190527/Images/B01_0005.JPG'),
 PosixPath('/content/drive/MyDrive/Research/Independent study/temp_rerun/larch_casebearer/Data_Set_Larch_Casebearer/Bebehojd_20190527/Images/B01_0006.JPG'),
 PosixPath('/content/drive/MyDrive/Research/Independent study/temp_rerun/larch_casebearer/Data_Set_Larch_Casebearer/Bebehojd_20190527/Images/B01_0007.JPG'),
 PosixPath('/content/drive/MyDrive/Research/Independent study/temp_rerun/larch_casebearer/Data_Set_Larch_Casebearer/Bebehojd_20190527/Images/B01_0012.JPG')]

In [None]:
# Base directory that stored image paths are made relative to: the parent of
# DATA_ROOT, so each relative path starts with the dataset folder name.
REL_ROOT = DATA_ROOT.parent

print(REL_ROOT)

/content/drive/MyDrive/Research/Independent study/temp_rerun/larch_casebearer


In [None]:
import pandas as pd

def image_id(p: Path) -> str:
    """Return the file name of ``p`` without its final extension (``p.stem``).

    NOTE(review): no cell visible here calls this helper; confirm it is still needed.
    """
    return p.stem

# One row per discovered image. Paths are stored relative to REL_ROOT and always
# with forward slashes (as_posix), so the CSV does not depend on the OS that
# produced it and needs no backslash clean-up when it is read back.
df = pd.DataFrame({
    "image_path": [p.relative_to(REL_ROOT).as_posix() for p in image_paths]
})

df["label"] = 2  # all images labeled as 2
df.head(5)

Unnamed: 0,image_path,label
0,Data_Set_Larch_Casebearer/Bebehojd_20190527/Im...,2
1,Data_Set_Larch_Casebearer/Bebehojd_20190527/Im...,2
2,Data_Set_Larch_Casebearer/Bebehojd_20190527/Im...,2
3,Data_Set_Larch_Casebearer/Bebehojd_20190527/Im...,2
4,Data_Set_Larch_Casebearer/Bebehojd_20190527/Im...,2


In [None]:
cols = ["image_path", "label"]
# to_csv does not create missing folders, so make sure the target directory
# exists before writing (a no-op when it is already there).
OUT_CSV.parent.mkdir(parents=True, exist_ok=True)
df[cols].to_csv(OUT_CSV, index=False)
print("Wrote:", OUT_CSV)

Wrote: /content/drive/MyDrive/Research/Independent study/temp_rerun/larch_casebearer/larch_preprocessing/larch_images_labelled.csv


In [None]:
# df.label.value_counts()

In [None]:
'''
IRRELEVANT : EXTRA INFORMATION FROM IMAGES - DATE OF IMAGE TAKEN
'''

def extract_image_dates(p: Path) -> str:
    """Return the first path component of ``p`` below ``DATA_ROOT``.

    For the layout used in this notebook that component is the site folder
    name (e.g. ``Bebehojd_20190527``); the function returns it unchanged and
    does not parse a date out of it. ``"unknown"`` is returned when ``p`` has
    no component below ``DATA_ROOT``. ``Path.relative_to`` raises ``ValueError``
    when ``p`` is not located under ``DATA_ROOT``.
    """
    components = p.relative_to(DATA_ROOT).parts
    if not components:
        return "unknown"
    return components[0]

# One entry per discovered image, in the same order as image_paths.
misc_image_dates = list(map(extract_image_dates, image_paths))

### Data augmentation + Metric computation

In [4]:
# Dataset root (same value as the definition near the top of the notebook).
DATA_ROOT = Path("/content/drive/MyDrive/Research/Independent study/temp_rerun/larch_casebearer/Data_Set_Larch_Casebearer")
# Folder holding the CSV tables read and written by the cells below.
LABELLED_DATA_PATH = Path("/content/drive/MyDrive/Research/Independent study/temp_rerun/larch_casebearer/larch_preprocessing")

# Table of original image paths and labels, read by the augmentation cell.
INPUT_CSV  = LABELLED_DATA_PATH / "larch_images_labelled.csv"
# NOTE(review): not referenced by any cell visible here; confirm it is still needed.
OUTPUT_CSV = LABELLED_DATA_PATH / "larch_images_with_features.csv"

# Number of augmented copies generated for every source image.
AUGMENTS_PER_IMAGE = 3
# Seed handed to random.seed() before the augmentations are drawn.
RANDOM_SEED = 123

In [5]:
import random
random.seed(RANDOM_SEED)

import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm

import cv2
from skimage.measure import shannon_entropy
from PIL import Image, ImageEnhance, ImageOps, ImageFilter

In [None]:
def apply_trivial_augment(image: Image.Image) -> Image.Image:
    """Return ``image`` after 1–3 distinct, randomly picked simple transforms.

    The six candidate transforms (rotation, colour enhancement, mirroring,
    auto-contrast, Gaussian blur, solarisation) are shuffled with the global
    ``random`` generator, a count between 1 and 3 is drawn, and the first
    that-many candidates are applied one after another. A transform draws its
    own random parameter only at the moment it is applied.
    """
    def _rotate(im):
        return im.rotate(random.uniform(-30, 30))

    def _recolor(im):
        return ImageEnhance.Color(im).enhance(random.uniform(0.6, 1.4))

    def _blur(im):
        return im.filter(ImageFilter.GaussianBlur(radius=random.uniform(0, 1.5)))

    def _solarize(im):
        return ImageOps.solarize(im, threshold=random.randint(64, 192))

    # Keep the candidates in this exact order: shuffle results depend on it.
    candidates = [
        _rotate,
        _recolor,
        ImageOps.mirror,
        ImageOps.autocontrast,
        _blur,
        _solarize,
    ]
    random.shuffle(candidates)
    how_many = random.randint(1, 3)

    result = image
    for transform in candidates[:how_many]:
        result = transform(result)
    return result

In [None]:
AUG_CSV = LABELLED_DATA_PATH / "larch_images_augmented.csv"
AUG_DIR = DATA_ROOT / "augmented_images"

# Re-seed here so that re-running this cell on its own reproduces the same
# augmentations instead of continuing from whatever RNG state is left over.
random.seed(RANDOM_SEED)

df_in = pd.read_csv(INPUT_CSV)
new_rows = []
skipped = []  # (image_path, reason) for every source image that was not augmented

for row in tqdm(df_in.itertuples(index=False), total=len(df_in), desc="Augmenting"):
    # Input paths are relative to the parent of DATA_ROOT; tolerate backslashes.
    rel_path = Path(str(row.image_path).replace("\\", "/"))
    abs_path = DATA_ROOT.parent / rel_path
    if not abs_path.exists():
        skipped.append((rel_path.as_posix(), "missing file"))
        continue
    try:
        # The context manager closes the underlying file once pixels are copied.
        with Image.open(abs_path) as src:
            img = src.convert("RGB")
    except Exception as exc:  # best effort: record the failure and move on
        skipped.append((rel_path.as_posix(), f"unreadable: {exc}"))
        continue

    # Mirror the source folder layout below AUG_DIR. Using only the immediate
    # parent name is not enough: for paths shaped like <dataset>/<site>/Images/<file>
    # it is always "Images", so equally named files from different sites would
    # overwrite each other.
    try:
        sub_dir = rel_path.parent.relative_to(DATA_ROOT.name)
    except ValueError:
        # Path does not start with the dataset folder name; keep the old layout.
        sub_dir = Path(rel_path.parent.name)
    out_dir = AUG_DIR / sub_dir
    out_dir.mkdir(parents=True, exist_ok=True)

    for i in range(AUGMENTS_PER_IMAGE):
        aug_name = f"{rel_path.stem}_trivialaug{i}{rel_path.suffix}"
        apply_trivial_augment(img).save(out_dir / aug_name)
        new_rows.append({
            # Augmented paths are recorded relative to DATA_ROOT (the input rows
            # are relative to DATA_ROOT.parent).
            "image_path": (Path(AUG_DIR.name) / sub_dir / aug_name).as_posix(),
            "label": int(row.label),
        })

if skipped:
    print(f"Skipped {len(skipped)} source image(s); first few: {skipped[:5]}")

df_aug = pd.concat([df_in[["image_path","label"]], pd.DataFrame(new_rows)], ignore_index=True)
df_aug.to_csv(AUG_CSV, index=False)
print("Wrote:", AUG_CSV, " | rows:", len(df_aug))
df_aug.head(5)

Augmenting: 100%|██████████| 1537/1537 [15:26<00:00,  1.66it/s]

Wrote: /content/drive/MyDrive/Research/Independent study/temp_rerun/larch_casebearer/larch_preprocessing/larch_images_augmented.csv  | rows: 6148





Unnamed: 0,image_path,label
0,Data_Set_Larch_Casebearer/Bebehojd_20190527/Im...,2
1,Data_Set_Larch_Casebearer/Bebehojd_20190527/Im...,2
2,Data_Set_Larch_Casebearer/Bebehojd_20190527/Im...,2
3,Data_Set_Larch_Casebearer/Bebehojd_20190527/Im...,2
4,Data_Set_Larch_Casebearer/Bebehojd_20190527/Im...,2


In [6]:
import numpy as np
import cv2
from skimage.measure import shannon_entropy

def compute_edge_density_bgr(img_bgr) -> float:
    """Return the fraction of pixels that the Canny detector marks as edges.

    The image is converted with ``cv2.COLOR_BGR2GRAY`` and passed to
    ``cv2.Canny`` with thresholds 100 and 200; the result is the share of
    non-zero entries in the edge map, as a Python float.
    """
    edge_map = cv2.Canny(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY), 100, 200)
    is_edge = edge_map > 0
    return float(is_edge.mean())

def compute_image_features(img_path: Path, resize_to=(224,224)):
    """Read an image with OpenCV and summarise it with three scalar features.

    Args:
        img_path: Location of the image file; passed to ``cv2.imread`` as a string.
        resize_to: Size handed to ``cv2.resize`` before measuring. Any falsy
            value (e.g. ``None``) skips the resize.

    Returns:
        ``(brightness, edge_density, entropy)`` where brightness is the mean of
        all pixel values of the loaded array, edge_density comes from
        ``compute_edge_density_bgr`` and entropy is ``shannon_entropy`` of the
        grayscale conversion. ``(None, None, None)`` when ``cv2.imread``
        returns ``None``.
    """
    bgr = cv2.imread(str(img_path))
    if bgr is None:
        return (None, None, None)

    if resize_to:
        bgr = cv2.resize(bgr, resize_to)

    mean_intensity = float(np.mean(bgr))
    edge_fraction = compute_edge_density_bgr(bgr)
    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    return (mean_intensity, edge_fraction, float(shannon_entropy(gray)))


In [7]:
from pathlib import Path
import pandas as pd
from tqdm import tqdm

AUG_CSV   = LABELLED_DATA_PATH / "larch_images_augmented.csv"
ALPHA_CSV = LABELLED_DATA_PATH / "larch_images_alpha.csv"

df_aug = pd.read_csv(AUG_CSV)

# The table mixes two path bases: a path is first tried relative to DATA_ROOT
# and, if nothing exists there, relative to the parent of DATA_ROOT.
feat = []
for raw_path in tqdm(df_aug["image_path"], total=len(df_aug), desc="Computing features"):
    rel = Path(str(raw_path).replace("\\", "/"))
    under_dataset = DATA_ROOT / rel
    abs_path = under_dataset if under_dataset.exists() else DATA_ROOT.parent / rel
    # Unreadable images contribute a (None, None, None) row.
    brightness, edge_density, entropy = compute_image_features(abs_path)
    feat.append((brightness, edge_density, entropy))

feature_cols = ["brightness","edge_density","entropy"]
df_alpha = df_aug.copy()
df_alpha[feature_cols] = pd.DataFrame(feat, columns=feature_cols)
df_alpha.to_csv(ALPHA_CSV, index=False)
print("Wrote:", ALPHA_CSV, "| rows:", len(df_alpha))
df_alpha.head(5)

Computing features: 100%|██████████| 6148/6148 [1:06:13<00:00,  1.55it/s]


Wrote: /content/drive/MyDrive/Research/Independent study/temp_rerun/larch_casebearer/larch_preprocessing/larch_images_alpha.csv | rows: 6148


Unnamed: 0,image_path,label,brightness,edge_density,entropy
0,Data_Set_Larch_Casebearer/Bebehojd_20190527/Im...,2,133.354193,0.28705,7.229616
1,Data_Set_Larch_Casebearer/Bebehojd_20190527/Im...,2,145.09836,0.322545,7.313222
2,Data_Set_Larch_Casebearer/Bebehojd_20190527/Im...,2,129.096128,0.336296,7.435486
3,Data_Set_Larch_Casebearer/Bebehojd_20190527/Im...,2,140.640047,0.291036,7.143443
4,Data_Set_Larch_Casebearer/Bebehojd_20190527/Im...,2,144.207217,0.321349,7.043336
