In [None]:
# Mount Google Drive at /content/drive so the project files stored there can be
# read and written. The google.colab module is only available inside Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

import os
import pickle
import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score


In [None]:
import os
import pandas as pd

def build_dataframe_flat(data_dir):
    """Index the image files directly inside ``data_dir`` as (Label, Path) rows.

    Only names ending in ``.jpg``, ``.png`` or ``.bmp`` (case-insensitive) are
    considered. The label is parsed from the file name: the text before the
    first ``_`` (extension removed) must split on ``-`` into exactly two
    parts, which are re-joined as ``"<first>-<second>"``. Names that do not
    match this pattern are skipped silently.

    Args:
        data_dir: Directory to scan (not recursive).

    Returns:
        pandas.DataFrame with columns ``Label`` and ``Path``, one row per
        accepted file, ordered by file name. The frame is empty (but still
        has both columns) when nothing matches.
    """
    data = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order, and any seeded sampling done on it later, stable between runs.
    for img in sorted(os.listdir(data_dir)):
        if not img.lower().endswith(('.jpg', '.png', '.bmp')):
            continue

        # Drop the extension first so a name without '_' (e.g. "828-L.png")
        # does not leak ".png" into the label.
        stem = os.path.splitext(img)[0]
        parts = stem.split('_')[0].split('-')
        if len(parts) != 2:
            continue

        label = f"{parts[0]}-{parts[1]}"
        path = os.path.join(data_dir, img)
        data.append((label, path))
    return pd.DataFrame(data, columns=["Label", "Path"])

# Index the degraded images on Drive, then show the first rows, the total number
# of rows, and the number of distinct Label values.
df = build_dataframe_flat("/content/drive/MyDrive/AML-PROJECT/degraded/")
print(df.head())
print("Total samples:", len(df))
print("Total unique identities:", df['Label'].nunique())

   Label                                               Path
0  828-L  /content/drive/MyDrive/AML-PROJECT/degraded/82...
1  828-L  /content/drive/MyDrive/AML-PROJECT/degraded/82...
2  828-L  /content/drive/MyDrive/AML-PROJECT/degraded/82...
3  828-R  /content/drive/MyDrive/AML-PROJECT/degraded/82...
4  828-R  /content/drive/MyDrive/AML-PROJECT/degraded/82...
Total samples: 17567
Total unique identities: 1758


In [None]:
import pandas as pd

# Load the sample index from CSV. This rebinds `df`, replacing whatever frame the
# name held before this cell ran.
# NOTE(review): no visible cell writes iris_degrade.csv; presumably it was saved
# from an earlier session. Confirm before relying on Restart & Run All.
df = pd.read_csv('/content/drive/MyDrive/AML-PROJECT/iris_degrade.csv')
print("Total samples:", len(df))
print("Unique classes:", df['Label'].nunique())

Total samples: 17567
Unique classes: 1758


In [None]:
# Step 2: derive a per-person ID from the label and keep at most 100 people.
# The ID is the first run of digits in Label, so the -L / -R suffix is dropped
# and both labels of one number share one ID.
df['ID'] = df['Label'].str.extract(r'(\d+)', expand=False)

# value_counts() orders IDs by descending sample count; of the IDs with at
# least 10 samples, the first 100 in that order are retained.
id_counts = df['ID'].value_counts()
has_enough_samples = id_counts >= 10
valid_ids = id_counts.loc[has_enough_samples].head(100).index

# Rebind df to only the rows belonging to the retained IDs.
keep_rows = df['ID'].isin(valid_ids)
df = df.loc[keep_rows]


In [None]:
# Report the current size of df: row count and number of distinct Label values.
print("Total samples:", len(df))
print("Unique classes:", df['Label'].nunique())

Total samples: 2000
Unique classes: 200


In [None]:
# Per-label split: every label with at least 10 images contributes exactly
# 5 training, 1 validation and 1 test image; the other images of that label,
# and all labels with fewer than 10 images, are left out.
train_data = []
val_data = []
test_data = []

for label, group in df.groupby('Label'):
    if len(group) < 10:
        continue

    # Draw 7 rows with a fixed seed, then deal them out by position: 5 / 1 / 1.
    sampled = group.sample(n=7, random_state=42).reset_index(drop=True)
    for bucket, start, stop in ((train_data, 0, 5), (val_data, 5, 6), (test_data, 6, 7)):
        bucket.extend(sampled.iloc[start:stop].to_dict(orient='records'))

train_df, val_df, test_df = (
    pd.DataFrame(records) for records in (train_data, val_data, test_data)
)

# Persist each split next to the other project files on Drive.
for frame, csv_path in (
    (train_df, '/content/drive/MyDrive/AML-PROJECT/iris_degrade_train.csv'),
    (val_df, '/content/drive/MyDrive/AML-PROJECT/iris_degrade_val.csv'),
    (test_df, '/content/drive/MyDrive/AML-PROJECT/iris_degrade_test.csv'),
):
    frame.to_csv(csv_path, index=False)

print("Train samples:", len(train_df))
print("Val samples:", len(val_df))
print("Test samples:", len(test_df))


Train samples: 8780
Val samples: 1756
Test samples: 1756


## Augmentation

In [None]:
from tensorflow.keras.preprocessing.image import apply_affine_transform

In [None]:
import os
import tensorflow as tf
import pandas as pd
import numpy as np
from scipy.ndimage import rotate

# Folder on Drive that receives the augmented images; created if it does not exist.
save_dir = "/content/drive/MyDrive/AML-PROJECT/23_aug"
os.makedirs(save_dir, exist_ok=True)

# Collects one {'Label', 'Path'} record per augmented image that was saved.
df_augmented = []

def stretch_and_rotate(image):
    """Apply a random anisotropic stretch and a small random rotation.

    The image is resized to (120 * h_scale, 160 * w_scale) with
    h_scale drawn from [1.1, 1.3) and w_scale from [0.8, 1.0), passed through
    ``tf.image.resize_with_pad`` with a 120 x 160 target, and then rotated by
    an angle drawn from [-10, 10) degrees using ``scipy.ndimage.rotate``
    (``reshape=False``, ``mode='nearest'``).

    Runs eagerly (it calls ``.numpy()``), so it cannot be used inside a
    traced ``tf.function`` / ``Dataset.map``. The random draws are not seeded
    here.

    Args:
        image: Image tensor accepted by ``tf.image.resize``; the call site in
            this notebook passes a single-channel float32 image
            (assumes H x W x 1, since the result is squeezed to 2-D).

    Returns:
        float32 tensor with a trailing channel axis re-added after rotation.
    """
    # Random draws happen in the order: height scale, width scale, angle.
    h_scale = tf.random.uniform([], 1.1, 1.3)
    w_scale = tf.random.uniform([], 0.8, 1.0)
    stretched_size = [
        tf.cast(120 * h_scale, tf.int32),
        tf.cast(160 * w_scale, tf.int32),
    ]

    stretched = tf.image.resize(image, size=stretched_size)
    framed = tf.image.resize_with_pad(stretched, target_height=120, target_width=160)

    angle = tf.random.uniform([], -10, 10).numpy()
    # scipy works on NumPy arrays: drop the channel axis, rotate, put it back.
    rotated = rotate(framed.numpy().squeeze(), angle, reshape=False, mode='nearest')
    return tf.convert_to_tensor(rotated[..., np.newaxis], dtype=tf.float32)

# Produce one augmented copy of every image listed in df. Processing is
# best-effort: an image that fails anywhere between reading and saving is
# reported and skipped, and gets no entry in df_augmented.
for idx, row in df.iterrows():
    img_path, label = row['Path'], row['Label']
    # The row index is part of the file name, so names are unique per source row.
    save_path = os.path.join(save_dir, f"{label}_aug_{idx}.png")

    try:
        raw_bytes = tf.io.read_file(img_path)
        image = tf.image.decode_image(raw_bytes, channels=1)
        image.set_shape([None, None, 1])
        image = tf.image.convert_image_dtype(image, tf.float32)

        aug_image = stretch_and_rotate(image)
        tf.keras.utils.save_img(save_path, aug_image)
    except Exception as e:
        print(f"Error processing {img_path}: {e}")
    else:
        df_augmented.append({'Label': label, 'Path': save_path})

# Index of the augmented images, also written to Drive as CSV.
aug_df = pd.DataFrame(df_augmented)
aug_df.to_csv("/content/drive/MyDrive/AML-PROJECT/23iris_degrade_augmented.csv", index=False)

## Preprocessing

In [None]:
# Report how many augmented images were recorded and how many distinct labels they cover.
print("Total samples:", len(aug_df))
print("Unique classes:", aug_df['Label'].nunique())

Total samples: 1987
Unique classes: 200


In [None]:
# 80/20 split of df, stratified on the ID column, with a fixed seed.
# NOTE(review): this rebinds train_df and test_df, replacing the frames built by
# the per-label 5/1/1 split earlier in the notebook. The result does not appear
# to be used by the visible cells that follow (preprocessing runs on aug_df);
# confirm whether this split is still needed.
train_df, test_df = train_test_split(df, test_size=0.2, stratify=df['ID'], random_state=42)

In [None]:
import os
import pandas as pd
from PIL import Image
import torchvision.transforms as transforms
from tqdm import tqdm


all_out_dir = "/content/drive/MyDrive/AML-PROJECT/23final_all"
os.makedirs(all_out_dir, exist_ok=True)

# Model-input transform: resize, convert to a tensor, and map values to [-1, 1].
# Apply it when loading images for a model. Its output must NOT be converted
# back to an 8-bit image: ToPILImage multiplies by 255 and casts to uint8,
# which cannot represent the negative half of the normalised range.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

def preprocess_and_save(df, save_dir, new_csv_path, size=(128, 128)):
    """Write resized grayscale copies of the listed images and index them in a CSV.

    Each image is opened, converted to 8-bit grayscale (mode ``"L"``), resized
    to ``size`` and saved into ``save_dir`` under its original file name.
    Normalisation is deliberately not applied here: the previous version saved
    ``ToPILImage(Normalize(ToTensor(img)))``, i.e. values in [-1, 1] cast to
    uint8, which corrupted every pixel darker than mid-grey. Normalise when
    the saved files are loaded for a model instead.

    Args:
        df: DataFrame with ``Path`` (source image path) and ``Label`` columns.
        save_dir: Existing directory that receives the processed images.
            Source files sharing a base name overwrite each other.
        new_csv_path: Destination of the output index CSV, which has the
            columns ``image_path`` and ``label``.
        size: Target ``(height, width)`` passed to ``transforms.Resize``;
            defaults to the 128 x 128 used by ``transform``.

    Returns:
        None. The results are the image files and the CSV written to disk.
    """
    resize = transforms.Resize(size)
    updated_paths = []
    for _, row in tqdm(df.iterrows(), total=len(df)):
        img_path = row["Path"]
        label = row["Label"]

        # The context manager closes the source file once the pixels are read.
        with Image.open(img_path) as src:
            img_out = resize(src.convert("L"))

        new_name = os.path.basename(img_path)
        save_path = os.path.join(save_dir, new_name)
        img_out.save(save_path)
        updated_paths.append((save_path, label))

    new_df = pd.DataFrame(updated_paths, columns=["image_path", "label"])
    new_df.to_csv(new_csv_path, index=False)

preprocess_and_save(aug_df, all_out_dir, "/content/drive/MyDrive/AML-PROJECT/23final_all.csv")

100%|██████████| 2000/2000 [00:49<00:00, 40.56it/s]


In [None]:
# Load the index of preprocessed images. This rebinds `df`; the frame loaded here
# is written by preprocess_and_save with the columns `image_path` and `label`,
# not the `Path` / `Label` columns used by earlier cells.
df = pd.read_csv("/content/drive/MyDrive/AML-PROJECT/23final_all.csv")

In [None]:
# Report the row count and the number of distinct values in the lowercase `label` column.
print("Total samples:", len(df))
print("Unique classes:", df['label'].nunique())

Total samples: 2000
Unique classes: 200
