In [2]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage import io,img_as_float
from skimage.filters import sobel
from scipy.stats import skew, kurtosis,entropy
import matplotlib.pyplot as plt 

ModuleNotFoundError: No module named 'cv2'

In [None]:
def load_gray(img_path,size=(512,512)):
    img=io.imread(img_path, as_gray=True)
    img=img_as_float(img)
    return cv2.resize(img,size, interpolation=cv2.INTER_AREA)

In [None]:
def extract_features(img,file_path,scanner_id="unknown"):
    h,w=img.shape
    aspect_ratio=w/h
    file_size_kb=os.path.getsize(file_path)/1024  # in KB
    pixels=img.flatten()
    mean_intensity=np.mean(pixels)
    std_intensity=np.std(pixels)
    skewness=skew(pixels)
    kurt=kurtosis(pixels)
    ent=entropy(np.histogram(pixels, bins=256, range=(0, 1))[0]+1e-6)
    edges=sobel(img)
    edge_density=np.mean(edges>0.1)
    return {
        "file_name": os.path.basename(file_path),
        "class_label": scanner_id,
        "width": w,
        "height": h,
        "aspect_ratio": aspect_ratio,
        "file_size_kb": file_size_kb,
        "mean_intensity": mean_intensity,
        "std_intensity": std_intensity,
        "skewness": skewness,
        "kurtosis": kurt,
        "entropy": ent,
        "edge_density": edge_density
    }


In [None]:
columns = [
    "file_name","class_label","width", "height", "aspect_ratio", "file_size_kb",
    "mean_intensity", "std_intensity", "skewness", "kurtosis",
    "entropy", "edge_density"
]

In [None]:
metadata = pd.DataFrame(columns=columns)

In [None]:
def process_image_and_add(df,img_path,scanner_id="unknown"):
    img=load_gray(img_path)
    features=extract_features(img,img_path,scanner_id)
    return pd.concat([df, pd.DataFrame([features])], ignore_index=True)

def process_image_and_append(df, img_path, scanner_id="unknown"):
    img = load_gray(img_path)
    features = extract_features(img, img_path, scanner_id)
    df.loc[len(df)] = features
    return df

In [None]:
img_path = r"C:\Scanner_Source_Identification\s1_1.tif"
file_path = r"C:\trace\Canon120-1\150\s1_1(1).tif"


In [None]:
metadata_df = process_image_and_append(metadata, img_path, scanner_id="Canon120-1")

In [None]:
metadata.info()


<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 0 to 0
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   file_name       1 non-null      object 
 1   class_label     1 non-null      object 
 2   width           1 non-null      int64  
 3   height          1 non-null      int64  
 4   aspect_ratio    1 non-null      float64
 5   file_size_kb    1 non-null      float64
 6   mean_intensity  1 non-null      float64
 7   std_intensity   1 non-null      float64
 8   skewness        1 non-null      float64
 9   kurtosis        1 non-null      float64
 10  entropy         1 non-null      float64
 11  edge_density    1 non-null      float64
dtypes: float64(8), int64(2), object(2)
memory usage: 104.0+ bytes


In [None]:
metadata.head()


Unnamed: 0,file_name,class_label,width,height,aspect_ratio,file_size_kb,mean_intensity,std_intensity,skewness,kurtosis,entropy,edge_density
0,s1_1.tif,Canon120-1,512,512,1.0,6366.801758,0.972213,0.104171,-4.147706,16.98554,0.87978,0.128208


In [None]:
metadata_df = process_image_and_append(metadata, img_path, scanner_id="Canon120-1")

In [None]:
dataset_dir = r"D:\Scanner_Source_Identification\canon 120-1"


In [None]:
for root, dirs, files in os.walk(dataset_dir):
    for file in files:
        if file.lower().endswith(('.tif', '.jpg', '.png')):
            img_path = os.path.join(root, file)
            relative_path = os.path.relpath(img_path, dataset_dir)
            scanner_id = relative_path.split(os.sep)[0]
            
            print(f"Processing: {file} | Scanner ID: {scanner_id}")
            metadata = process_image_and_append(metadata, img_path, scanner_id)

Processing: s1_1 (1).tif | Scanner ID: 150
Processing: s1_1.tif | Scanner ID: 150
Processing: s1_10.tif | Scanner ID: 150
Processing: s1_100.tif | Scanner ID: 150
Processing: s1_11.tif | Scanner ID: 150
Processing: s1_12.tif | Scanner ID: 150
Processing: s1_13.tif | Scanner ID: 150
Processing: s1_14.tif | Scanner ID: 150
Processing: s1_15.tif | Scanner ID: 150
Processing: s1_16.tif | Scanner ID: 150
Processing: s1_17.tif | Scanner ID: 150
Processing: s1_18.tif | Scanner ID: 150
Processing: s1_19.tif | Scanner ID: 150
Processing: s1_2.tif | Scanner ID: 150
Processing: s1_20.tif | Scanner ID: 150
Processing: s1_21.tif | Scanner ID: 150
Processing: s1_22.tif | Scanner ID: 150
Processing: s1_23.tif | Scanner ID: 150
Processing: s1_24.tif | Scanner ID: 150
Processing: s1_25.tif | Scanner ID: 150
Processing: s1_26.tif | Scanner ID: 150
Processing: s1_27.tif | Scanner ID: 150
Processing: s1_28.tif | Scanner ID: 150
Processing: s1_29.tif | Scanner ID: 150
Processing: s1_3.tif | Scanner ID: 150

In [None]:
metadata.info()
metadata.head()

<class 'pandas.core.frame.DataFrame'>
Index: 190 entries, 0 to 189
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   file_name       190 non-null    object 
 1   class_label     190 non-null    object 
 2   width           190 non-null    int64  
 3   height          190 non-null    int64  
 4   aspect_ratio    190 non-null    float64
 5   file_size_kb    190 non-null    float64
 6   mean_intensity  190 non-null    float64
 7   std_intensity   190 non-null    float64
 8   skewness        190 non-null    float64
 9   kurtosis        190 non-null    float64
 10  entropy         190 non-null    float64
 11  edge_density    190 non-null    float64
dtypes: float64(8), int64(2), object(2)
memory usage: 19.3+ KB


Unnamed: 0,file_name,class_label,width,height,aspect_ratio,file_size_kb,mean_intensity,std_intensity,skewness,kurtosis,entropy,edge_density
0,s1_1 (1).tif,150,512,512,1.0,25495.067383,0.971553,0.104441,-4.168368,17.296134,0.931847,0.129837
1,s1_1.tif,150,512,512,1.0,6366.801758,0.972213,0.104171,-4.147706,16.98554,0.87978,0.128208
2,s1_10.tif,150,512,512,1.0,6366.801758,0.969917,0.10919,-4.046214,16.01423,0.883227,0.097931
3,s1_100.tif,150,512,512,1.0,6366.801758,0.985309,0.075691,-6.530848,48.380602,0.566759,0.069481
4,s1_11.tif,150,512,512,1.0,6366.801758,0.973881,0.087714,-4.209022,19.795997,1.04478,0.134338


In [None]:
metadata["class_label"].value_counts()

class_label
150    101
300     89
Name: count, dtype: int64

In [None]:
import os
import pandas as pd

# Define both folder paths
folders = [
    r"D:\Scanner_Source_Identification\canon 120-1",
    r"D:\Scanner_Source_Identification\canon 120-2"
]

# Define metadata columns
columns = [
    "file_name", "class_label", "width", "height", "aspect_ratio", "file_size_kb",
    "mean_intensity", "std_intensity", "skewness", "kurtosis",
    "entropy", "edge_density"
]
metadata = pd.DataFrame(columns=columns)

# Loop through each folder
for dataset_dir in folders:
    for root, dirs, files in os.walk(dataset_dir):
        for file in files:
            if file.lower().endswith(('.tif', '.jpg', '.png')):
                img_path = os.path.join(root, file)
                relative_path = os.path.relpath(img_path, os.path.dirname(folders[0]))
                scanner_id = os.path.basename(os.path.dirname(img_path))  # '150' or '300'

                print(f"Processing: {file} | Scanner ID: {scanner_id}")
                metadata = process_image_and_append(metadata, img_path, scanner_id)


Processing: s1_1 (1).tif | Scanner ID: 150
Processing: s1_1.tif | Scanner ID: 150
Processing: s1_10.tif | Scanner ID: 150
Processing: s1_100.tif | Scanner ID: 150
Processing: s1_11.tif | Scanner ID: 150
Processing: s1_12.tif | Scanner ID: 150
Processing: s1_13.tif | Scanner ID: 150
Processing: s1_14.tif | Scanner ID: 150
Processing: s1_15.tif | Scanner ID: 150
Processing: s1_16.tif | Scanner ID: 150
Processing: s1_17.tif | Scanner ID: 150
Processing: s1_18.tif | Scanner ID: 150
Processing: s1_19.tif | Scanner ID: 150
Processing: s1_2.tif | Scanner ID: 150
Processing: s1_20.tif | Scanner ID: 150
Processing: s1_21.tif | Scanner ID: 150
Processing: s1_22.tif | Scanner ID: 150
Processing: s1_23.tif | Scanner ID: 150
Processing: s1_24.tif | Scanner ID: 150
Processing: s1_25.tif | Scanner ID: 150
Processing: s1_26.tif | Scanner ID: 150
Processing: s1_27.tif | Scanner ID: 150
Processing: s1_28.tif | Scanner ID: 150
Processing: s1_29.tif | Scanner ID: 150
Processing: s1_3.tif | Scanner ID: 150

In [None]:
import os
import pandas as pd

# Define both folder paths
folders = [
    r"D:\Scanner_Source_Identification\canon120-1",
    r"D:\Scanner_Source_Identification\Canon120-2"
]

# Define metadata columns
columns = [
    "file_name", "class_label", "width", "height", "aspect_ratio", "file_size_kb",
    "mean_intensity", "std_intensity", "skewness", "kurtosis",
    "entropy", "edge_density"
]
metadata = pd.DataFrame(columns=columns)

# Loop through each folder
for dataset_dir in folders:
    for root, dirs, files in os.walk(dataset_dir):
        for file in files:
            if file.lower().endswith(('.tif', '.jpg', '.png')):
                img_path = os.path.join(root, file)
                relative_path = os.path.relpath(img_path, os.path.dirname(folders[0]))
                scanner_id = os.path.basename(os.path.dirname(img_path))  # '150' or '300'

                print(f"Processing: {file} | Scanner ID: {scanner_id}")
                metadata = process_image_and_append(metadata, img_path, scanner_id)


Processing: s1_1 (1).tif | Scanner ID: 150
Processing: s1_1.tif | Scanner ID: 150
Processing: s1_10.tif | Scanner ID: 150
Processing: s1_100.tif | Scanner ID: 150
Processing: s1_11.tif | Scanner ID: 150
Processing: s1_12.tif | Scanner ID: 150
Processing: s1_13.tif | Scanner ID: 150
Processing: s1_14.tif | Scanner ID: 150
Processing: s1_15.tif | Scanner ID: 150
Processing: s1_16.tif | Scanner ID: 150
Processing: s1_17.tif | Scanner ID: 150
Processing: s1_18.tif | Scanner ID: 150
Processing: s1_19.tif | Scanner ID: 150
Processing: s1_2.tif | Scanner ID: 150
Processing: s1_20.tif | Scanner ID: 150
Processing: s1_21.tif | Scanner ID: 150
Processing: s1_22.tif | Scanner ID: 150
Processing: s1_23.tif | Scanner ID: 150
Processing: s1_24.tif | Scanner ID: 150
Processing: s1_25.tif | Scanner ID: 150
Processing: s1_26.tif | Scanner ID: 150
Processing: s1_27.tif | Scanner ID: 150
Processing: s1_28.tif | Scanner ID: 150
Processing: s1_29.tif | Scanner ID: 150
Processing: s1_3.tif | Scanner ID: 150

In [None]:
import os
import pandas as pd

# Define both folder paths
folders = [
    r"D:\Scanner_Source_Identification\canon120-1",
    r"D:\Scanner_Source_Identification\Canon120-2"
]

# Define metadata columns
columns = [
    "file_name", "class_label", "width", "height", "aspect_ratio", "file_size_kb",
    "mean_intensity", "std_intensity", "skewness", "kurtosis",
    "entropy", "edge_density"
]
metadata = pd.DataFrame(columns=columns)

# Loop through each folder
for dataset_dir in folders:
    for root, dirs, files in os.walk(dataset_dir):
        for file in files:
            if file.lower().endswith(('.tif', '.jpg', '.png')):
                img_path = os.path.join(root, file)
                relative_path = os.path.relpath(img_path, os.path.dirname(folders[0]))
                scanner_id = os.path.basename(os.path.dirname(img_path))  # '150' or '300'

                print(f"Processing: {file} | Scanner ID: {scanner_id}")
                metadata = process_image_and_append(metadata, img_path, scanner_id)


Processing: s2_1.tif | Scanner ID: 150
Processing: s2_10.tif | Scanner ID: 150
Processing: s2_100.tif | Scanner ID: 150
Processing: s2_11.tif | Scanner ID: 150
Processing: s2_12.tif | Scanner ID: 150
Processing: s2_13.tif | Scanner ID: 150
Processing: s2_14.tif | Scanner ID: 150
Processing: s2_15.tif | Scanner ID: 150
Processing: s2_16.tif | Scanner ID: 150
Processing: s2_17.tif | Scanner ID: 150
Processing: s2_18.tif | Scanner ID: 150
Processing: s2_19.tif | Scanner ID: 150
Processing: s2_2.tif | Scanner ID: 150
Processing: s2_20.tif | Scanner ID: 150
Processing: s2_21.tif | Scanner ID: 150
Processing: s2_22.tif | Scanner ID: 150
Processing: s2_23.tif | Scanner ID: 150
Processing: s2_24.tif | Scanner ID: 150
Processing: s2_25.tif | Scanner ID: 150
Processing: s2_26.tif | Scanner ID: 150
Processing: s2_27.tif | Scanner ID: 150
Processing: s2_28.tif | Scanner ID: 150
Processing: s2_29.tif | Scanner ID: 150
Processing: s2_3.tif | Scanner ID: 150
Processing: s2_30.tif | Scanner ID: 150
Pr

In [None]:
import pandas as pd

df = pd.DataFrame(metadata)
print(df.head())
print(f"Total images: {len(df)}")


  scanner_id                                         image_path
0        150  D:\Scanner_Source_Identification\Canon120-2\15...
1        150  D:\Scanner_Source_Identification\Canon120-2\15...
2        150  D:\Scanner_Source_Identification\Canon120-2\15...
3        150  D:\Scanner_Source_Identification\Canon120-2\15...
4        150  D:\Scanner_Source_Identification\Canon120-2\15...
Total images: 174


In [None]:
import os

def process_image_and_append(metadata, img_path, scanner_id):
    # Just an example: you can put your actual processing here
    metadata.append({
        "scanner_id": scanner_id,
        "image_path": img_path
    })
    return metadata

# List of all main folders
folders = [
    r"D:\Scanner_Source_Identification\Canon120-1",
    r"D:\Scanner_Source_Identification\Canon120-2"
]

metadata = []

# Loop through all main folders
for dataset_dir in folders:
    for root, dirs, files in os.walk(dataset_dir):
        for file in files:
            if file.lower().endswith(('.tif', '.jpg', '.png')):
                img_path = os.path.join(root, file)
                # Get relative path for folder hierarchy
                relative_path = os.path.relpath(img_path, dataset_dir)
                # scanner_id = subfolder name (e.g. '150' or '300')
                scanner_id = relative_path.split(os.sep)[0]

                print(f"Processing: {file} | Scanner ID: {scanner_id}")
                metadata = process_image_and_append(metadata, img_path, scanner_id)

print(f"\n✅ Total images processed: {len(metadata)}")


Processing: s1_1 (1).tif | Scanner ID: 150
Processing: s1_1.tif | Scanner ID: 150
Processing: s1_10.tif | Scanner ID: 150
Processing: s1_100.tif | Scanner ID: 150
Processing: s1_11.tif | Scanner ID: 150
Processing: s1_12.tif | Scanner ID: 150
Processing: s1_13.tif | Scanner ID: 150
Processing: s1_14.tif | Scanner ID: 150
Processing: s1_15.tif | Scanner ID: 150
Processing: s1_16.tif | Scanner ID: 150
Processing: s1_17.tif | Scanner ID: 150
Processing: s1_18.tif | Scanner ID: 150
Processing: s1_19.tif | Scanner ID: 150
Processing: s1_2.tif | Scanner ID: 150
Processing: s1_20.tif | Scanner ID: 150
Processing: s1_21.tif | Scanner ID: 150
Processing: s1_22.tif | Scanner ID: 150
Processing: s1_23.tif | Scanner ID: 150
Processing: s1_24.tif | Scanner ID: 150
Processing: s1_25.tif | Scanner ID: 150
Processing: s1_26.tif | Scanner ID: 150
Processing: s1_27.tif | Scanner ID: 150
Processing: s1_28.tif | Scanner ID: 150
Processing: s1_29.tif | Scanner ID: 150
Processing: s1_3.tif | Scanner ID: 150

In [None]:
import os

def process_image_and_append(metadata, img_path, scanner_id):
    # Just an example: you can put your actual processing here
    metadata.append({
        "scanner_id": scanner_id,
        "image_path": img_path
    })
    return metadata

# List of all main folders
folders = [
    r"D:\Scanner_Source_Identification\Canon120-1",
    r"D:\Scanner_Source_Identification\Canon120-2",
    r"D:\Scanner_Source_Identification\Canon220"
    
]

metadata = []

# Loop through all main folders
for dataset_dir in folders:
    for root, dirs, files in os.walk(dataset_dir):
        for file in files:
            if file.lower().endswith(('.tif', '.jpg', '.png')):
                img_path = os.path.join(root, file)
                # Get relative path for folder hierarchy
                relative_path = os.path.relpath(img_path, dataset_dir)
                # scanner_id = subfolder name (e.g. '150' or '300')
                scanner_id = relative_path.split(os.sep)[0]

                print(f"Processing: {file} | Scanner ID: {scanner_id}")
                metadata = process_image_and_append(metadata, img_path, scanner_id)

print(f"\n✅ Total images processed: {len(metadata)}")

Processing: s1_1 (1).tif | Scanner ID: 150
Processing: s1_1.tif | Scanner ID: 150
Processing: s1_10.tif | Scanner ID: 150
Processing: s1_100.tif | Scanner ID: 150
Processing: s1_11.tif | Scanner ID: 150
Processing: s1_12.tif | Scanner ID: 150
Processing: s1_13.tif | Scanner ID: 150
Processing: s1_14.tif | Scanner ID: 150
Processing: s1_15.tif | Scanner ID: 150
Processing: s1_16.tif | Scanner ID: 150
Processing: s1_17.tif | Scanner ID: 150
Processing: s1_18.tif | Scanner ID: 150
Processing: s1_19.tif | Scanner ID: 150
Processing: s1_2.tif | Scanner ID: 150
Processing: s1_20.tif | Scanner ID: 150
Processing: s1_21.tif | Scanner ID: 150
Processing: s1_22.tif | Scanner ID: 150
Processing: s1_23.tif | Scanner ID: 150
Processing: s1_24.tif | Scanner ID: 150
Processing: s1_25.tif | Scanner ID: 150
Processing: s1_26.tif | Scanner ID: 150
Processing: s1_27.tif | Scanner ID: 150
Processing: s1_28.tif | Scanner ID: 150
Processing: s1_29.tif | Scanner ID: 150
Processing: s1_3.tif | Scanner ID: 150

In [3]:
import os

def process_image_and_append(metadata, img_path, scanner_id):
    # Just an example: you can put your actual processing here
    metadata.append({
        "scanner_id": scanner_id,
        "image_path": img_path
    })
    return metadata

# List of all main folders
folders = [
    r"D:\Scanner_Source_Identification\Canon120-1",
    r"D:\Scanner_Source_Identification\Canon120-2",
    r"D:\Scanner_Source_Identification\Canon220",
    r"D:\Scanner_Source_Identification\Canon9000-1",
    r"D:\Scanner_Source_Identification\Canon9000-2"

    
]

metadata = []

# Loop through all main folders
for dataset_dir in folders:
    for root, dirs, files in os.walk(dataset_dir):
        for file in files:
            if file.lower().endswith(('.tif', '.jpg', '.png')):
                img_path = os.path.join(root, file)
                # Get relative path for folder hierarchy
                relative_path = os.path.relpath(img_path, dataset_dir)
                # scanner_id = subfolder name (e.g. '150' or '300')
                scanner_id = relative_path.split(os.sep)[0]

                print(f"Processing: {file} | Scanner ID: {scanner_id}")
                metadata = process_image_and_append(metadata, img_path, scanner_id)

print(f"\n✅ Total images processed: {len(metadata)}")

Processing: s1_1 (1).tif | Scanner ID: 150
Processing: s1_1.tif | Scanner ID: 150
Processing: s1_10.tif | Scanner ID: 150
Processing: s1_100.tif | Scanner ID: 150
Processing: s1_11.tif | Scanner ID: 150
Processing: s1_12.tif | Scanner ID: 150
Processing: s1_13.tif | Scanner ID: 150
Processing: s1_14.tif | Scanner ID: 150
Processing: s1_15.tif | Scanner ID: 150
Processing: s1_16.tif | Scanner ID: 150
Processing: s1_17.tif | Scanner ID: 150
Processing: s1_18.tif | Scanner ID: 150
Processing: s1_19.tif | Scanner ID: 150
Processing: s1_2.tif | Scanner ID: 150
Processing: s1_20.tif | Scanner ID: 150
Processing: s1_21.tif | Scanner ID: 150
Processing: s1_22.tif | Scanner ID: 150
Processing: s1_23.tif | Scanner ID: 150
Processing: s1_24.tif | Scanner ID: 150
Processing: s1_25.tif | Scanner ID: 150
Processing: s1_26.tif | Scanner ID: 150
Processing: s1_27.tif | Scanner ID: 150
Processing: s1_28.tif | Scanner ID: 150
Processing: s1_29.tif | Scanner ID: 150
Processing: s1_3.tif | Scanner ID: 150

In [None]:
import os

def process_image_and_append(metadata, img_path, scanner_id):
    # Just an example: you can put your actual processing here
    metadata.append({
        "scanner_id": scanner_id,
        "image_path": img_path
    })
    return metadata

# Loop through all main folders
for dataset_dir in folders:
    for root, dirs, files in os.walk(dataset_dir):
        for file in files:
            if file.lower().endswith(('.tif', '.jpg', '.png')):
                img_path = os.path.join(root, file)
                # Get relative path for folder hierarchy
                relative_path = os.path.relpath(img_path, dataset_dir)
                # scanner_id = subfolder name (e.g. '150' or '300')
                scanner_id = relative_path.split(os.sep)[0]

                print(f"Processing: {file} | Scanner ID: {scanner_id}")
                metadata = process_image_and_append(metadata, img_path, scanner_id)

print(f"\n✅ Total images processed: {len(metadata)}")

Processing: s1_1 (1).tif | Scanner ID: 150
Processing: s1_1.tif | Scanner ID: 150
Processing: s1_10.tif | Scanner ID: 150
Processing: s1_100.tif | Scanner ID: 150
Processing: s1_11.tif | Scanner ID: 150
Processing: s1_12.tif | Scanner ID: 150
Processing: s1_13.tif | Scanner ID: 150
Processing: s1_14.tif | Scanner ID: 150
Processing: s1_15.tif | Scanner ID: 150
Processing: s1_16.tif | Scanner ID: 150
Processing: s1_17.tif | Scanner ID: 150
Processing: s1_18.tif | Scanner ID: 150
Processing: s1_19.tif | Scanner ID: 150
Processing: s1_2.tif | Scanner ID: 150
Processing: s1_20.tif | Scanner ID: 150
Processing: s1_21.tif | Scanner ID: 150
Processing: s1_22.tif | Scanner ID: 150
Processing: s1_23.tif | Scanner ID: 150
Processing: s1_24.tif | Scanner ID: 150
Processing: s1_25.tif | Scanner ID: 150
Processing: s1_26.tif | Scanner ID: 150
Processing: s1_27.tif | Scanner ID: 150
Processing: s1_28.tif | Scanner ID: 150
Processing: s1_29.tif | Scanner ID: 150
Processing: s1_3.tif | Scanner ID: 150

In [8]:
type(metadata)


list

In [9]:
print(len(metadata))
print(type(metadata[0]))


1457
<class 'dict'>


In [18]:
folders = [
    r"D:\Scanner_Source_Identification\Canon120-1",
    r"D:\Scanner_Source_Identification\Canon120-2",
    r"D:\Scanner_Source_Identification\Canon220",
    r"D:\Scanner_Source_Identification\Canon9000-1",
    r"D:\Scanner_Source_Identification\Canon9000-2",
    r"D:\Scanner_Source_Identification\EpsonV39-1",
    r"D:\Scanner_Source_Identification\EpsonV39-2",
    r"D:\Scanner_Source_Identification\EpsonV370-1",
    r"D:\Scanner_Source_Identification\EpsonV370-2",
    r"D:\Scanner_Source_Identification\EpsonV550",
    r"D:\Scanner_Source_Identification\HP"
]

for dataset_dir in folders:
    for root, dirs, files in os.walk(dataset_dir):
        for file in files:
            if file.lower().endswith(('.tif', '.jpg', '.png')):
                img_path = os.path.join(root, file)
                relative_path = os.path.relpath(img_path, dataset_dir)
                scanner_id = relative_path.split(os.sep)[0]
                print(f"Processing: {file} | Scanner ID: {scanner_id}")
                metadata = process_image_and_append(metadata, img_path, scanner_id)


Processing: s1_1 (1).tif | Scanner ID: 150
Processing: s1_1.tif | Scanner ID: 150
Processing: s1_10.tif | Scanner ID: 150
Processing: s1_100.tif | Scanner ID: 150
Processing: s1_11.tif | Scanner ID: 150
Processing: s1_12.tif | Scanner ID: 150
Processing: s1_13.tif | Scanner ID: 150
Processing: s1_14.tif | Scanner ID: 150
Processing: s1_15.tif | Scanner ID: 150
Processing: s1_16.tif | Scanner ID: 150
Processing: s1_17.tif | Scanner ID: 150
Processing: s1_18.tif | Scanner ID: 150
Processing: s1_19.tif | Scanner ID: 150
Processing: s1_2.tif | Scanner ID: 150
Processing: s1_20.tif | Scanner ID: 150
Processing: s1_21.tif | Scanner ID: 150
Processing: s1_22.tif | Scanner ID: 150
Processing: s1_23.tif | Scanner ID: 150
Processing: s1_24.tif | Scanner ID: 150
Processing: s1_25.tif | Scanner ID: 150
Processing: s1_26.tif | Scanner ID: 150
Processing: s1_27.tif | Scanner ID: 150
Processing: s1_28.tif | Scanner ID: 150
Processing: s1_29.tif | Scanner ID: 150
Processing: s1_3.tif | Scanner ID: 150

In [19]:
for dataset_dir in folders:
    # scanner_id will be the folder name only (e.g. "Canon120-1")
    scanner_id = os.path.basename(dataset_dir)
    
    # Walk through all images in that folder
    for root, dirs, files in os.walk(dataset_dir):
        for file in files:
            if file.lower().endswith(('.tif', '.jpg', '.png')):
                img_path = os.path.join(root, file)

                print(f"Processing: {file} | Scanner ID: {scanner_id}")
                
                # Append features to the dataframe
                metadata = process_image_and_append(metadata, img_path, scanner_id)


Processing: s1_1 (1).tif | Scanner ID: Canon120-1
Processing: s1_1.tif | Scanner ID: Canon120-1
Processing: s1_10.tif | Scanner ID: Canon120-1
Processing: s1_100.tif | Scanner ID: Canon120-1
Processing: s1_11.tif | Scanner ID: Canon120-1
Processing: s1_12.tif | Scanner ID: Canon120-1
Processing: s1_13.tif | Scanner ID: Canon120-1
Processing: s1_14.tif | Scanner ID: Canon120-1
Processing: s1_15.tif | Scanner ID: Canon120-1
Processing: s1_16.tif | Scanner ID: Canon120-1
Processing: s1_17.tif | Scanner ID: Canon120-1
Processing: s1_18.tif | Scanner ID: Canon120-1
Processing: s1_19.tif | Scanner ID: Canon120-1
Processing: s1_2.tif | Scanner ID: Canon120-1
Processing: s1_20.tif | Scanner ID: Canon120-1
Processing: s1_21.tif | Scanner ID: Canon120-1
Processing: s1_22.tif | Scanner ID: Canon120-1
Processing: s1_23.tif | Scanner ID: Canon120-1
Processing: s1_24.tif | Scanner ID: Canon120-1
Processing: s1_25.tif | Scanner ID: Canon120-1
Processing: s1_26.tif | Scanner ID: Canon120-1
Processing:

In [23]:
import pandas as pd

columns = [
    "file_name", "class_label", "width", "height", "aspect_ratio",
    "file_size_kb", "mean_intensity", "std_intensity", "skewness",
    "kurtosis", "entropy", "edge_density"
]

metadata = pd.DataFrame(columns=columns)


In [24]:
metadata.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 0 entries
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   file_name       0 non-null      object
 1   class_label     0 non-null      object
 2   width           0 non-null      object
 3   height          0 non-null      object
 4   aspect_ratio    0 non-null      object
 5   file_size_kb    0 non-null      object
 6   mean_intensity  0 non-null      object
 7   std_intensity   0 non-null      object
 8   skewness        0 non-null      object
 9   kurtosis        0 non-null      object
 10  entropy         0 non-null      object
 11  edge_density    0 non-null      object
dtypes: object(12)
memory usage: 132.0+ bytes


In [25]:
metadata.head()

Unnamed: 0,file_name,class_label,width,height,aspect_ratio,file_size_kb,mean_intensity,std_intensity,skewness,kurtosis,entropy,edge_density


In [26]:
import pandas as pd

columns = [
    "file_name", "class_label", "width", "height", "aspect_ratio",
    "file_size_kb", "mean_intensity", "std_intensity",
    "skewness", "kurtosis", "entropy", "edge_density"
]

metadata_df = pd.DataFrame(metadata, columns=columns)


In [27]:
metadata_df.head()


Unnamed: 0,file_name,class_label,width,height,aspect_ratio,file_size_kb,mean_intensity,std_intensity,skewness,kurtosis,entropy,edge_density


In [1]:
import os
import cv2
import pandas as pd
import numpy as np
from skimage import io
from skimage.filters import sobel
from skimage.measure import shannon_entropy, regionprops, label


ModuleNotFoundError: No module named 'cv2'

In [2]:
folders = [
    r"D:\Scanner_Source_Identification\Canon120-1",
    r"D:\Scanner_Source_Identification\Canon120-2",
    r"D:\Scanner_Source_Identification\Canon220",
    r"D:\Scanner_Source_Identification\Canon9000-1",
    r"D:\Scanner_Source_Identification\Canon9000-2",
    r"D:\Scanner_Source_Identification\EpsonV39-1",
    r"D:\Scanner_Source_Identification\EpsonV39-2",
    r"D:\Scanner_Source_Identification\EpsonV370-1",
    r"D:\Scanner_Source_Identification\EpsonV370-2",
    r"D:\Scanner_Source_Identification\EpsonV550",
    r"D:\Scanner_Source_Identification\HP"
]


In [3]:
metadata_list = []

for folder in folders:
    class_label = os.path.basename(folder)  # Folder name as class_label
    for file in os.listdir(folder):
        if file.endswith(".tif") or file.endswith(".jpg") or file.endswith(".png"):  # adjust file types
            file_path = os.path.join(folder, file)
            img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue

            height, width = img.shape
            aspect_ratio = width / height
            file_size_kb = os.path.getsize(file_path) / 1024
            mean_intensity = np.mean(img)
            std_intensity = np.std(img)
            skewness = pd.Series(img.flatten()).skew()
            kurtosis = pd.Series(img.flatten()).kurtosis()
            entropy = shannon_entropy(img)
            edge_density = np.mean(sobel(img))

            metadata_list.append([
                file, class_label, width, height, aspect_ratio,
                file_size_kb, mean_intensity, std_intensity,
                skewness, kurtosis, entropy, edge_density
            ])


In [6]:
import pandas as pd
import numpy as np
import os
import cv2
from skimage.filters import sobel
from skimage.measure import shannon_entropy


In [7]:
metadata_df = pd.DataFrame(metadata_list, columns=[
    "file_name", "class_label", "width", "height", "aspect_ratio",
    "file_size_kb", "mean_intensity", "std_intensity",
    "skewness", "kurtosis", "entropy", "edge_density"
])


In [8]:
metadata_df.head()

Unnamed: 0,file_name,class_label,width,height,aspect_ratio,file_size_kb,mean_intensity,std_intensity,skewness,kurtosis,entropy,edge_density


In [11]:
metadata_list = []

for folder in folders:
    class_label = os.path.basename(folder)  # folder name as class_label
    for file in os.listdir(folder):
        if file.endswith((".tif", ".jpg", ".png")):  # only images
            file_path = os.path.join(folder, file)
            img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)  # read in grayscale

            if img is None:  # skip unreadable files
                print(f"Skipping {file_path}, cannot read image")
                continue

            height, width = img.shape
            aspect_ratio = width / height
            file_size_kb = os.path.getsize(file_path) / 1024
            mean_intensity = np.mean(img)
            std_intensity = np.std(img)
            skewness = pd.Series(img.flatten()).skew()
            kurtosis = pd.Series(img.flatten()).kurtosis()
            entropy = shannon_entropy(img)
            edge_density = np.mean(sobel(img))

            metadata_list.append([
                file, class_label, width, height, aspect_ratio,
                file_size_kb, mean_intensity, std_intensity,
                skewness, kurtosis, entropy, edge_density
            ])


In [12]:
columns = [
    "file_name", "class_label", "width", "height", "aspect_ratio",
    "file_size_kb", "mean_intensity", "std_intensity",
    "skewness", "kurtosis", "entropy", "edge_density"
]

metadata_df = pd.DataFrame(metadata_list, columns=columns)


In [13]:
metadata_df.head()      # see first few rows
metadata_df.describe()  # statistics for numeric columns
metadata_df.info()      # check total entries and types


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 0 entries
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   file_name       0 non-null      object
 1   class_label     0 non-null      object
 2   width           0 non-null      object
 3   height          0 non-null      object
 4   aspect_ratio    0 non-null      object
 5   file_size_kb    0 non-null      object
 6   mean_intensity  0 non-null      object
 7   std_intensity   0 non-null      object
 8   skewness        0 non-null      object
 9   kurtosis        0 non-null      object
 10  entropy         0 non-null      object
 11  edge_density    0 non-null      object
dtypes: object(12)
memory usage: 132.0+ bytes


In [15]:
metadata_df.head() 

Unnamed: 0,file_name,class_label,width,height,aspect_ratio,file_size_kb,mean_intensity,std_intensity,skewness,kurtosis,entropy,edge_density


In [21]:
metadata_df.to_csv("full_metadata.csv", index=False)


In [22]:
metadata_df.to_csv(r"D:\Scanner_Source_Identification\full_metadata.csv", index=False)


In [24]:
metadata_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 0 entries
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   file_name       0 non-null      object
 1   class_label     0 non-null      object
 2   width           0 non-null      object
 3   height          0 non-null      object
 4   aspect_ratio    0 non-null      object
 5   file_size_kb    0 non-null      object
 6   mean_intensity  0 non-null      object
 7   std_intensity   0 non-null      object
 8   skewness        0 non-null      object
 9   kurtosis        0 non-null      object
 10  entropy         0 non-null      object
 11  edge_density    0 non-null      object
dtypes: object(12)
memory usage: 132.0+ bytes


In [26]:
metadata_list = []

for folder in folders:
    class_label = os.path.basename(folder)  # folder name as class_label
    for file in os.listdir(folder):
        if file.endswith((".tif", ".jpg", ".png")):  # only images
            file_path = os.path.join(folder, file)
            img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)  # read in grayscale

            if img is None:  # skip unreadable files
                print(f"Skipping {file_path}, cannot read image")
                continue

            height, width = img.shape
            aspect_ratio = width / height
            file_size_kb = os.path.getsize(file_path) / 1024
            mean_intensity = np.mean(img)
            std_intensity = np.std(img)
            skewness = pd.Series(img.flatten()).skew()
            kurtosis = pd.Series(img.flatten()).kurtosis()
            entropy = shannon_entropy(img)
            edge_density = np.mean(sobel(img))

            metadata_list.append([
                file, class_label, width, height, aspect_ratio,
                file_size_kb, mean_intensity, std_intensity,
                skewness, kurtosis, entropy, edge_density
            ])


In [28]:
metadata_df.head()

Unnamed: 0,file_name,class_label,width,height,aspect_ratio,file_size_kb,mean_intensity,std_intensity,skewness,kurtosis,entropy,edge_density


In [29]:
metadata_df = process_image_and_append(metadata, img_path, scanner_id="Canon120-1")

NameError: name 'process_image_and_append' is not defined

In [30]:
import os
import cv2
import pandas as pd
import numpy as np
from skimage.filters import sobel
from skimage.measure import shannon_entropy

# List all folders
folders = [
    r"D:\Scanner_Source_Identification\Canon120-1",
    r"D:\Scanner_Source_Identification\Canon120-2",
    r"D:\Scanner_Source_Identification\Canon220",
    r"D:\Scanner_Source_Identification\Canon9000-1",
    r"D:\Scanner_Source_Identification\Canon9000-2",
    r"D:\Scanner_Source_Identification\EpsonV39-1",
    r"D:\Scanner_Source_Identification\EpsonV39-2",
    r"D:\Scanner_Source_Identification\EpsonV370-1",
    r"D:\Scanner_Source_Identification\EpsonV370-2",
    r"D:\Scanner_Source_Identification\EpsonV550",
    r"D:\Scanner_Source_Identification\HP"
]

metadata_list = []

# Loop through each folder and process images
for folder in folders:
    class_label = os.path.basename(folder)  # folder name as class_label
    for file in os.listdir(folder):
        if file.endswith((".tif", ".jpg", ".png")):  # only image files
            file_path = os.path.join(folder, file)
            img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)  # read in grayscale

            if img is None:  # skip unreadable files
                print(f"Skipping {file_path}, cannot read image")
                continue

            height, width = img.shape
            aspect_ratio = width / height
            file_size_kb = os.path.getsize(file_path) / 1024
            mean_intensity = np.mean(img)
            std_intensity = np.std(img)
            skewness = pd.Series(img.flatten()).skew()
            kurtosis = pd.Series(img.flatten()).kurtosis()
            entropy = shannon_entropy(img)
            edge_density = np.mean(sobel(img))

            metadata_list.append([
                file, class_label, width, height, aspect_ratio,
                file_size_kb, mean_intensity, std_intensity,
                skewness, kurtosis, entropy, edge_density
            ])

# Convert list to DataFrame
columns = [
    "file_name", "class_label", "width", "height", "aspect_ratio",
    "file_size_kb", "mean_intensity", "std_intensity",
    "skewness", "kurtosis", "entropy", "edge_density"
]

metadata_df = pd.DataFrame(metadata_list, columns=columns)

# Optional: check first rows
print(metadata_df.head())

# Save to CSV
metadata_df.to_csv("full_metadata.csv", index=False)
print("✅ Metadata saved as full_metadata.csv in current folder")


Empty DataFrame
Columns: [file_name, class_label, width, height, aspect_ratio, file_size_kb, mean_intensity, std_intensity, skewness, kurtosis, entropy, edge_density]
Index: []
✅ Metadata saved as full_metadata.csv in current folder


In [31]:
metadata_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 0 entries
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   file_name       0 non-null      object
 1   class_label     0 non-null      object
 2   width           0 non-null      object
 3   height          0 non-null      object
 4   aspect_ratio    0 non-null      object
 5   file_size_kb    0 non-null      object
 6   mean_intensity  0 non-null      object
 7   std_intensity   0 non-null      object
 8   skewness        0 non-null      object
 9   kurtosis        0 non-null      object
 10  entropy         0 non-null      object
 11  edge_density    0 non-null      object
dtypes: object(12)
memory usage: 132.0+ bytes


In [32]:
metadata_df.head()

Unnamed: 0,file_name,class_label,width,height,aspect_ratio,file_size_kb,mean_intensity,std_intensity,skewness,kurtosis,entropy,edge_density


In [33]:
print(len(metadata_df))


0


In [34]:
import os
import cv2
import pandas as pd
import numpy as np
from skimage.filters import sobel
from skimage.measure import shannon_entropy

# List of main folders
folders = [
    r"D:\Scanner_Source_Identification\Canon120-1",
    r"D:\Scanner_Source_Identification\Canon120-2",
    r"D:\Scanner_Source_Identification\Canon220",
    r"D:\Scanner_Source_Identification\Canon9000-1",
    r"D:\Scanner_Source_Identification\Canon9000-2",
    r"D:\Scanner_Source_Identification\EpsonV39-1",
    r"D:\Scanner_Source_Identification\EpsonV39-2",
    r"D:\Scanner_Source_Identification\EpsonV370-1",
    r"D:\Scanner_Source_Identification\EpsonV370-2",
    r"D:\Scanner_Source_Identification\EpsonV550",
    r"D:\Scanner_Source_Identification\HP"
]

metadata_list = []

# Loop through main folders
for folder in folders:
    class_label = os.path.basename(folder)
    print(f"Processing folder: {class_label}")

    # Recursively walk through all subfolders
    for root, dirs, files in os.walk(folder):
        for file in files:
            if file.lower().endswith((".tif", ".jpg", ".png")):
                file_path = os.path.join(root, file)
                img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
                if img is None:
                    print(f"Skipping {file_path}, cannot read image")
                    continue

                height, width = img.shape
                aspect_ratio = width / height
                file_size_kb = os.path.getsize(file_path) / 1024
                mean_intensity = np.mean(img)
                std_intensity = np.std(img)
                skewness = pd.Series(img.flatten()).skew()
                kurtosis = pd.Series(img.flatten()).kurtosis()
                entropy = shannon_entropy(img)
                edge_density = np.mean(sobel(img))

                metadata_list.append([
                    file, class_label, width, height, aspect_ratio,
                    file_size_kb, mean_intensity, std_intensity,
                    skewness, kurtosis, entropy, edge_density
                ])

# Create DataFrame
columns = [
    "file_name", "class_label", "width", "height", "aspect_ratio",
    "file_size_kb", "mean_intensity", "std_intensity",
    "skewness", "kurtosis", "entropy", "edge_density"
]

metadata_df = pd.DataFrame(metadata_list, columns=columns)

# Preview
print(metadata_df.head())

# Save CSV
metadata_df.to_csv("full_metadata.csv", index=False)
print("✅ Metadata saved as full_metadata.csv")


Processing folder: Canon120-1
Processing folder: Canon120-2
Processing folder: Canon220
Processing folder: Canon9000-1
Processing folder: Canon9000-2
Processing folder: EpsonV39-1
Processing folder: EpsonV39-2
Processing folder: EpsonV370-1
Processing folder: EpsonV370-2
Processing folder: EpsonV550
Processing folder: HP
      file_name class_label  width  height  aspect_ratio  file_size_kb  \
0  s1_1 (1).tif  Canon120-1   2480    3508      0.706956  25495.067383   
1      s1_1.tif  Canon120-1   1240    1752      0.707763   6366.801758   
2     s1_10.tif  Canon120-1   1240    1752      0.707763   6366.801758   
3    s1_100.tif  Canon120-1   1240    1752      0.707763   6366.801758   
4     s1_11.tif  Canon120-1   1240    1752      0.707763   6366.801758   

   mean_intensity  std_intensity  skewness   kurtosis   entropy  edge_density  
0      247.771843      32.923892 -4.875400  22.621641  1.489360      0.017195  
1      247.957934      32.055566 -4.814412  22.196448  1.086845      0.0

In [1]:
metadata_df.info()

NameError: name 'metadata_df' is not defined

In [7]:
import sys
print(sys.executable)


c:\Users\DELL\AppData\Local\Programs\Python\Python314\python.exe


In [1]:
import os
import cv2
import pandas as pd
import numpy as np
from skimage.filters import sobel
from skimage.measure import shannon_entropy

# List of main folders
folders = [
    r"D:\Scanner_Source_Identification\Canon120-1",
    r"D:\Scanner_Source_Identification\Canon120-2",
    r"D:\Scanner_Source_Identification\Canon220",
    r"D:\Scanner_Source_Identification\Canon9000-1",
    r"D:\Scanner_Source_Identification\Canon9000-2",
    r"D:\Scanner_Source_Identification\EpsonV39-1",
    r"D:\Scanner_Source_Identification\EpsonV39-2",
    r"D:\Scanner_Source_Identification\EpsonV370-1",
    r"D:\Scanner_Source_Identification\EpsonV370-2",
    r"D:\Scanner_Source_Identification\EpsonV550",
    r"D:\Scanner_Source_Identification\HP"
]

metadata_list = []

# Loop through main folders
for folder in folders:
    class_label = os.path.basename(folder)
    print(f"Processing folder: {class_label}")

    # Recursively walk through all subfolders
    for root, dirs, files in os.walk(folder):
        for file in files:
            if file.lower().endswith((".tif", ".jpg", ".png")):
                file_path = os.path.join(root, file)
                img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
                if img is None:
                    print(f"Skipping {file_path}, cannot read image")
                    continue

                height, width = img.shape
                aspect_ratio = width / height
                file_size_kb = os.path.getsize(file_path) / 1024
                mean_intensity = np.mean(img)
                std_intensity = np.std(img)
                skewness = pd.Series(img.flatten()).skew()
                kurtosis = pd.Series(img.flatten()).kurtosis()
                entropy = shannon_entropy(img)
                edge_density = np.mean(sobel(img))

                metadata_list.append([
                    file, class_label, width, height, aspect_ratio,
                    file_size_kb, mean_intensity, std_intensity,
                    skewness, kurtosis, entropy, edge_density
                ])

# Create DataFrame
columns = [
    "file_name", "class_label", "width", "height", "aspect_ratio",
    "file_size_kb", "mean_intensity", "std_intensity",
    "skewness", "kurtosis", "entropy", "edge_density"
]

metadata_df = pd.DataFrame(metadata_list, columns=columns)

# Preview
print(metadata_df.head())

# Save CSV
metadata_df.to_csv("full_metadata.csv", index=False)
print("✅ Metadata saved as full_metadata.csv")

import os
import cv2
import pandas as pd
import numpy as np
from skimage.filters import sobel
from skimage.measure import shannon_entropy

# List of main folders
folders = [
    r"D:\Scanner_Source_Identification\Canon120-1",
    r"D:\Scanner_Source_Identification\Canon120-2",
    r"D:\Scanner_Source_Identification\Canon220",
    r"D:\Scanner_Source_Identification\Canon9000-1",
    r"D:\Scanner_Source_Identification\Canon9000-2",
    r"D:\Scanner_Source_Identification\EpsonV39-1",
    r"D:\Scanner_Source_Identification\EpsonV39-2",
    r"D:\Scanner_Source_Identification\EpsonV370-1",
    r"D:\Scanner_Source_Identification\EpsonV370-2",
    r"D:\Scanner_Source_Identification\EpsonV550",
    r"D:\Scanner_Source_Identification\HP"
]

metadata_list = []

# Loop through main folders
for folder in folders:
    class_label = os.path.basename(folder)
    print(f"Processing folder: {class_label}")

    # Recursively walk through all subfolders
    for root, dirs, files in os.walk(folder):
        for file in files:
            if file.lower().endswith((".tif", ".jpg", ".png")):
                file_path = os.path.join(root, file)
                img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
                if img is None:
                    print(f"Skipping {file_path}, cannot read image")
                    continue

                height, width = img.shape
                aspect_ratio = width / height
                file_size_kb = os.path.getsize(file_path) / 1024
                mean_intensity = np.mean(img)
                std_intensity = np.std(img)
                skewness = pd.Series(img.flatten()).skew()
                kurtosis = pd.Series(img.flatten()).kurtosis()
                entropy = shannon_entropy(img)
                edge_density = np.mean(sobel(img))

                metadata_list.append([
                    file, class_label, width, height, aspect_ratio,
                    file_size_kb, mean_intensity, std_intensity,
                    skewness, kurtosis, entropy, edge_density
                ])

# Create DataFrame
columns = [
    "file_name", "class_label", "width", "height", "aspect_ratio",
    "file_size_kb", "mean_intensity", "std_intensity",
    "skewness", "kurtosis", "entropy", "edge_density"
]

metadata_df = pd.DataFrame(metadata_list, columns=columns)

# Preview
print(metadata_df.head())
print(metadata_df.info())
print(metadata_df.describe())
print(metadata_df["class_label"].value_counts())

# Save CSV
metadata_df.to_csv("full_metadata.csv", index=False)
print("✅ Metadata saved as full_metadata.csv")
9

Processing folder: Canon120-1
Processing folder: Canon120-2
Processing folder: Canon220
Processing folder: Canon9000-1
Processing folder: Canon9000-2
Processing folder: EpsonV39-1
Processing folder: EpsonV39-2
Processing folder: EpsonV370-1
Processing folder: EpsonV370-2
Processing folder: EpsonV550
Processing folder: HP
      file_name class_label  width  height  aspect_ratio  file_size_kb  \
0  s1_1 (1).tif  Canon120-1   2480    3508      0.706956  25495.067383   
1      s1_1.tif  Canon120-1   1240    1752      0.707763   6366.801758   
2     s1_10.tif  Canon120-1   1240    1752      0.707763   6366.801758   
3    s1_100.tif  Canon120-1   1240    1752      0.707763   6366.801758   
4     s1_11.tif  Canon120-1   1240    1752      0.707763   6366.801758   

   mean_intensity  std_intensity  skewness   kurtosis   entropy  edge_density  
0      247.771843      32.923892 -4.875400  22.621641  1.489360      0.017195  
1      247.957934      32.055566 -4.814412  22.196448  1.086845      0.0

In [None]:
//W O R K 2 (SC A N N E R  S O U R C E  I D E N T I F I C A T I O N)

In [3]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage import io,img_as_float
from skimage.filters import sobel
from scipy.stats import skew, kurtosis,entropy
import matplotlib.pyplot as plt 
from PIL import Image, ExifTags
from tqdm import tqdm

In [4]:
def load_gray(img_path,size=(512,512)):
    img=io.imread(img_path, as_gray=True)
    img=img_as_float(img)
    return cv2.resize(img,size, interpolation=cv2.INTER_AREA)

In [5]:
def extract_features(img,file_path,class_label,pixel_density):
    h,w=img.shape
    aspect_ratio=w/h
    file_size_kb=os.path.getsize(file_path)/1024  # in KB
    pixels=img.flatten()
    mean_intensity=np.mean(pixels)
    std_intensity=np.std(pixels)
    skewness=skew(pixels)
    kurt=kurtosis(pixels)
    ent=entropy(np.histogram(pixels, bins=256, range=(0, 1))[0]+1e-6)
    edges=sobel(img)
    edge_density=np.mean(edges>0.1)
    return {
        "file_name": os.path.basename(file_path),
        "class_label": class_label,
        "pixel_density": pixel_density,
        "width": w,
        "height": h,
        "aspect_ratio": aspect_ratio,
        "file_size_kb": file_size_kb,
        "mean_intensity": mean_intensity,
        "std_intensity": std_intensity,
        "skewness": skewness,
        "kurtosis": kurt,
        "entropy": ent,
        "edge_density": edge_density
    }


In [6]:
def build_metadata(root_folder):
    columns = [
        "file_name", "class_label", "pixel_density", "width", "height",
        "aspect_ratio", "file_size_kb", "mean_intensity", "std_intensity",
        "skewness", "kurtosis", "entropy", "edge_density"
    ]
    metadata = pd.DataFrame(columns=columns)

    for class_folder in os.listdir(root_folder):
        class_path = os.path.join(root_folder, class_folder)
        if not os.path.isdir(class_path):
            continue  # skip files

        for pixel_folder in os.listdir(class_path):
            pixel_path = os.path.join(class_path, pixel_folder)
            if not os.path.isdir(pixel_path):
                continue

            # Loop through all images inside pixel density folder
            for file_name in tqdm(os.listdir(pixel_path), desc=f"{class_folder}/{pixel_folder}"):
                if file_name.lower().endswith(".tif"):
                    file_path = os.path.join(pixel_path, file_name)
                    img = load_gray(file_path)
                    features = extract_features(img, file_path, class_folder, pixel_folder)
                    metadata.loc[len(metadata)] = features

    return metadata


In [None]:
root_path = r"D:\Scanner_Source_Identification"
metadata_df = build_metadata(root_path)
metadata_df.to_csv("image_metadata.csv", index=False)