In [1]:
import numpy as np
import pandas as pd

import cv2
from pathlib import Path

import matplotlib.pyplot as plt

from tqdm import tqdm
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops
from sklearn.preprocessing import LabelEncoder

import os, glob, random

from src.get_features import *

In [2]:
# Project root. Defaults to the original author's checkout; set the
# CSIRO_PROJECT_PATH environment variable to run the notebook elsewhere.
project_path    = os.environ.get("CSIRO_PROJECT_PATH", "/Users/k.choi/Documents/github/csiro_git")
data_path       = os.path.join(project_path, "data")
img_path        = os.path.join(data_path, "train")
# glob's result order depends on the file system; sort it so the row order of
# everything derived from img_list is the same on every run.
img_list        = sorted(glob.glob(os.path.join(img_path, "*.jpg")))
train_df        = pd.read_csv(os.path.join(data_path, "train.csv"))
# image_id = file name of image_path without directory and extension; it is the
# join key between the per-image features and the rows of train.csv.
train_df['image_id'] = train_df['image_path'].apply(lambda x: Path(x).stem)

print('project_path : ' , project_path)
print('data_path : '    , data_path)
print('img_path : '     , img_path)
print('total number of images : ', len(img_list))
# DataFrame.info() prints its report itself and returns None, so it must not be
# wrapped in print() (that only appends a stray "None" after the report).
print('train_df : ')
train_df.info()

project_path :  /Users/k.choi/Documents/github/csiro_git
data_path :  /Users/k.choi/Documents/github/csiro_git/data
img_path :  /Users/k.choi/Documents/github/csiro_git/data/train
total number of images :  357
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1785 entries, 0 to 1784
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   sample_id      1785 non-null   object 
 1   image_path     1785 non-null   object 
 2   Sampling_Date  1785 non-null   object 
 3   State          1785 non-null   object 
 4   Species        1785 non-null   object 
 5   Pre_GSHH_NDVI  1785 non-null   float64
 6   Height_Ave_cm  1785 non-null   float64
 7   target_name    1785 non-null   object 
 8   target         1785 non-null   float64
 9   image_id       1785 non-null   object 
dtypes: float64(3), object(7)
memory usage: 139.6+ KB
train_df :  None


In [3]:
%%time 

features_list = []

# for img_path in tqdm(selected_img_path, desc='features extraction in progress'):
for img_path in tqdm(img_list, desc='features extraction in progress'):

    try:
        features = update_features(img_path)
        features['image_id'] = Path(img_path).stem
        features_list.append(features)
    except Exception as e:
        print(f"Error loading {img_path}: {e}")
        continue

features_df = pd.DataFrame(features_list)

features extraction in progress: 100%|██████████| 357/357 [01:31<00:00,  3.90it/s]

CPU times: user 1min 30s, sys: 11.1 s, total: 1min 41s
Wall time: 1min 31s





In [5]:
# --- Per-image metadata -------------------------------------------------------
# train_df holds several rows per image (one per target_name); take the first
# row of each image for the image-level columns.
META_COLUMNS = ['Sampling_Date', 'State', 'Species', 'Pre_GSHH_NDVI', 'Height_Ave_cm']
image_meta  = train_df.groupby('image_id').first()[META_COLUMNS].reset_index()

# Left join: keep every image that has features, even without metadata.
metadata_df = pd.merge(features_df, image_meta, on='image_id', how='left')
metadata_df.info()

# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing.
stats_dir = os.path.join(project_path, 'stats')
os.makedirs(stats_dir, exist_ok=True)
metadata_df.to_csv(os.path.join(stats_dir, "metadata_df.csv"), index=False)

# --- One column per target ----------------------------------------------------
# Each target_name becomes its own (image_id, <target_name>) frame.
TARGET_NAMES = ['Dry_Total_g', 'Dry_Green_g', 'Dry_Dead_g', 'Dry_Clover_g', 'GDM_g']

target_frames = {}
for name in TARGET_NAMES:
    frame = (
        train_df.loc[train_df['target_name'] == name, ['image_id', 'target']]
        .rename(columns={'target': name})
    )
    print(len(frame))
    target_frames[name] = frame

# Keep the individual per-target names available for any later cell that uses them.
dry_df, green_df, dead_df, clover_df, gdm_df = (target_frames[name] for name in TARGET_NAMES)

# Inner joins: an image stays in complete_df only if it has every target.
complete_df = metadata_df
for name in TARGET_NAMES:
    complete_df = pd.merge(complete_df, target_frames[name], on='image_id')


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 357 entries, 0 to 356
Data columns (total 47 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   r_mean          357 non-null    float64
 1   r_median        357 non-null    float64
 2   r_std           357 non-null    float64
 3   g_mean          357 non-null    float64
 4   g_median        357 non-null    float64
 5   g_std           357 non-null    float64
 6   b_mean          357 non-null    float64
 7   b_median        357 non-null    float64
 8   b_std           357 non-null    float64
 9   green_coverage  357 non-null    float64
 10  hue_mean        357 non-null    float64
 11  hue_std         357 non-null    float64
 12  sat_mean        357 non-null    float64
 13  sat_std         357 non-null    float64
 14  val_mean        357 non-null    float64
 15  val_std         357 non-null    float64
 16  sobel_mean      357 non-null    float64
 17  sobel_std       357 non-null    flo

In [7]:
# Inspect the merged feature/metadata/target table, then persist it.
print(complete_df.columns)
# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() only appends a stray "None" to the output.
complete_df.info()

# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing.
stats_dir = os.path.join(project_path, 'stats')
os.makedirs(stats_dir, exist_ok=True)
complete_df.to_csv(os.path.join(stats_dir, "complete_df.csv"), index=False)

Index(['r_mean', 'r_median', 'r_std', 'g_mean', 'g_median', 'g_std', 'b_mean',
       'b_median', 'b_std', 'green_coverage', 'hue_mean', 'hue_std',
       'sat_mean', 'sat_std', 'val_mean', 'val_std', 'sobel_mean', 'sobel_std',
       'canny_mean', 'canny_std', 'binary_mean', 'binary_std', 'lbp_mean',
       'lbp_std', 'lbp_0', 'lbp_1', 'lbp_2', 'lbp_3', 'lbp_4', 'lbp_5',
       'lbp_6', 'lbp_7', 'lbp_8', 'lbp_9', 'contrast', 'dissimilarity',
       'homogeneity', 'energy', 'glcm_corre', 'ASM', 'entropy', 'image_id',
       'Sampling_Date', 'State', 'Species', 'Pre_GSHH_NDVI', 'Height_Ave_cm',
       'Dry_Total_g', 'Dry_Green_g', 'Dry_Dead_g', 'Dry_Clover_g', 'GDM_g'],
      dtype='object')
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 357 entries, 0 to 356
Data columns (total 52 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   r_mean          357 non-null    float64
 1   r_median        357 non-null    float64
 2   r_std     