In [1]:
import os
import pandas as pd
from tqdm import tqdm
from glob import glob
from dotenv import load_dotenv
import torch
from torch import optim
from torch.utils.data import DataLoader
from utils.plot_image import image_gallery, plot_reconstructions
import matplotlib.pyplot as plt
from base64 import urlsafe_b64decode


load_dotenv(override=True)

LOCAL_BUCKET_FOLDER = os.getenv("LOCAL_BUCKET_FOLDER")
EXPERIMENT_FOLDER = "experiment_2"

prod_raw_folder = LOCAL_BUCKET_FOLDER + "/prod_raw"
raw_images = glob(prod_raw_folder + "/**/*.jpg", recursive=True)
raw_images = list(set(raw_images))

extra_images_folder = LOCAL_BUCKET_FOLDER + "/video_extraction"
extra_images = glob(extra_images_folder + "/**/*.jpg", recursive=True)
extra_images = list(set(extra_images))

raw_images = raw_images + extra_images

raw_images_df = pd.DataFrame(raw_images, columns=["image_path"])

def get_image_name(image_path, offset:int):
    return '/'.join(image_path.split('/')[offset:]).split('.')[0]

raw_images_df['image_name'] = raw_images_df.image_path.apply(lambda x: get_image_name(x, 2))

print('total:',len(raw_images_df))
def images_split(images_df: pd.DataFrame, train_size: float = 0.90, val_size: float = 0.05, test_size: float = 0.05, random_state: int = 42):
    assert train_size + val_size + test_size == 1
    train_df = images_df.sample(frac=train_size, random_state=random_state)
    val_df = images_df.drop(train_df.index).sample(frac=val_size / (1 - train_size), random_state=random_state)
    test_df = images_df.drop(train_df.index).drop(val_df.index)
    return train_df, val_df, test_df


  from .autonotebook import tqdm as notebook_tqdm


total: 21420


In [2]:
from PIL import Image
def get_img_dims(fp):
    return Image.open(fp).size

get_img_dims(raw_images_df.image_path.iloc[0])

(240, 192)

In [3]:
raw_images_df

Unnamed: 0,image_path,image_name
0,bucket_data/prod_raw/PV1066/2023-11-16-01-02-0...,PV1066/2023-11-16-01-02-06_Sprayer
1,bucket_data/prod_raw/PV1006/Nobili Flail 808_2...,PV1006/Nobili Flail 808_2023-05-17-08-45-47_11
2,bucket_data/prod_raw/PV1066/2023-12-04-03-28-5...,PV1066/2023-12-04-03-28-50_flail_mower
3,bucket_data/prod_raw/PV1035/04_seeder_schmeise...,PV1035/04_seeder_schmeiser__2023-08-10-05-30-47_1
4,bucket_data/prod_raw/PV1020/unknown_2023-04-12...,PV1020/unknown_2023-04-12-11-17-41_1
...,...,...
21415,bucket_data/video_extraction/2023-10-31T13-38-...,2023-10-31T13-38-28/PV1032/PTOCamera/2023-08-0...
21416,bucket_data/video_extraction/2023-10-31T13-38-...,2023-10-31T13-38-28/PS1054/PTOCamera/2023-10-2...
21417,bucket_data/video_extraction/2023-10-31T13-38-...,2023-10-31T13-38-28/PV1031/PTOCamera/2023-09-1...
21418,bucket_data/video_extraction/2023-10-31T13-38-...,2023-10-31T13-38-28/PV1031/PTOCamera/2023-09-1...


In [4]:
from json import load

def read_json(fp):
    with open(fp) as f:
        return load(f)

def get_bbox_from_json(fp):
    json_dict = read_json(fp)
    bbox = json_dict['new_bounding_box']
    bbox_list = [
        bbox['upper'],
        bbox['bottom'],
        bbox['left'],
        bbox['right'],
    ]
    return bbox_list

In [5]:
labeled_df = pd.DataFrame({
    'json_path': glob(LOCAL_BUCKET_FOLDER + "/prod_clean/**/*.json", recursive=True)
})
labeled_df['implement_class'] = labeled_df.json_path.apply(lambda x: x.split('/')[2])
labeled_df['implement_class'] = labeled_df['implement_class'].apply(lambda x: urlsafe_b64decode(x).decode("utf-8").replace('<O6kDj>', ';'))
labeled_df['implement_class'] = labeled_df.implement_class.astype("category")
labeled_df['image_name'] = labeled_df.json_path.apply(lambda x: get_image_name(x, 3))
labeled_df['bbox'] = labeled_df.json_path.apply(get_bbox_from_json)
#labeled_df.drop('json_path', axis=1, inplace=True)
labeled_df

Unnamed: 0,json_path,implement_class,image_name,bbox
0,bucket_data/prod_clean/Q2Fubm9weSBTcHJheWVyPE8...,Cannopy Sprayer;Rears;Pul Blast 300 Gallon Narrow,uat_raw/canopysprayer_rears_pulblast_2023-01-2...,"[0, 810, 625, 1139]"
1,bucket_data/prod_clean/Q2Fubm9weSBTcHJheWVyPE8...,Cannopy Sprayer;Rears;Pul Blast 300 Gallon Narrow,uat_raw/canopysprayer_rears_pulblast_2023-01-2...,"[0, 810, 645, 1160]"
2,bucket_data/prod_clean/Q2Fubm9weSBTcHJheWVyPE8...,Cannopy Sprayer;Rears;Pul Blast 300 Gallon Narrow,uat_raw/canopysprayer_rears_pulblast_2023-01-2...,"[0, 810, 632, 1150]"
3,bucket_data/prod_clean/Q2Fubm9weSBTcHJheWVyPE8...,Cannopy Sprayer;Rears;Pul Blast 300 Gallon Narrow,uat_raw/canopysprayer_rears_pulblast_2023-01-2...,"[0, 810, 635, 1148]"
4,bucket_data/prod_clean/Q2Fubm9weSBTcHJheWVyPE8...,Cannopy Sprayer;Rears;Pul Blast 300 Gallon Narrow,uat_raw/canopysprayer_rears_pulblast_2023-01-2...,"[0, 810, 630, 1146]"
...,...,...,...,...
5252,bucket_data/prod_clean/RmxhaWwgTW93ZXI8TzZrRGo...,Flail Mower;Agrimaster;Yoyo 1500,PV1066/2023-10-11-22-45-12_flail_mower,"[700, 1075, 490, 1414]"
5253,bucket_data/prod_clean/RmxhaWwgTW93ZXI8TzZrRGo...,Flail Mower;Agrimaster;Yoyo 1500,PV1066/2023-11-12-20-46-13_flail_mower,"[724, 1075, 507, 1433]"
5254,bucket_data/prod_clean/RmxhaWwgTW93ZXI8TzZrRGo...,Flail Mower;Agrimaster;Yoyo 1500,PV1066/2023-10-24-22-19-57_flail_mower,"[716, 1075, 488, 1396]"
5255,bucket_data/prod_clean/RmxhaWwgTW93ZXI8TzZrRGo...,Flail Mower;Agrimaster;Yoyo 1500,PV1066/2023-11-20-01-24-29_Mower,"[559, 983, 363, 1574]"
