In [1]:
# Notebook bootstrap: standard-library helpers, OpenCV, and the search path
# for project-local packages.
import os
import sys
# Relative to the notebook's working directory; presumably where the project
# packages imported later (utils, dataset, models) live -- TODO confirm.
module_path = "../src"
import cv2
#os.environ["WANDB_SILENT"] = "true" # Environment Variable to make wandb silent
# Guarded so that re-running this cell does not append the same entry twice.
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
# 1. General Libraries
import pandas as pd
import numpy as np
from pathlib import Path
import yaml
import csv
from tqdm.notebook import tqdm
# 2. DataModule & Class Libraries
from utils.label_encoder import label_encoder_target
from utils.calc_stat import calc_dataset_mean_std
from dataset.ImageDataModule import ImageDataModule
from dataset.ImageDataset import ImageDataset
from models.EfficientNetClass import EfficientNetClass
from models.ResNetClass import ResNetClass

# 3. Pytorch & Pytorch Lightning Libraries
from pytorch_lightning import Trainer,seed_everything
from torchvision import transforms
from pytorch_lightning.callbacks import LearningRateMonitor
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import WandbLogger
from skimage import io
from skimage.color import gray2rgb
# 4. Wandb Experiment Tracking
import wandb
import matplotlib.pyplot as plt

In [3]:
def generate_csv_newdata(route, extensions=('.jpg', '.jpeg', '.png')):
    """Collect the paths of image files found under ``route``'s ``figsplit_*`` entries.

    Every direct child of ``route`` whose name starts with ``figsplit_`` is
    walked recursively, and each file whose name ends with one of
    ``extensions`` is collected exactly once.

    Args:
        route: Directory to scan.
        extensions: Iterable of file-name suffixes to accept (matching is
            case-sensitive). A single string is treated as one suffix.

    Returns:
        list[str]: ``os.path.join(root, file)`` for every matching file, in
        sorted traversal order so repeated runs yield the same list.

    Raises:
        OSError: If ``route`` cannot be listed.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = (extensions,)
    # str.endswith accepts a tuple, so a file matching several suffixes is
    # still reported only once.
    suffixes = tuple(extensions)

    figsplit_paths = sorted(
        os.path.join(route, entry)
        for entry in os.listdir(route)
        if entry.startswith('figsplit_')
    )

    file_list = []
    for filepath in tqdm(figsplit_paths):
        for root, dirnames, filenames in os.walk(filepath):
            # Sorting in place makes os.walk descend in a deterministic order.
            dirnames.sort()
            file_list.extend(
                os.path.join(root, file)
                for file in sorted(filenames)
                if file.endswith(suffixes)
            )
    return file_list

In [4]:
def write_csv_newdata(route, output_path, extensions=('.jpg', '.jpeg', '.png'),
                      source='wormbase', mount_prefix='/mnt/'):
    """Write a CSV index of the readable images under ``route``'s ``figsplit_*`` entries.

    Every direct child of ``route`` whose name starts with ``figsplit_`` is
    walked recursively. Each file whose name ends with one of ``extensions``
    is opened with ``cv2.imread``; files that cannot be decoded are printed
    and skipped, all others get one row ``ID, img, source, img_path``.

    Args:
        route: Directory to scan.
        output_path: Destination CSV file (overwritten).
        extensions: Iterable of file-name suffixes to accept (matching is
            case-sensitive). A single string is treated as one suffix.
        source: Value written in the ``source`` column of every row.
        mount_prefix: Leading path fragment removed from the stored
            ``img_path``. Paths that do not start with it are stored unchanged.

    Returns:
        None. The result is the file written at ``output_path``.

    Raises:
        OSError: If ``route`` cannot be listed or ``output_path`` cannot be
            written.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = (extensions,)
    # str.endswith accepts a tuple, so a file matching several suffixes
    # produces a single row.
    suffixes = tuple(extensions)

    # List the input before opening the output so that a bad ``route`` does
    # not truncate an existing CSV.
    figsplit_paths = sorted(
        os.path.join(route, entry)
        for entry in os.listdir(route)
        if entry.startswith('figsplit_')
    )

    with open(output_path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['ID', 'img', 'source', 'img_path'])
        count = 1
        for filepath in tqdm(figsplit_paths):
            for root, dirnames, filenames in os.walk(filepath):
                # Sorting in place gives a deterministic walk, hence stable IDs.
                dirnames.sort()
                for file in sorted(filenames):
                    if not file.endswith(suffixes):
                        continue
                    full_path = os.path.join(root, file)
                    # Strip only a leading mount prefix, never an interior match.
                    if full_path.startswith(mount_prefix):
                        img_path = full_path[len(mount_prefix):]
                    else:
                        img_path = full_path
                    # Image name: path relative to ``route`` with separators flattened.
                    img_name = (
                        os.path.relpath(full_path, route)
                        .replace(os.sep, '_')
                        .replace('/', '_')
                    )
                    # Read the real on-disk path; cv2.imread returns None when
                    # the file cannot be decoded.
                    if cv2.imread(full_path) is None:
                        print(img_path)
                        continue
                    writer.writerow([count, img_name, source, img_path])
                    count += 1

In [5]:
# Index the '.jpg' files under the WormBase figsplit output into the
# unlabeled-data CSV; paths that cv2 cannot decode are printed below.
write_csv_newdata(
    route='/mnt/wormbase_output/',
    output_path='../data/wormbase_unlabeled.csv',
    extensions=['.jpg'],
)

HBox(children=(FloatProgress(value=0.0, max=2764.0), HTML(value='')))

wormbase_output/figsplit_pmid21901112/9_1/002.jpg
wormbase_output/figsplit_pmid28056346/10_1/002.jpg
wormbase_output/figsplit_pmid22037694/10_1/002.jpg
wormbase_output/figsplit_pmid28675140/7_1/006.jpg
wormbase_output/figsplit_pmid21558371/7_1/006.jpg



In [3]:
# Build the CSV index of readable CORD-19 figure crops.
# Layout walked: <route>/<entry>/figsplit_*/**/<image>, one row per image
# that cv2 can decode: ID, img, source, img_path.
route = '/mnt/allen/cord/output/'
extensions  = ['.jpg']
output_path = '../data/CORD_19_unlabeled.csv'

mount_prefix = '/mnt/'        # removed from the stored img_path
suffixes = tuple(extensions)  # str.endswith takes a tuple: one row per file

# Only directories can contain figsplit folders; skipping plain files keeps
# os.listdir below from failing on stray files in ``route``.
entry_dirs = sorted(
    os.path.join(route, entry)
    for entry in os.listdir(route)
    if entry != '.gitignore' and os.path.isdir(os.path.join(route, entry))
)

with open(output_path, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['ID', 'img','source','img_path'])
    count = 1
    for file_route in tqdm(entry_dirs):
        for folder in sorted(os.listdir(file_route)):
            # Keep 'figsplit_<x>' folders whose second '_' field is not itself
            # 'figsplit' and whose name does not contain 'pdf'. The length
            # check covers names without '_', which previously raised an
            # IndexError that a bare ``except`` swallowed.
            name_parts = folder.split('_')
            is_figsplit_dir = (
                folder.startswith('figsplit')
                and len(name_parts) > 1
                and name_parts[1] != 'figsplit'
                and 'pdf' not in folder
            )
            if not is_figsplit_dir:
                continue
            for root, dirnames, filenames in os.walk(os.path.join(file_route, folder)):
                # Sorting in place gives a deterministic walk, hence stable IDs.
                dirnames.sort()
                for file in sorted(filenames):
                    if not file.endswith(suffixes):
                        continue
                    full_path = os.path.join(root, file)
                    if full_path.startswith(mount_prefix):
                        img_path = full_path[len(mount_prefix):]
                    else:
                        img_path = full_path
                    img_name = full_path.replace(route, '').replace('/', '_')
                    # Stay best-effort for decoding problems only, and report
                    # them; errors writing the CSV must surface.
                    try:
                        img = cv2.imread(full_path)
                    except Exception as err:
                        print('Error: ', img_path, err)
                        continue
                    if img is None:
                        print('Vacio: ',img_path)
                        continue
                    writer.writerow([count, img_name, 'CORD19', img_path])
                    count += 1

HBox(children=(FloatProgress(value=0.0, max=16616.0), HTML(value='')))


