In [1]:
import logging
import os
import shlex
import subprocess
from base64 import b64decode
from time import sleep

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import paramiko
import pysftp
import rawpy
from tqdm import tqdm

#ERDA Settings

# Base64-encoded RSA key registered as the host key for ERDA_HOST
# (see ERDA.connectSFTP). This is a public key, not a secret.
SFTP_PUBLIC_KEY = """AAAAB3NzaC1yc2EAAAADAQABAAABAQC5jIRJYvHM4oiKTd0jvEbDyzToL1RCQiPrDbXHaEjhLaZzLWZ/GmxhdcYWF9VqfgYhbO1Jc6Had/JaxQd6Gq7adrOlWHwiNiuFwcBlDsuf0zha/qup9PZ2Gm21tNmE5OKReEqn5u5O39zHzU5rWOzlaNudeG5HltbZg0NazSSRvamdChU3n2NS2rbjQZq7ERQcaA5+673lqXUJ+7tL39H6am0R02JkJaaXwuffPAG8JcMQUAWl4CVCpu1CgbhsaPBerOARlP5yM+leXtUvy/F+RWVchl6xRtE95LSsh09hD4k6IpQ1Q5V9oZnoroaA1OFP5k/+amUjwqJEiNNR0Mvp"""
ERDA_USERNAME = "ngw861@alumni.ku.dk"

# The SFTP password must never be stored in the notebook. It is read from
# the ERDA_SFTP_PASSWORD environment variable instead; export it before
# starting the kernel. The password that used to be hard-coded here should
# be considered leaked and rotated.
ERDA_SFTP_PASSWORD = os.environ.get('ERDA_SFTP_PASSWORD')
if not ERDA_SFTP_PASSWORD:
    logging.warning('ERDA_SFTP_PASSWORD is not set; connecting to ERDA will fail')

ERDA_HOST = 'io.erda.dk'
ERDA_PORT = 22
ERDA_FOLDER = 'NHMD_digitization/image_storage'

#LOCAL Settings

#For storing images immediately after they are taken by the camera
#This folder is regularly cleared of all data
TEMP_IMAGE_CACHE_PATH = '/IMAGE_CACHE'

#This is for keeping a local copy of all the images taken. This
# will need to be periodically cleared, but not as often as the cache
LOCAL_IMAGE_STORAGE_PATH = '/IMAGE_STORAGE'


class ERDA():
    """Small wrapper around a pysftp connection to the ERDA server.

    The connection is opened on construction and must be released with
    ``close()``. The class can also be used as a context manager
    (``with ERDA() as erda: ...``) so the connection is closed even when
    the body raises.
    """

    def __init__(self):
        self.sftp = self.connectSFTP()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Never swallow the exception raised inside the ``with`` body.
        return False

    def connectSFTP(self):
        """Open and return a pysftp connection using the module-level ERDA settings.

        SFTP_PUBLIC_KEY is registered as the only accepted ssh-rsa host key
        for ERDA_HOST before connecting.
        """
        key = paramiko.RSAKey(data=b64decode(SFTP_PUBLIC_KEY))
        cnopts = pysftp.CnOpts()
        cnopts.hostkeys.add(ERDA_HOST, 'ssh-rsa', key)

        # ERDA_PORT was previously defined but silently ignored.
        sftp = pysftp.Connection(host=ERDA_HOST, username=ERDA_USERNAME,
                                 password=ERDA_SFTP_PASSWORD, port=ERDA_PORT,
                                 cnopts=cnopts)
        return sftp

    def upload(self, localPath, remotePath):
        """Copy the local file ``localPath`` to ``remotePath`` on the server."""
        self.sftp.put(localPath, remotePath)

    def download(self, remotePath, localPath):
        """Copy the remote file ``remotePath`` to ``localPath`` on this machine."""
        self.sftp.get(remotePath, localpath = localPath)

    def getFiles(self, folder):
        """Return the entry names inside the remote directory ``folder``."""
        return self.sftp.listdir(folder)

    def checkUploaded(self, erdaPath, cachePath):
        """Return True if the file named by ``erdaPath`` exists on the server.

        The remote folder of ``erdaPath`` is listed and searched for the
        remote file name. ``cachePath`` is only used to name the file in the
        log messages; previously its basename was also the name searched
        for, which reported a failure whenever the remote and local names
        differed.
        """
        erdaFolder, _, erdaFile = erdaPath.rpartition('/')
        # A bare file name has no folder part; list the current remote dir.
        files = self.getFiles(erdaFolder or '.')
        cacheFile = cachePath.split('/')[-1]

        if erdaFile in files:
            logging.info('ERDA Upload okay for %s'%cacheFile)
            return True
        # A missing upload is a problem, so log it above the default
        # WARNING threshold instead of at INFO where it is invisible.
        logging.warning('Something messed up %s '%cacheFile)
        return False

    def close(self):
        """Close the underlying SFTP connection."""
        self.sftp.close()
        

def getFiles(folder):
    """Return the names of all entries directly inside the local ``folder``."""
    entries = os.listdir(folder)
    return entries
    
def createTiff(arwPath, cacheFolder=None):
    """Convert the raw image at ``arwPath`` to TIFF and return the TIFF path.

    The conversion shells out to the ``sips`` command-line tool, which must
    be available on PATH.

    Parameters
    ----------
    arwPath : str
        Path of the source ``.arw`` file.
    cacheFolder : str, optional
        Directory the TIFF is written to. Defaults to TEMP_IMAGE_CACHE_PATH;
        the previously referenced ``CACHE_FOLDER`` is not defined anywhere in
        the visible notebook and raised NameError.

    Returns
    -------
    str
        ``<cacheFolder>/<stem>.tiff``.

    Raises
    ------
    subprocess.CalledProcessError
        If ``sips`` exits with a non-zero status.
    """
    if cacheFolder is None:
        cacheFolder = TEMP_IMAGE_CACHE_PATH

    # splitext keeps dots inside the stem ("a.1.arw" -> "a.1"); splitting on
    # the first dot would map "a.1.arw" and "a.2.arw" to the same TIFF name.
    name = os.path.splitext(os.path.basename(arwPath))[0]
    tiffFile = name + '.tiff'
    tiffPath = os.path.join(cacheFolder, tiffFile)

    subprocess.check_output(['sips', '-s','format','tiff', arwPath, '--out', tiffPath])
    return tiffPath
    
def getARWFiles(folder):
    """Return the ``.arw`` file names in ``folder``, ordered by ``os.path.getctime``.

    Always returns a list of bare file names (no directory part). When the
    folder holds no ``.arw`` files the result is an empty list; the previous
    ``('', '')`` tuple made callers iterate over two empty file names.
    """
    ARWFiles = [f for f in os.listdir(folder) if f.endswith('.arw')]
    if not ARWFiles:
        return []

    # Sort on the full path because getctime needs a resolvable path, then
    # strip the directory again for the caller.
    paths = sorted((os.path.join(folder, image) for image in ARWFiles),
                   key=os.path.getctime)
    return [os.path.basename(path) for path in paths]

    
def deleteFile(_file):
    """Remove the file at path ``_file``; returns whatever ``os.remove`` returns (None)."""
    outcome = os.remove(_file)
    return outcome

In [2]:
def deleteFilesfromDump(folder='/home/rob/HWR/DUMP/'):
    """Delete every entry in ``folder`` that is neither ``.arw`` nor ``.DS_Store``.

    Parameters
    ----------
    folder : str, optional
        Directory to clean. Defaults to the dump folder used by this
        notebook, so existing zero-argument calls behave as before.
    """
    # str.endswith accepts a tuple, which replaces the two negated checks
    # that were combined with the bitwise ``&`` operator.
    keep_suffixes = ('.arw', '.DS_Store')
    for local_file in os.listdir(folder):
        if not local_file.endswith(keep_suffixes):
            os.remove(os.path.join(folder, local_file))

In [3]:

# List the raw image store and the two folders that already hold selected
# images. try/finally guarantees the SFTP connection is released even if
# one of the listings fails.
erda = ERDA()
try:
    erda_files = erda.getFiles(ERDA_FOLDER)
    already_uploaded = erda.getFiles('alcon image analysis/Images of SNM specimens/Upper Surfaces')
    already_uploaded += erda.getFiles('alcon image analysis/Images of SNM specimens/Lower Surfaces')
finally:
    erda.close()



In [None]:
# Keep only raw images whose name consists of exactly six
# underscore-separated fields; the columns below index into those fields.
files = [file for file in erda_files
         if file.endswith('.arw') and len(file.split('_')) == 6]

df = pd.DataFrame(files,columns=['fn'])
# Split every file name once and derive all columns from the same parts.
name_parts = df['fn'].map(lambda fn: fn.split('_'))
df['cat_no'] = name_parts.map(lambda p: p[0].split('-')[1])
df['side'] = name_parts.map(lambda p: p[1])
df['date'] = name_parts.map(lambda p: p[2])
df['time'] = name_parts.map(lambda p: p[3])
df['suffix'] = name_parts.map(lambda p: p[5])
# The key drops the last two fields and identifies one image stack.
df['key'] = name_parts.map(lambda p: '_'.join(p[:-2]))

# Restrict to catalogue numbers starting with '22'.
ph = df[df['cat_no'].map(lambda x: x.startswith('22'))]

# Names in the destination folders are '<key>_<suffix>', so dropping the
# last field recovers the key.
already_uploaded_keys = ['_'.join(fn.split('_')[:-1]) for fn in already_uploaded]
print(len(ph))
# One vectorised mask instead of re-filtering the frame once per key.
ph = ph[~ph['key'].isin(already_uploaded_keys)]
print(len(ph))

5881
5227


In [69]:
# Close the SFTP connection currently bound to `erda` (ERDA.close calls
# self.sftp.close()).
# NOTE(review): the execution count of this cell (69) is out of sequence
# with its neighbours, so it was run by hand at some point; on a fresh
# top-to-bottom run `erda` has already been closed by the listing cell.
erda.close()

In [None]:
erda = ERDA()
deleteFilesfromDump()
local_folder = '/home/rob/HWR/DUMP/'

# Crop window applied to every decoded image before measuring sharpness:
# rows sy:ey, columns sx:ex. Loop-invariant, so defined once.
sx = 2500
ex = 4800
sy = 2500
ey = 4000

try:
    for key,group in tqdm(ph.groupby('key')):
        # Download every image of this stack and remember exactly which
        # files were fetched. Listing the dump folder instead would also
        # pick up .DS_Store and stale frames left by an earlier, larger
        # stack.
        stack = []
        for ix,row in group.iterrows():
            localPath = local_folder + row['suffix']
            remotePath = 'NHMD_digitization/image_storage/' + row['fn']
            erda.download(remotePath,localPath)
            if row['suffix'] not in stack:
                stack.append(row['suffix'])

        if len(stack) != 6:
            print('wtf')
            #break

        # Score each frame by the variance of its Laplacian over the crop
        # window and keep the frame with the highest score.
        variances = []
        for file in stack:
            localPath = local_folder + file
            with rawpy.imread(localPath) as raw:
                img = raw.postprocess()[sy:ey,sx:ex]
                variances += [cv2.Laplacian(img, cv2.CV_64F).var()]
        fileWithMostContrast = stack[np.argmax(variances)]

        # `row` is the last row of the group; side and key are taken from it.
        remote_folder = 'Upper Surfaces' if row['side'] == 'D' else 'Lower Surfaces'
        remotePath = 'alcon image analysis/Images of SNM specimens/' + remote_folder + '/' + row['key'] + '_' + fileWithMostContrast
        localPath = local_folder + fileWithMostContrast
        erda.upload(localPath,remotePath)
finally:
    # Release the SFTP connection even when a download, decode or upload
    # fails part-way through this multi-hour loop.
    erda.close()

  2%|▏         | 19/872 [4:13:06<198:15:11, 836.71s/it]

In [80]:
# Scratch cell: build a path under the raw image store from the key and the
# selected file name, and display it next to the last upload destination.
# NOTE(review): `row`, `fileWithMostContrast` and `remotePath` are leftovers
# from the most recent iteration of the selection loop, so this cell only
# works after that loop has run (or been interrupted) in the same kernel.
aPath = 'NHMD_digitization/image_storage/' + row['key'] + '_' + fileWithMostContrast   
aPath, remotePath

('NHMD_digitization/image_storage/NHMD-227369_D_20181016_110025_0.arw',
 'alcon image analysis/Images of SNM specimens/Upper Surfaces/NHMD-227369_D_20181016_110025_0.arw')

In [83]:
# Attempt a server-side copy instead of a download/upload round trip.
# The destination path contains spaces, so both paths are shell-quoted;
# unquoted, `cp` would receive the path split into several arguments.
# NOTE(review): the recorded output of this cell is a KeyboardInterrupt -
# confirm that the server allows remote command execution at all.
erda = ERDA()
try:
    erda.sftp.execute('cp %s %s'%(shlex.quote(aPath), shlex.quote(remotePath)))
finally:
    erda.close()

KeyboardInterrupt: 

In [None]:
    # NOTE(review): this cell starts indented and uses names that are not
    # defined at module level anywhere in the visible notebook
    # (`local_files`, `CACHE_FOLDER`, `STORAGE_FOLDER`) - presumably a
    # fragment pasted from a function body; confirm before running.
    #
    # For every file name in `local_files`: convert the raw file to TIFF,
    # upload both files to ERDA_FOLDER, and delete the cached copies once
    # both uploads are confirmed.
    for local_file in local_files:
        arwCachePath = os.path.join(CACHE_FOLDER, local_file)
        tiffCachePath = createTiff(arwCachePath)
        tiff_name = tiffCachePath.split('/')[-1]

        # NOTE(review): these two storage paths are computed but never used
        # below - possibly a missing "copy to local storage" step.
        arwLocalPath = os.path.join(STORAGE_FOLDER, local_file)
        tiffLocalPath = os.path.join(STORAGE_FOLDER, tiff_name)

        arwERDAPath = os.path.join(ERDA_FOLDER, local_file)
        tiffERDAPath = os.path.join(ERDA_FOLDER, tiff_name)

        erda.upload(tiffCachePath, tiffERDAPath)
        uploadedTiff = erda.checkUploaded(tiffERDAPath, tiffCachePath)

        erda.upload(arwCachePath, arwERDAPath)
        uploadedARW = erda.checkUploaded(arwERDAPath, arwCachePath)

        # Both flags are the booleans returned by checkUploaded; `&` is a
        # bitwise AND on them.
        if uploadedARW & uploadedTiff:
            deleteFile(arwCachePath)
            deleteFile(tiffCachePath)

    erda.close()