In [3]:
import os
import shutil
import time
from FindFiles import find_files
from os import walk

def move(from_dir, to_dir, regex, ignore_empty = True, replace_existing = False):
    lst_files = find_files(from_dir, regex, remove_empty = ignore_empty)
    cnt = 0
    for f in lst_files:
        dfile = os.path.join(to_dir, os.path.basename(f))
        if not replace_existing and os.path.exists(dfile):
            print "%s already exists" % dfile
        else:
            shutil.copyfile(f, dfile)
            cnt += 1
    print "Moved %s files" % str(cnt)
    
def replace_if_newer(from_dir, to_dir, regex, ignore_empty = True, content_filter = None):
    if content_filter:
        content_filter = content_filter.strip()
    
    lst_files = find_files(from_dir, regex, remove_empty = ignore_empty)
    cnt = 0
    for from_file in lst_files:
        to_file = os.path.join(to_dir, os.path.basename(from_file))
        if os.path.exists(to_file):
            from_time = time.ctime(os.path.getmtime(from_file))
            to_time = time.ctime(os.path.getmtime(to_file))

            with open(to_file) as f:
                to_contents = f.read()

            if from_time >= to_time:
                if content_filter:        
                    if to_contents.strip() == content_filter:
                        print "Passing on %s as destination file matches content filter" % to_file
                        continue
                
                os.remove(to_file)
                shutil.copyfile(from_file, to_file)
            else:
                with open(from_file) as f:
                    from_contents = f.read()
                ratio = len(from_contents) / float(len(to_contents) + 1)
                if ratio >= 10.0:
                    os.remove(to_file)
                    shutil.copyfile(from_file, to_file)
                    print "Copying %s as source file is %f times larger than destination file" % (from_file,ratio)
                else:
                    print "Passing on %s as destination file is newer" % from_file
        else:
            shutil.copyfile(from_file, to_file)
            cnt += 1
    print "Moved %s files" % str(cnt)
    
def list_folders(folder):
    for (dirpath, dirnames, filenames) in walk(folder):
        return dirnames
    return None

In [4]:
root = "/Users/simon.hughes/Google Drive/PhD/Data/all-essays-2015-09/CoralBleaching/EBA1415"
to_dir = "%s/%s" % (root,"Merged") 

for folder in sorted(list_folders(root)):
    from_dir = "%s/%s" % (root,folder)
    if from_dir == to_dir:
        continue
    print from_dir
    replace_if_newer(from_dir, to_dir, "^.*\.ann$")
    replace_if_newer(from_dir, to_dir, "^.*\.txt$")

/Users/simon.hughes/Google Drive/PhD/Data/all-essays-2015-09/CoralBleaching/EBA1415/3-17-15
Moved 65 files
Moved 65 files
/Users/simon.hughes/Google Drive/PhD/Data/all-essays-2015-09/CoralBleaching/EBA1415/4-15-15
Moved 284 files
Moved 284 files
/Users/simon.hughes/Google Drive/PhD/Data/all-essays-2015-09/CoralBleaching/EBA1415/5-15-15
Moved 213 files
Moved 218 files
/Users/simon.hughes/Google Drive/PhD/Data/all-essays-2015-09/CoralBleaching/EBA1415/6-2-15
Moved 227 files
Passing on /Users/simon.hughes/Google Drive/PhD/Data/all-essays-2015-09/CoralBleaching/EBA1415/6-2-15/EBA1415_TFHC_1_CB_ES-05939.txt as destination file is newer
Moved 228 files
/Users/simon.hughes/Google Drive/PhD/Data/all-essays-2015-09/CoralBleaching/EBA1415/6-24-15
Moved 179 files
Moved 180 files
/Users/simon.hughes/Google Drive/PhD/Data/all-essays-2015-09/CoralBleaching/EBA1415/7-20-15
Moved 186 files
Moved 186 files


In [None]:
# EBA1415/6-2-15/EBA1415_TFHC_1_CB_ES-05939.txt as destination file is newer is in 5-15-15 also

In [5]:
root = "/Users/simon.hughes/Google Drive/PhD/Data/all-essays-2015-09/SkinCancer/EBA1415"
to_dir = "%s/%s" % (root,"Merged") 

for folder in sorted(list_folders(root)):
    from_dir = "%s/%s" % (root,folder)
    if from_dir == to_dir:
        continue
    print from_dir
    replace_if_newer(from_dir, to_dir, "^.*\.ann$")
    replace_if_newer(from_dir, to_dir, "^.*\.txt$")

/Users/simon.hughes/Google Drive/PhD/Data/all-essays-2015-09/SkinCancer/EBA1415/3-17-15
Moved 74 files
Moved 74 files
/Users/simon.hughes/Google Drive/PhD/Data/all-essays-2015-09/SkinCancer/EBA1415/4-15-15
Moved 251 files
Moved 252 files
/Users/simon.hughes/Google Drive/PhD/Data/all-essays-2015-09/SkinCancer/EBA1415/5-15-15
Moved 223 files
Moved 225 files
/Users/simon.hughes/Google Drive/PhD/Data/all-essays-2015-09/SkinCancer/EBA1415/6-2-15
Moved 192 files
Moved 192 files
/Users/simon.hughes/Google Drive/PhD/Data/all-essays-2015-09/SkinCancer/EBA1415/6-24-15
Moved 253 files
Moved 255 files
/Users/simon.hughes/Google Drive/PhD/Data/all-essays-2015-09/SkinCancer/EBA1415/7-20-15
Moved 114 files
Moved 116 files
