## Recursively remove spaces from file names

When you use this common command:

``` find ./ ...    | xargs grep  ... ```

it will break on names consisting of several words separated by spaces,
<br>because xargs splits its input on whitespace and treats each word as a separate file name. 

You can work around this problem with options 
<br>that use the ASCII NUL character instead of whitespace 
<br>to terminate (separate) the file names:
```
  -print0 option in find,
  -0      option in xargs
```
for example:

``` find ./ -type f -print0 | xargs -0 ls -1 ```

Or you can simply run the code below,
<br>which renames directories and files
<br>by replacing spaces, quotes, etc.
<br>with underscores.

In [None]:
import sys, os, glob, subprocess, time, datetime

In [None]:
# Root of the tree to clean, given as an absolute path.
# To clean some other tree, assign it directly, e.g.:
# MYDIR = "/some/path"
# ----------------------------------
HOME = os.environ['HOME']
MYDIR = f"{HOME}/"
# echo the chosen root followed by a separator line
print(MYDIR, "-" * 50, sep="\n")
# only warns; the rest of the notebook still runs if the path is missing
if not os.path.isdir(MYDIR):
    print("ERROR - no such directory")
# ----------------------------------
# Path fragments to leave alone: any path containing one of these
# strings is excluded from renaming by the cells below.
skip_list = [
    "Applications",
    "Applications (Parallels)",
    "Parallels",
    "Public",
    "VicsLogoMatcher",
    "brett_env",
    "gowork",
    "proj1_env",
    "test",
    "test2",
    "Google Drive",
    "Library",
    "Music/Music",
    "Pictures",
    "Movies",
    "Documents",
    "Calibre Library",
    "docs_big/SOFT_big",
    "docs/mySOFT_sn",
    "anaconda3",
]

# echo the skip list, one entry per line
print("\n".join(skip_list))
# ----------------------------------
def print_date_time():
    """Print the current local date and time as 'YYYY-MM-DD HH:MM:SS'."""
    now = datetime.datetime.now()
    print(f"{now:%Y-%m-%d %H:%M:%S}")
# ----------------------------------
# start-of-run timestamp; the final cell subtracts it to report elapsed seconds
time1 = time.time()
# ----------------------------------

In [None]:
def myrun(cmd):
    """Run a shell command and return its output as a stripped string.

    stderr is merged into stdout.  A non-zero exit status is not treated
    as an error: whatever the command printed is returned anyway, so
    callers must be prepared to receive error text.

    Args:
        cmd: command line passed to the shell (``shell=True``).

    Returns:
        The decoded combined output with leading/trailing whitespace removed.

    Raises:
        Any exception other than ``subprocess.CalledProcessError`` raised
        while starting the command (e.g. ``TypeError`` for a bad *cmd*).
    """
    try:
        raw = subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as err:
        # Only CalledProcessError carries `.output`; catching anything broader
        # would replace the real failure with an AttributeError.
        raw = err.output
    return raw.decode().strip()

#### Clean directories' names layer by layer.
#### Repeat the next cell until it reports "no directories to rename, good".

In [None]:
# List every directory under MYDIR (via fd), drop the ones matching
# skip_list, and rename the rest so that their own name contains
# no spaces or single quotes.
os.chdir(MYDIR)
mydir = os.getcwd()
print(mydir)
print("getting list of all directories")
out_str = myrun("/usr/local/bin/fd -t d")
print(len(out_str))
print("-"*50)
out_list = out_str.split("\n")
print(len(out_list))

# preview at most the first 10 entries; a slice is safe on shorter lists
for entry in out_list[:10]:
    print(entry)
print("-"*50)

# double check that they are directories
# (this also drops error text returned by myrun when fd itself fails)
dirs1 = [ff for ff in out_list if os.path.isdir(ff)]
print(len(dirs1))
print("-"*50)
print("remove directories which are in skip list")
dirs = [
    dd for dd in dirs1
    if not any(skip_dir in dd for skip_dir in skip_list)
]
print(len(dirs))
print("-"*50)
print("rename directories (spaces and quotes to underscores)")
N = len(dirs)
print(f"found {N:,d} dirs")
counter = 0
# Rename deepest directories first and touch only the last path component:
# that way the parent part of every path still exists when it is used,
# even if the parent itself is renamed later in this same pass.
# NOTE(review): some fd versions appear to print a trailing "/" for
# directories - it is stripped so os.path.split yields the real name.
ordered = sorted(
    (dd.rstrip("/") for dd in dirs),
    key=lambda path: path.count("/"),
    reverse=True,
)
for mydir in ordered:
    parent, name = os.path.split(mydir)
    new_name = name.replace(" ", "_").replace("'", "_")
    if new_name == name:
        continue
    mydir2 = os.path.join(parent, new_name)
    if os.path.lexists(mydir2):
        # never merge into / replace an existing entry
        print(f"skipping: {mydir} => {mydir2} (target already exists)")
        continue
    print(f"renaming: {mydir} => {mydir2}")
    os.rename(mydir, mydir2)
    counter += 1
if counter >= 1:
    print(f"renamed {counter} directories, run again !")
else:
    print(f"no directories to rename, good")
print("DONE")

#### Next we can rename files to remove spaces, quotes, double quotes, "&".

In [None]:
# ----------------------------------
def rename_if_needed(myfile):
    """Rename *myfile* so that its file name has no spaces, quotes or '&'.

    Spaces, single quotes, double quotes and '&' in the final path
    component are replaced by underscores.  The directory part of the
    path is left untouched, so the file always stays in its directory.

    Nothing happens when the name is already clean.  When the cleaned
    name already exists, the rename is skipped (with a message) instead
    of overwriting the existing entry.

    Args:
        myfile: path of the file to rename (relative or absolute).

    Raises:
        OSError: if ``os.rename`` itself fails (e.g. *myfile* is missing).
    """
    parent, name = os.path.split(myfile)
    # one pass over the name: each unwanted character maps to "_"
    new_name = name.translate(str.maketrans(" '\"&", "____"))
    if new_name == name:
        return
    myfile2 = os.path.join(parent, new_name)
    if os.path.lexists(myfile2):
        # os.rename would silently replace an existing file - keep both
        print(f"skipping: {myfile} => {myfile2} (target already exists)")
        return
    print(f"renaming: {myfile} => {myfile2}")
    os.rename(myfile, myfile2)

# ----------------------------------
def in_skip_list(myfile):
    """Return True when any skip_list entry occurs as a substring of *myfile*."""
    return any(elem in myfile for elem in skip_list)

#### Run the next cell again until it prints "Nothing to rename"

In [None]:
%%time
# List every file under MYDIR (via fd), skip those matching skip_list,
# and rename the ones whose path contains a space, quote or '&'.
print_date_time()
print(f"Getting list of files under {MYDIR} - takes some time")
# -------------------------------
os.chdir(MYDIR)
files1 = myrun("/usr/local/bin/fd -t f").split("\n")
print_date_time()
print(f"removing 'skip_list' and renaming as needed")
files = []
for ff in files1:
    if in_skip_list(ff):
        continue
    if not any(ch in ff for ch in " '\"&"):
        continue
    # double check that it is a file: myrun returns the command's error
    # text when fd fails, and such lines must not reach os.rename
    if not os.path.isfile(ff):
        continue
    files.append(ff)
    rename_if_needed(ff)
# -------------------------------
Nfiles = len(files)
print(Nfiles)
print("-"*20)
if Nfiles <= 0:
    print("Nothing to rename")
else:
    # show at most the first 10 handled paths
    for shown in files[:10]:
        print(shown)

In [None]:
# report how many seconds have passed since time1 was recorded
time2 = time.time()
elapsed = time2-time1
print(f"Elapsed seconds = {elapsed:.3f}")