# inhagcutils

A set of modules and functions I seem to need in too many Colab Notebooks.

### Import modules

In [None]:
import sys, os, ntpath, string, random, librosa, librosa.display, IPython, shutil, math, psutil, datetime, requests
from glob import glob
from os.path import isdir, join
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from distutils.dir_util import copy_tree
from urllib.request import urlopen
from PIL import Image

### Common variables

In [None]:
# Root dirs
# (the earlier '/content/mydrive' assignment was dead code: it was overwritten
# on the very next line, so only the effective value is kept)
drive_root = "/content/drive/MyDrive/"
dir_tmp = "/content/tmp/"

# Quiet-mode flags, interpolated into shell commands for each tool
ffmpeg_q = "-hide_banner -loglevel panic" # -hide_banner -loglevel panic
sox_q = "-q" # -S, -q
youtube_q = "-q" # -q
wget_q = "-q" # -q
git_q = "-q" # -q
curl_q = "-s" # -s
pip_q = "-q" # -q

# Plot colorspace
plot_bg = '#272822'
plot_wav = '#d5d5d5'

# Audio formats for FFmpeg
# NOTE: wav_16 and wav_44 end with a trailing space, the others do not; the
# strings are left untouched because callers may concatenate them as-is.
wav_16 = "-c:a pcm_s16le -ar 16000 -ac 2 " # Signed 16-bit 16kHz Stereo
wav_44 = "-c:a pcm_s16le -ar 44100 -ac 2 " # Signed 16-bit 44.1kHz Stereo
wav_48 = "-c:a pcm_s24le -ar 48000 -ac 2"  # Signed 24-bit 48kHz Stereo
mp3_192 = "-vn -ar 44100 -ac 2 -b:a 192k"  # 44.1kHz 192kbps Stereo
mp3_320 = "-vn -ar 48000 -ac 2 -b:a 320k" # 48kHz 320kbps Stereo

# Packed files: suffixes (without the dot) tested with str.endswith
zip_extensions = ('zip', 'gz')

### Functions: general

In [None]:
class c:
  """ANSI escape sequences for styling terminal output (used as prefixes by op)."""
  # Reset and text attributes
  endc = '\033[0m'    # reset all styling
  bold = '\033[1m'
  u = '\033[4m'       # underline
  # Foreground colours
  title = '\033[96m'  # bright cyan
  ok = '\033[92m'     # bright green
  okb = '\033[94m'    # bright blue
  warn = '\033[93m'   # bright yellow
  fail = '\033[31m'   # red

# Print a styled message, optionally followed by an unstyled value.
# Input (str, str, any): style prefix (an attribute of class c), message to
#   style, optional value printed in plain text after the message
# Output: - (prints to stdout)
def op(typex, msg, value=''):
  if value != '':
    # Styled label and plain value share one line, separated by a space
    print(typex+msg+c.endc, end=' ')
    print(value)
    return
  print(typex+msg+c.endc)

# Input: -
# Output (boolean): True when the 'google.colab' module is already loaded,
#   which is taken to mean the notebook runs on a hosted Colab runtime
def is_hosted_runtime():
  loaded_modules = sys.modules
  return 'google.colab' in loaded_modules

# Normalise a directory path so that it always ends with a slash.
# Input (str, boolean): path, strip the leading slash
#   NOTE: despite its name, a truthy add_slash REMOVES a leading '/' (the
#   name is kept for backward compatibility with existing callers).
# Output (str): path ending in '/', optionally without its leading '/'
def fix_path(path, add_slash=False):
  if not path.endswith('/'):
    path = path+"/"
  if add_slash and path.startswith('/'):
    path = path[1:]
  return path
  
# Input (str): path
# Output (str): last path component (filename with extension, or the last
#   directory name when the path ends with a separator)
def path_leaf(path):
  parent, name = ntpath.split(path)
  if name:
    return name
  # Path ended with a separator: the leaf is the last part of the parent
  return ntpath.basename(parent)

# Input (str): file path
# Output (str): the path with its last component removed (enclosing directory,
#   including the trailing slash), e.g. '/a/b/c.exs' -> '/a/b/'
def path_dir(path):
  # Same leaf rule as path_leaf: last component, ignoring one trailing separator
  head, tail = ntpath.split(path)
  leaf = tail or ntpath.basename(head)
  if not leaf:
    # Nothing to strip (e.g. '' or '/')
    return path
  # Remove only the LAST occurrence of the leaf; str.replace would also eat
  # identically named parent directories ('/a/a' used to become '//').
  before, _, after = path.rpartition(leaf)
  return before + after

# Input (str, boolean): file path, drop the leading dot
# Output (str): file extension, '.ext' by default or 'ext' when only_ext is True
def path_ext(path, only_ext=False):
  suffix = os.path.splitext(path)[1]
  return suffix[1:] if only_ext == True else suffix

# Input (str): path
# Output (str): filename without directory and without its (last) extension;
#   surrounding whitespace of the filename is stripped first
def basename(path):
  leaf = os.path.basename(path).strip()
  stem, _ = os.path.splitext(leaf)
  return stem

# Classify an input string as a local directory, local file or URL.
# Input (str): path
# Output (str): "dir", "file", "youtube", "link" or "unknown"
#   - "dir" / "file": the path exists on disk
#   - "youtube": contains "://" and the substring "youtu"
#   - "link": any other string containing "://"
def check_input_type(path):
  if os.path.isdir(path):
    return "dir"
  if os.path.isfile(path):
    return "file"
  if "://" in path:
    return "youtube" if "youtu" in path else "link"
  return "unknown"

# Input (list): non-empty list of hashable items
# Output (any): modal value of list; on a tie the value that appears first
#   in the list wins. Also prints how often that value occurs.
# Raises: ValueError when the list is empty
def most_frequent(list):
  from collections import Counter  # local import: single O(n) counting pass
  if len(list) == 0:
    raise ValueError('most_frequent() needs at least one item')
  freq, hits = Counter(list).most_common(1)[0]
  print(str(hits)+' out of '+str(len(list)), 'items have a value of', str(freq))
  return freq

# Make a filename safe: keep only letters, digits, '-', '_', '.' and spaces,
# then turn the remaining spaces into underscores.
# Input (str): messy filename, e.g. "any long% weird !filename (like this).wav"
# Output (str): clean filename, e.g. "any_long_weird_filename_like_this.wav"
def slug(s):
  allowed = "-_. %s%s" % (string.ascii_letters, string.digits)
  kept = [ch for ch in s if ch in allowed]
  return ''.join(kept).replace(' ', '_')

# Input (int): number of characters
# Output (str): random string of lowercase ASCII letters, <number> characters long
def rnd_str(length):
  alphabet = string.ascii_lowercase
  picked = []
  for _ in range(length):
    picked.append(random.choice(alphabet))
  return ''.join(picked)

# Input (boolean, boolean): digits only, human readable
# Output (str): current local time as
#   "YYYYmmddHHMMSS"        when no_slash is truthy (takes precedence)
#   "YYYY-mm-dd HH:MM:SS"   when human_readable is exactly True
#   "YYYY-mm-dd_HHMMSS"     otherwise
def timestamp(no_slash=False, human_readable=False):
  if no_slash:
    layout = "%Y%m%d%H%M%S"
  elif human_readable is True:
    layout = "%Y-%m-%d %H:%M:%S"
  else:
    layout = "%Y-%m-%d_%H%M%S"
  return datetime.datetime.now().strftime(layout)

# Prefix every list item with the same parameter, e.g. for building CLI args.
# Input (str, list): parameter, list of strings
# Output (str): " -param item -param item ..." (starts with a space; for an
#   empty list only " -param " is returned)
def concat_list(p, s):
  separator = ' '+p+' '
  joined = separator.join(s)
  return separator+joined

# Input (float): probability of True, between 0 and 1
# Output (boolean): True with the given probability, otherwise False
def odds(probability):
  roll = random.random()  # uniform in [0, 1)
  return roll < probability

# Install Anaconda into /usr/local and then install the given conda packages.
# Relies on IPython shell magics (!), so it only works inside a notebook.
# Input (str): conda packages, interpolated as-is into `conda install`
# Output: -
def install_conda(packages):
  # Download the Anaconda3 5.1.0 installer (wget -c, using the wget_q quiet flag)
  !wget {wget_q} -c https://repo.continuum.io/archive/Anaconda3-5.1.0-Linux-x86_64.sh
  !chmod +x Anaconda3-5.1.0-Linux-x86_64.sh
  # Run the installer against /usr/local (flags -b -f -p as written;
  # presumably batch/force/prefix - TODO confirm against the installer help)
  !bash ./Anaconda3-5.1.0-Linux-x86_64.sh -b -f -p /usr/local
  import sys
  # NOTE(review): site-packages path is hard-coded to python3.6 - confirm it
  # matches the Python version of the current runtime
  sys.path.append('/usr/local/lib/python3.6/site-packages/')
  !conda install -q -y {packages}

# Input (str): path
# Output (boolean): True if the lowercased path ends with one of the suffixes
#   in zip_extensions, i.e. the file is considered packed
def is_zip(path):
  lowered = path.lower()
  return lowered.endswith(zip_extensions)

# Input (str): path
# Output (boolean): True if the file extension is .zip (case-insensitive)
def is_zipzip(path):
  suffix = path_ext(path)
  return suffix.lower() == '.zip'

# Input (str): path
# Output (boolean): True if the file extension is .gz (case-insensitive)
def is_gz(path):
  suffix = path_ext(path)
  return suffix.lower() == '.gz'

# Input (str): directory path
# Output (list): sorted list of image files (jpg, jpeg, png, gif in lower- or
#   uppercase) directly inside dir
def list_images(path):
  extensions = ('jpg', 'jpeg', 'png', 'gif')
  # Patterns are generated instead of hand-written: a missing comma between
  # '*.gif' and '*.JPG' used to merge them into one pattern matching neither.
  patterns = ['*.'+ext for ext in extensions] + ['*.'+ext.upper() for ext in extensions]
  # A set avoids duplicates on case-insensitive filesystems, where both the
  # lower- and uppercase pattern match the same file.
  found = set()
  for pattern in patterns:
    found.update(glob(join(path, pattern)))
  return sorted(found)

# Input (str): type 'short', 'long', 'timestamp'
# Output (str): ID key fetched from api.inha.asia ('short' / 'long'), the
#   result of timestamp() ('timestamp'), or '' for any other type
def gen_id(type='short'):
  # Compare with ==, not `is`: identity of equal strings is an interpreter
  # detail (and a SyntaxWarning on recent Python versions).
  if type == 'timestamp':
    return timestamp()
  if type == 'short':
    return requests.get('https://api.inha.asia/k/?type=short').text
  if type == 'long':
    return requests.get('https://api.inha.asia/k').text
  return ''

# Look up an image through the Google Custom Search JSON API.
# Input (str, str, int, str, boolean, boolean): Google API key, search query,
#   index of the result to return, image size, append exclusion terms to the
#   query (only when exactly True), append one random letter to the query
# Output (str): URL of the selected image
# Raises: KeyError / IndexError when the response has no 'items' entry or
#   fewer than nr+1 results (e.g. on an API error or an empty result set)
def googleImageSearch(api_key, q, nr=0, size='huge', optimize=True, add_extra_letter=False):
  cx = '696a4696eb66d64ca'
  if add_extra_letter:
    q = q + ' ' + random.choice(string.ascii_letters)
  if optimize is True:
    q = q + ' -quote -quotes -screenshot -capture -logo -banner -stock -slide -shutterfly -getty -123rf -alamy -bigstock -shutterstock -dreamstime -canstock -colourbox'
  # Everything goes through `params` so requests URL-encodes it; a query
  # containing '&', '#' or '+' used to corrupt the hand-built URL.
  params = {
      'key': api_key,
      'cx': cx,
      'q': q,
      'searchType': 'image',
      'imgSize': size,
      'fileType': 'jpg'
  }
  r = requests.get(url='https://www.googleapis.com/customsearch/v1', params=params)
  data = r.json()
  return data['items'][nr]['link']

# Scale an image to exactly new_width x new_height without distortion:
# centre-crop it to the target aspect ratio first, then resize.
# Input (PIL image, int, int): image, new width, new height
# Output: PIL image cropped and resized
def coverCropResize(img, new_width, new_height):
  width, height = img.size
  target_ratio = new_width / new_height

  # Decide by comparing aspect ratios, so the crop box always stays inside
  # the source. (The old code centred the vertical crop using the target
  # WIDTH and could produce a box wider than the image.)
  if width / height > target_ratio:
    # Source is relatively wider than the target: trim left and right
    crop_width = target_ratio * height
    left = (width - crop_width) / 2
    box = (left, 0, left + crop_width, height)
  else:
    # Source is relatively taller (or equal): trim top and bottom
    crop_height = width / target_ratio
    top = (height - crop_height) / 2
    box = (0, top, width, top + crop_height)
  cropped = img.crop(box)

  # Resize cropped image
  return cropped.resize((new_width, new_height))

# Download a URL to a local file, using a browser-like User-Agent.
# Input (str, str): URL, destination file path
# Output (int): HTTP status code of the response (the file is only written
#   on 200), or 0 when the connection failed or timed out
def fetch(url, save_as):
  headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'}
  try:
    # The context manager releases the streamed connection in every case
    with requests.get(url, stream=True, headers=headers, timeout=5) as r:
      if r.status_code == 200:
        with open(save_as, 'wb') as f:
          # Let urllib3 undo gzip/deflate so the raw stream is the real payload
          r.raw.decode_content = True
          shutil.copyfileobj(r.raw, f)
      # Returned for every status; previously a non-200 response hit an
      # unbound local variable instead of reporting its code.
      return r.status_code
  except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
    # A timeout is an expected failure here because timeout=5 is set above
    return 0

### Functions: temp file handlers

In [None]:
# Create every directory in the list that does not exist yet.
# Input (list): list of directory paths (empty strings are skipped)
# Output: -
def create_dirs(paths):
  for path in paths:
    if path != '' and not os.path.isdir(path):
      # os.makedirs instead of shelling out to mkdir: also creates missing
      # parent directories and is safe for paths with quotes or '$'
      os.makedirs(path, exist_ok=True)
      
# Recursively delete every existing directory in the list.
# Input (list): list of directory paths (non-directories are ignored)
# Output: -
def remove_dirs(paths):
  for path in paths:
    if not os.path.isdir(path):
      continue
    if os.path.islink(path):
      # Like `rm -r`, remove the link itself, never the directory it points to
      os.unlink(path)
    else:
      # shutil.rmtree instead of shelling out to rm: no quoting pitfalls
      shutil.rmtree(path)

# Recreate directories from scratch: delete them, then create them again.
# Input (list): list of directory paths
# Output: -
def reset_dirs(paths):
  # Order matters: remove first, then create the now-missing directories
  for step in (remove_dirs, create_dirs):
    step(paths)

# Delete the files directly inside each directory, keeping the directory.
# Mirrors the former `rm <dir>/*`: hidden entries (leading '.') and
# subdirectories are left in place.
# Input (list): list of directory paths (with or without trailing slash)
# Output: -
# Raises: FileNotFoundError when a listed directory does not exist
def clean_dirs(paths):
  for path in paths:
    for name in os.listdir(path):
      if name.startswith('.'):
        continue
      target = os.path.join(path, name)
      if os.path.isdir(target) and not os.path.islink(target):
        continue
      # os.remove instead of an unquoted shell glob, which broke on paths
      # containing spaces
      os.remove(target)

# Copy every entry matched in the source into the destination directory.
# Input (str, str): source directory path (trailing slash optional) or a
#   path prefix, destination directory path
# Output: -
# NOTE: shutil.copy only handles files; a subdirectory among the matches
#   still raises, as before.
def copy_all(source_dir, destination_dir):
  if os.path.isdir(source_dir):
    # Without a trailing slash the old pattern '<dir>*' matched the directory
    # itself (and its siblings) rather than its contents
    pattern = join(source_dir, '*')
  else:
    # Not a directory: keep the original prefix-glob behaviour
    pattern = source_dir+'*'
  for f in glob(pattern):
    shutil.copy(f, destination_dir)


### Functions: audio-related

In [None]:
# Input (str): directory path
# Output (list): sorted list of audio files (wav, aiff, aif, caf, flac, mp3,
#   ogg in lower- or uppercase) directly inside dir
def list_audio(path):
  extensions = ('wav', 'aiff', 'aif', 'caf', 'flac', 'mp3', 'ogg')
  # Patterns are generated instead of hand-written: a missing comma between
  # '*.caf' and '*.flac' used to merge them into one pattern matching neither.
  patterns = ['*.'+ext for ext in extensions] + ['*.'+ext.upper() for ext in extensions]
  # A set avoids duplicates on case-insensitive filesystems, where both the
  # lower- and uppercase pattern match the same file.
  found = set()
  for pattern in patterns:
    found.update(glob(join(path, pattern)))
  return sorted(found)

# Quick waveform plot on a black background, with optional peak markers and
# an optional second signal overlaid.
# Input (ndarray, ndarray, list, boolean, int): signal, optional second signal
#   ('' or None = none), list of peak sets given as sample indices (the first
#   set is drawn in pink, the others in white), random waveform colour,
#   samplerate
# Output: - (shows a matplotlib figure)
def swf(sig1, sig2='', peaks=(), rnd=False, sr=44100):
  # One time value per sample; a float-step arange(0, duration, 1/sr) can come
  # out one element longer than the signal and make plt.plot fail.
  time = np.arange(len(sig1)) / sr
  plt.rcParams.update({"axes.facecolor": "black"})
  plt.ylim(-1, 1)
  wave_color = np.random.rand(3) if rnd else '#00ffdd'
  plt.axhline(y=0, color='#fff', linewidth=0.5, alpha=0.5)
  plt.plot(time, sig1, color=wave_color, linewidth=0.3, alpha=1)
  for i, peak_set in enumerate(peaks):
    if i == 0:
      marker_color, prio = '#f3d', .65
    else:
      marker_color, prio = '#fff', .6
    for peak in peak_set:
      # Peaks are sample indices; convert to seconds for the x axis
      plt.axvline(x=peak/sr, color=marker_color, linewidth=0.7, alpha=prio)
  # `sig2 != ''` is ambiguous for arrays, so test for "no second signal"
  # explicitly instead.
  if sig2 is not None and not (isinstance(sig2, str) and sig2 == ''):
    plt.plot(time, sig2, color=np.random.rand(3), linewidth=0.3, alpha=0.55)
  plt.show()

# Plot the waveform of an audio file or of an in-memory signal.
# Input (any, float, list, int): wav file path or audio signal as ndarray,
#   duration to load (file input only), x positions of vertical marker lines,
#   samplerate
# Output: waveform image of audio signal
def waveform(input, dur=None, peaks=[], sr=44100):
  # An ndarray is plotted as-is; anything else is passed to librosa.load as a
  # path and resampled to sr
  if type(input) == np.ndarray:
    data = input
  else:
    data, sr = librosa.load(input, sr=sr, duration=dur, offset=0.0)
  plt.rcParams['axes.facecolor'] = plot_bg
  fig = plt.figure(figsize=(16, 5), frameon=False)
  #ax = fig.add_axes([0, 0, 1, 1])
  #ax.axis('off')
  # Sets both axes to [-1, 1] before plotting
  # NOTE(review): the x range is presumably overridden by the waveplot call
  # below - confirm
  plt.axis([-1, 1, -1, 1])
  #plt.ylim(-1, 1)
  if len(peaks) > 0:
    for peak in peaks:
      # peak is used directly as an x coordinate (presumably seconds - TODO confirm)
      plt.axvline(x=peak, color='r')
  # NOTE(review): librosa.display.waveplot is not available in newer librosa
  # releases (waveshow replaces it) - confirm against the installed version
  librosa.display.waveplot(data, sr=sr, color=plot_wav)
  plt.show()

# Input (str): path to WAV file
# Output: image of pitches (first 100 frequency bins of librosa.piptrack over
#   time, lowest bin at the bottom)
def pitchform(wav_file):
  y, sr = librosa.load(wav_file)
  pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
  plt.figure(figsize=(16, 8))
  # 'lower' is the documented imshow value for putting row 0 at the bottom;
  # the former "bottom" is not a valid origin and is rejected by current
  # matplotlib versions.
  plt.imshow(pitches[:100, :], aspect="auto", interpolation="nearest", origin="lower")
  plt.show()

# Show both visualisations of one WAV file, waveform first.
# Input (str): path to WAV file
# Output: image of waveform and image of pitches
def waveform_pitch(wav_file):
  for render in (waveform, pitchform):
    render(wav_file)

# Show an inline audio player for a file or an in-memory signal.
# Input (any, int, number): path to audio file or audio signal as ndarray
#   (samples on the last axis), samplerate (ndarray input only), maximum
#   duration in minutes (0 or less = no limit)
# Output: audio player
# An ndarray longer than the limit is truncated to a preview; a file longer
# than the limit gets no player at all.
# 2 min limit for WAV files, MP3 files can probably last longer.
def audio_player(input, sr=44100, limit_duration=2):
  if isinstance(input, np.ndarray):
    if limit_duration > 0:
      last_sample = math.floor(limit_duration*60*sr)
      if input.shape[-1] > last_sample:
        # Trim the sample axis only; slicing two axes raised IndexError for
        # 1-D (mono) signals.
        input = input[..., :last_sample]
        print('This player will play only a', limit_duration, 'minute preview of the audio provided, to prevent Colab from crashing.\n')
    IPython.display.display(IPython.display.Audio(input, rate=sr))
  else:
    # NOTE(review): newer librosa releases name this keyword `path` instead of
    # `filename` - confirm against the installed version
    if limit_duration > 0 and limit_duration*60 < librosa.get_duration(filename=input):
      print('Audio is over', limit_duration, 'minutes long. Unable to provide audio player without crashing Colab.')
    else:
      IPython.display.display(IPython.display.Audio(input))

# Visualise a track and offer it for playback.
# Input (str, str): path to MP3 file, path to WAV file (of the same track)
# Output: image of waveform, image of pitches and audio player
def waveform_player(mp3, wav):
  # The plots are drawn from the WAV version ...
  waveform_pitch(wav)
  # ... while the player is given the MP3 version
  audio_player(mp3)

# Fetch the custom Spleeter configuration file.
# Relies on an IPython shell magic (!), so it only works inside a notebook.
# Input: -
# Output: creates current /content/cfg.json
def configSpleeter():
  # Copies custom-4stems-22kHz-a.json from the gs://neural-research bucket to
  # /content/cfg.json with gsutil
  !gsutil -q -m cp -R gs://neural-research/olaviinha/spleeter-configs/custom-4stems-22kHz-a.json /content/cfg.json

### Test

In [None]:
# create_dirs(['/a', '/a/b', '/a/b/c'])
# !cp /content/sample_data/anscombe.json /a/b/c.exs
# existing_file = '/a/b/c.exs'
# nonexisting_file = '/a/b/c.non'
# existing_path1 = '/a/b/c'
# existing_path2 = '/a/b/c/'
# testlist = ['a', 'a', 'b', 'c', 'd', 'd', 'd']
# crazyfile = "!This is a% (Real)[Crazy] File-name~1.MKV"

# print('Notebook is running in hosted Colab env:',   is_hosted_runtime() )
# print('Add end slash', existing_path1, '->',        fix_path(existing_path1))
# print('Remove start slash', existing_path2, '->',   fix_path(existing_path2, True))
# print('Last item from file path', existing_file, '->', path_leaf(existing_file))
# print('Last item from dir path', existing_path1, '->', path_leaf(existing_path1))
# print('Dir path from path', existing_file, '->', path_dir(existing_file))
# print('Basename from path', existing_file, '->', basename(existing_file))
# print('.Extension from path', existing_file, '->', path_ext(existing_file))
# print('Extension from path', existing_file, '->', path_ext(existing_file, True))
# print('Check input type for existing file path', existing_file, '->', check_input_type(existing_file))
# print('Check input type for existing dir path', existing_path1, '->', check_input_type(existing_path1))
# print('Check input type for nonexisting file', nonexisting_file, '->', check_input_type(nonexisting_file))
# print('Check most frequent value in list', testlist, '->', most_frequent(testlist))
# print('Prettify filename', crazyfile, '->', slug(crazyfile))
# print('Random string of 8 characters ->', rnd_str(8))
# print('Concatenate list with separator "-v 0.3" ->', concat_list("-v 0.3", testlist))
# print('Return True with 20% probability ->', odds(0.2))
# print('Return True with 50% probability ->', odds(0.5))
# print('Return True with 80% probability ->', odds(0.8))