<a href="https://colab.research.google.com/github/keeganheilman/filename-verfification/blob/main/Set_Filename_Verification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Set Filename Verification
This notebook is a tool that helps Proofers compare a masterlist of filenames with the filenames found in the set folder(s).

⚠ Prior To Running
1.   Ensure the shared Google Drive set folder has been added as a shortcut to your Google Drive. [How to create/remove a Google Drive shortcut](https://support.google.com/drive/answer/9700156?hl=en&co=GENIE.Platform%3DDesktop)




In [1]:
# Import required packages
from os import walk
from google.colab import drive

# Mount Google Drive at /content/drive; the later cells read the set folder
# from "/content/drive/My Drive", so this cell must run first.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# List the entries of "My Drive" whose names contain "SET" (grep matches
# case-sensitively). Use the printed name as SET_FOLDER_NAME below.
!ls "/content/drive/My Drive" | grep "SET"

SET 17


In [3]:
# Name of the set's folder inside "My Drive", written exactly as it appears
# in the listing above. getFolderFilenames() uses it to build the folder path.
SET_FOLDER_NAME = "SET 17"

In [4]:

def checkForDuplicates(filename_list):
  """
  Return the set of filenames that occur more than once.

  Args:
    filename_list: iterable of filenames (hashable values, normally str).
      Lists, tuples and generators are all accepted; the input is
      traversed exactly once and is not modified.

  Returns:
    A set containing every filename that appears at least twice. The set
    is empty when there are no duplicates or the input is empty.
  """
  seen_filenames = set()
  duplicate_filenames = set()
  for filename in filename_list:
    # One pass with a "seen" set replaces calling list.count() for every
    # element, which rescanned the whole list each time (quadratic work).
    if filename in seen_filenames:
      duplicate_filenames.add(filename)
    else:
      seen_filenames.add(filename)
  return duplicate_filenames



def getFolderFilenames(SET_FOLDER_NAME):
  """
  Collect the filenames found in each immediate subfolder of the set folder.

  The set folder is looked up at '/content/drive/My Drive/<SET_FOLDER_NAME>/',
  so Google Drive must already be mounted at /content/drive. Only files that
  sit directly inside a first-level subfolder are collected; files at the top
  level of the set folder and files in deeper subfolders are not included.

  Prints the number of filenames per subfolder, any duplicate filenames
  inside a single subfolder, any duplicate filenames across all subfolders
  combined (reported once, after every subfolder has been read), and the
  overall total.

  Args:
    SET_FOLDER_NAME: name of the set folder inside "My Drive".

  Returns:
    A list of filenames (names only, no directory part) from all subfolders.
    The list is empty when the set folder is missing or has no subfolders.
  """
  set_folder_path = f'/content/drive/My Drive/{SET_FOLDER_NAME}/'
  filenames_ALL = []

  # walk() yields nothing for a missing directory, so next() falls back to
  # the default tuple. Its dirnames slot (index 1) must be an empty list:
  # a None there would make the loop below fail with a TypeError.
  folders = next(walk(set_folder_path), (None, [], []))[1]
  if not folders:
    print(f'no subfolders found in {set_folder_path} - check SET_FOLDER_NAME and the Google Drive shortcut')

  for folder in folders:
    # Index 2 of the first walk() entry holds the files directly in the folder.
    filenames = next(walk(f'{set_folder_path}{folder}'), (None, None, []))[2]
    print(f'number of filenames in {folder} folder: {len(filenames)}')
    duplicate_filenames = checkForDuplicates(filenames)
    if duplicate_filenames:
      print(f'/{SET_FOLDER_NAME}/{folder} contains duplicate filenames: {duplicate_filenames}')
    filenames_ALL += filenames

  # Check the combined list once, after the loop, so the same duplicates are
  # not reported again after every subsequent folder.
  duplicate_filenames = checkForDuplicates(filenames_ALL)
  if duplicate_filenames:
    print(f'combined ALL folders contains duplicate filenames: {duplicate_filenames}')

  print(f'number of filenames in ALL folders: {len(filenames_ALL)}')
  return filenames_ALL


def getMasterlistFilenames():
  """
  Return the hard-coded masterlist of expected filenames for the set.

  Prints how many filenames the masterlist holds and, when the masterlist
  lists a filename more than once, prints the repeated filenames.

  Returns:
    A new list of the masterlist filenames, in the order written below.
  """
  expected_filenames = [
      'aneffectivedemonstration.png',
      'bb9e.png',
      'beginlandingyourtroopsfightersstraightahead.png',
      'blizzard1.png',
      'burg.png',
      'captainneeda.png',
      'coruscantguard.png',
      'deputydirectorharusison.png',
      'eriadu.png',
      'garsaxon.png',
      'grummgar.png',
      'hothnorthridge.png',
      'landocalrissianvadersbroker.png',
      'lockthedoor.png',
      'malachorsithtemplegateway.png',
      'momentoftriumph.png',
      'officervalinhess.png',
      'passelargente.png',
      'qiratoplieutenant.png',
      'revengeofthesith.png',
      'riseofthesith.png',
      'sidiouslightsaber.png',
      'tragedyofplagueis.png',
      'unlimitedpower.png',
      'wattambor.png',
      'admiralkilian.png',
      'ajanklosstrainingcourse.png',
      'anotherpatheticlifeformsecuritycontrol.png',
      'bewithme.png',
      'bokatan.png',
      'boba.png',
      'cliegglars.png',
      'dagobah.png',
      'endoranakinsfuneralpyre.png',
      'everythingweneed.png',
      'grakchawwaa.png',
      'heroofathousanddevices.png',
      'kananjarrusjediknight.png',
      'kefbiroceanicwreckage.png',
      'marajade.png',
      'myparentswerestrong.png',
      'nowthisispodracing.png',
      'ouneeta.png',
      'profunditydockingbay.png',
      'r3a2.png',
      'returnofajedi.png',
      'tatooineskywalkerhut.png',
      'tatooineslavequarters.png',
      'thedestinyofajedi.png',
      'theforceisstronginmyfamily.png',
      'theriseofskywalker.png',
      'yourthoughtsdwellonyourmother.png',
  ]

  total = len(expected_filenames)
  print(f'number of filenames in masterlist: {total}')

  # Report entries that were typed into the masterlist more than once.
  repeated = checkForDuplicates(expected_filenames)
  if repeated:
    print(f'masterlist contains duplicate filenames: {repeated}')

  return expected_filenames


def compareFilenames(folder_filenames, masterlist_filenames):
  """
  Print the filenames that appear in only one of the two collections.

  Both inputs are reduced to sets first, so repeated filenames count once.
  Nothing is printed when both sides contain the same filenames.

  Args:
    folder_filenames: iterable of filenames found in the set folders.
    masterlist_filenames: iterable of filenames listed in the masterlist.

  Returns:
    None. The result is reported through print() only.
  """
  found = set(folder_filenames)
  expected = set(masterlist_filenames)

  # In the masterlist but not found in any folder.
  absent_from_folders = expected - found
  # Found in a folder but not listed in the masterlist.
  absent_from_masterlist = found - expected

  if absent_from_folders:
    print(f'folders are missing {len(absent_from_folders)} filenames: {absent_from_folders}')
  if absent_from_masterlist:
    print(f'masterlist is missing {len(absent_from_masterlist)} filenames: {absent_from_masterlist}')




In [5]:
# Obtain filenames from each subfolder of the set folder (prints per-folder counts)
folder_filenames = getFolderFilenames(SET_FOLDER_NAME)

# Obtain the expected filenames from the hard-coded masterlist
masterlist_filenames = getMasterlistFilenames()

# Report filenames present on only one side; prints nothing when both sides match
compareFilenames(folder_filenames,masterlist_filenames)





number of filenames in LIGHT folder: 28
number of filenames in ERRATA folder: 5
number of filenames in DARK folder: 25
number of filenames in ALL folders: 58
number of filenames in masterlist: 52
folders are missing 1 filenames: {'lockthedoor.png'}
masterlist is missing 7 filenames: {'bowtothefirstorder.png', 'closetheblastdoors.png', 'ouneetav.png', 'anakinskywalker.png', 'ouronlyhope.png', 'bluesquadron1.png', 'vadersanger.png'}
