In [None]:
import os
import shutil
from tqdm import tqdm

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so the cells
# below can reach Drive content through ordinary filesystem paths
# (they use /content/drive/MyDrive as the working folder).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def get_unique_file_extensions(src_folder):
    """Return the set of distinct file extensions found under ``src_folder``.

    The whole directory tree is traversed with ``os.walk``. Each extension is
    whatever ``os.path.splitext`` reports for the file name: it keeps its
    leading dot and its original letter case, and it is the empty string for
    names without an extension.

    Args:
        src_folder: Path of the directory whose tree is scanned.

    Returns:
        A ``set`` of extension strings (empty when no files are found).
    """
    return {
        os.path.splitext(name)[1]
        for _dirpath, _dirnames, names in os.walk(src_folder)
        for name in names
    }

# Survey step: collect every distinct file extension found under the mounted
# Drive folder and print the resulting set.
if __name__ == "__main__":
    source_folder = "/content/drive/MyDrive"
    unique_extensions = get_unique_file_extensions(source_folder)
    print(unique_extensions)

{'', '.pdf', '.pptx', '.xlsx', '.docx', '.xls', '.gsheet', '.xlsm', '.csv', '.ipynb', '.gdoc', '.txt', '.jpg', '.png', '.sqlite'}


In [None]:
# Mapping of destination sub-folder (relative to the folder being organized)
# to the list of file extensions filed there.
#
# Conventions:
#   * Extensions are lowercase and include the leading dot.
#   * Each extension appears in exactly ONE category. Lookups stop at the first
#     matching category, so a duplicate in a later category would never be used.
#   * '' (no extension) is mapped to 'Others'.
file_categories = {
    'Office/Documents': ['.txt', '.pdf', '.doc', '.docx', '.rtf', '.gdoc', '.odt'],
    'Office/Presentations': ['.ppt', '.pptx', '.gslides', '.odp', '.key'],
    # '.xlsm' (macro-enabled Excel workbook) shows up in the extension survey of
    # this Drive, so it is filed with the other spreadsheets.
    'Office/Spreadsheets': ['.xls', '.xlsx', '.xlsm', '.gsheet', '.ods', '.numbers', '.csv'],
    'Coding&Data/Code': ['.xml', '.json', '.php', '.sh', '.cpp', '.js', '.pde', '.java', '.cs', '.c', '.h', '.go', '.rb', '.pl', '.swift', '.ts'],
    'Coding&Data/Python': ['.py', '.pyc'],
    'Coding&Data/HTML&CSS': ['.html', '.css', '.scss', '.sass', '.less'],
    'Coding&Data/Jupyter_Notebooks': ['.ipynb'],
    'Coding&Data/Database_Files': ['.db', '.sql', '.sqlite', '.accdb'],
    'Multimedia/Videos': ['.mp4', '.mkv', '.flv', '.avi', '.mov', '.wmv'],
    'Multimedia/Images': ['.svg', '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.ico', '.tiff', '.webp'],
    'Multimedia/Audio': ['.mp3', '.wav', '.ogg', '.flac', '.m4a', '.aac'],
    'Multimedia/Fonts': ['.ttf', '.otf', '.woff', '.woff2', '.eot'],
    'Multimedia/3D_Models': ['.obj', '.fbx', '.dae', '.3ds', '.blend'],
    'Multimedia/Vector_Graphics': ['.ai', '.eps', '.sketch'],
    'Multimedia/CAD_Files': ['.dwg', '.dxf'],
    'Google_Workspace/Drawings': ['.gdraw'],
    'Google_Workspace/Sites': ['.gsite'],
    'Google_Workspace/Forms': ['.gform'],
    'Google_Workspace/Jamboard': ['.gjam'],
    'Google_Workspace/Scripts': ['.gscript'],
    'Others/Shortcuts': ['.lnk'],
    'Others/Markdown': ['.md', '.markdown'],
    'Others/PGP_Keys': ['.asc'],
    # '.pyc' is intentionally NOT repeated here: it is already claimed by
    # 'Coding&Data/Python' above, which always wins the first-match lookup.
    'Others/Python_Code': ['.pyo'],
    'Others/Archives': ['.zip', '.tar', '.rar', '.7z', '.gz', '.bz2'],
    'Others/eBooks': ['.epub', '.mobi', '.azw', '.azw3'],
    'Others': [''],
}

In [None]:
def categorize_files(src_folder, file_categories=file_categories, go_through=True):
    """Move the files under ``src_folder`` into per-category sub-folders.

    Every file is moved to ``<src_folder>/<category>/`` where ``category`` is
    the first key of ``file_categories`` whose extension list contains the
    file's extension; files with no matching extension go to
    ``<src_folder>/Others``. Files are moved in place, so this changes the
    folder on disk.

    Args:
        src_folder: Folder to organize. Category folders are created inside it.
        file_categories: Mapping ``{relative_dest_folder: [extensions, ...]}``.
            Extensions include the leading dot. Defaults to the module-level
            mapping as it existed when this function was defined.
        go_through: When True, the whole tree below ``src_folder`` is processed
            and an ``os.rmdir`` is attempted on every visited sub-folder (it
            only succeeds for folders that ended up empty). When False, only
            the files directly inside ``src_folder`` are processed and nothing
            is deleted.

    Returns:
        None.

    Notes:
        * An existing file in the destination is never overwritten: on a name
          clash the incoming file is stored as ``"name (1).ext"``,
          ``"name (2).ext"``, ...
        * Extension matching falls back to the lowercase form, so ``.JPG`` is
          filed like ``.jpg``.
        * Folders are processed deepest-first so that a parent whose children
          were all emptied and removed can itself be removed.
        * ``src_folder`` itself is never removed.
    """
    src_root = os.path.abspath(src_folder)

    def resolve_destination_folder(file_ext):
        # Try the extension as-is first (keeps custom mappings that use
        # non-lowercase extensions working), then its lowercase form.
        lowered_ext = file_ext.lower()
        for category, extensions in file_categories.items():
            if file_ext in extensions or lowered_ext in extensions:
                return os.path.join(src_folder, category)
        return os.path.join(src_folder, 'Others')

    def pick_free_path(dest_folder, filename):
        # shutil.move can silently replace an existing destination file, so
        # look for a name that is not taken yet instead of clobbering it.
        stem, ext = os.path.splitext(filename)
        candidate = os.path.join(dest_folder, filename)
        counter = 1
        while os.path.exists(candidate):
            candidate = os.path.join(dest_folder, f"{stem} ({counter}){ext}")
            counter += 1
        return candidate

    def move_file_to_category(file_path, filename, file_ext):
        dest_folder = resolve_destination_folder(file_ext)

        # A file that already sits in its category folder (e.g. on a re-run)
        # must stay where it is rather than be renamed as a "duplicate".
        current_folder = os.path.abspath(os.path.dirname(file_path))
        if current_folder == os.path.abspath(dest_folder):
            return

        os.makedirs(dest_folder, exist_ok=True)
        shutil.move(file_path, pick_free_path(dest_folder, filename))

    def process_files(root, files):
        for filename in tqdm(files, desc=f"Processing files in {root}", unit="file"):
            file_path = os.path.join(root, filename)

            # os.listdir (go_through=False) also returns directory names.
            if os.path.isdir(file_path):
                continue

            file_ext = os.path.splitext(filename)[1]
            move_file_to_category(file_path, filename, file_ext)

        if go_through and os.path.abspath(root) != src_root:
            try:
                os.rmdir(root)
                print(f"Successful deleting folder: {root}")
            except OSError:
                # Best effort: the folder is not empty (or cannot be removed),
                # so it is simply left in place.
                pass

    if go_through:
        # Snapshot the tree before anything is moved, then handle the deepest
        # folders first: reversing a top-down walk yields children before
        # their parents, so emptied parents can be removed as well.
        folders = [(root, files) for root, _, files in os.walk(src_folder)]
        folders.reverse()
    else:
        folders = [(src_folder, os.listdir(src_folder))]

    for root, files in folders:
        process_files(root, files)

In [None]:
# Organize the whole mounted Drive with the default category mapping and the
# default go_through=True (sub-folders are processed as well, and an rmdir is
# attempted on visited sub-folders).
# NOTE: this moves the real files in place on the mounted Drive.
source_folder = "/content/drive/MyDrive"
categorize_files(source_folder)

Processing files in /content/drive/MyDrive: 100%|██████████| 21/21 [00:00<00:00, 231.08file/s]
Processing files in /content/drive/MyDrive/Colab Notebooks: 100%|██████████| 15/15 [00:00<00:00, 187.24file/s]


Successful deleting folder: /content/drive/MyDrive/Colab Notebooks


Processing files in /content/drive/MyDrive/Smart Serve: 100%|██████████| 2/2 [00:00<00:00, 257.77file/s]
Processing files in /content/drive/MyDrive/Smart Serve/Exam: 100%|██████████| 8/8 [00:00<00:00, 226.56file/s]


Successful deleting folder: /content/drive/MyDrive/Smart Serve/Exam


Processing files in /content/drive/MyDrive/Term-2_Learning: 100%|██████████| 2/2 [00:00<00:00, 264.24file/s]


Successful deleting folder: /content/drive/MyDrive/Term-2_Learning


Processing files in /content/drive/MyDrive/DV 2: 100%|██████████| 6/6 [00:00<00:00, 267.07file/s]


Successful deleting folder: /content/drive/MyDrive/DV 2


Processing files in /content/drive/MyDrive/LinkedIn: 100%|██████████| 1/1 [00:00<00:00, 274.64file/s]
Processing files in /content/drive/MyDrive/LinkedIn/apporva-komatineni-BMO: 100%|██████████| 40/40 [00:00<00:00, 260.84file/s]


Successful deleting folder: /content/drive/MyDrive/LinkedIn/apporva-komatineni-BMO


Processing files in /content/drive/MyDrive/LinkedIn/Project 1 - Financial Forecasting: 100%|██████████| 4/4 [00:00<00:00, 247.67file/s]


Successful deleting folder: /content/drive/MyDrive/LinkedIn/Project 1 - Financial Forecasting


Processing files in /content/drive/MyDrive/LinkedIn/Project 2: 100%|██████████| 2/2 [00:00<00:00, 188.65file/s]
Processing files in /content/drive/MyDrive/LinkedIn/Project 2/folder: 100%|██████████| 4/4 [00:00<00:00, 241.99file/s]


Successful deleting folder: /content/drive/MyDrive/LinkedIn/Project 2/folder


Processing files in /content/drive/MyDrive/Study permit Extension 2025: 0file [00:00, ?file/s]

Successful deleting folder: /content/drive/MyDrive/Study permit Extension 2025



