In [43]:
import os

# Category lookup table: maps a human-readable category name to the set of
# lowercase file extensions (leading dot included) that belong to it.
# The table is written as ordered (category, extensions) pairs; the resulting
# dict keeps that order, which is also the order categories are reported in.
extension_to_category = {
    label: set(suffixes)
    for label, suffixes in [
        ("Presentation", (".pptx", ".ppt")),
        ("Video", (".mkv", ".mp4", ".3gp")),
        ("Audio", (".mp3", ".m4a")),
        ("Text", (".txt", ".doc", ".docx")),
        ("PDF", (".pdf",)),
        ("Python Notebook File", (".ipynb",)),
        ("Excel Spreadsheet", (".xlsx", ".xls")),
        ("Image", (".jpg", ".jpeg", ".png", ".gif")),
        ("Compressed File", (".zip", ".rar", ".7z")),
        ("HTML File", (".html", ".htm")),
        ("CSV File", (".csv",)),
        ("JSON File", (".json",)),
        ("XML File", (".xml",)),
        ("Log File", (".log",)),
        ("Macro-Enabled Presentation", (".pptm",)),
        ("AVI Video", (".avi",)),
        ("WAV Audio", (".wav",)),
        ("Executable File", (".exe",)),
        ("Tar Archive", (".tar", ".gz")),
        ("Tar Gzipped Archive", (".tgz",)),
        ("PHP Script", (".php",)),
        ("Python Script", (".py",)),
        ("Java Source Code", (".java",)),
        ("C++ Source Code", (".cpp",)),
        ("C/C++ Header File", (".h", ".hpp")),
        ("Shell Script", (".sh",)),
        ("Batch Script", (".bat",)),
        ("Dynamic Link Library", (".dll",)),
        ("Backup File", (".bak",)),
        # Add more (category, extensions) pairs as needed
    ]
}

# Author: Harshal Pimpalshende - https://github.com/noobacker

# Function to classify a file based on its extension
def classify_document(file_path):
    """Return the category name for the file or directory at ``file_path``.

    Args:
        file_path: Path (absolute or relative) of the item to classify.

    Returns:
        ``"Folder"`` if the path is an existing directory, the matching key of
        ``extension_to_category`` if the file's extension is listed there, and
        ``"Other"`` otherwise (including names that have no extension).

    Note:
        The extension is taken from the final path component only, using
        ``os.path.splitext``. A name without a dot (e.g. ``pdf``) or a dot in
        a parent directory (e.g. ``some.dir/README``) therefore never yields
        an extension, and a dotfile such as ``.json`` is treated as having
        none.
    """
    # Directories are reported as such, whatever dots their name contains.
    if os.path.isdir(file_path):
        return "Folder"

    # Look only at the last path component so directory names cannot leak
    # into the extension.
    _, suffix = os.path.splitext(os.path.basename(file_path))
    if not suffix:
        return "Other"

    # Normalise: drop the dot, trim stray whitespace, lowercase, re-add the dot.
    file_extension = "." + suffix[1:].strip().lower()

    # Iterate through the extension_to_category dictionary to find the category
    for category, extensions in extension_to_category.items():
        if file_extension in extensions:
            return category

    return "Other"  # Default to "Other" if not found

# Function to classify files in a folder and count the categories
def classify_files_in_folder(folder_path):
    """Tally the direct entries of ``folder_path`` by category.

    Each entry returned by ``os.listdir`` is classified with
    ``classify_document``; sub-directories are counted but not descended into.

    Args:
        folder_path: Directory whose entries are to be counted.

    Returns:
        A ``(category_counts, other_count)`` tuple. ``category_counts`` maps
        every key of ``extension_to_category`` (zero when nothing matched),
        plus a final ``"Folder"`` key, to its count. ``other_count`` is the
        number of entries classified as ``"Other"``.
    """
    tally = dict.fromkeys(extension_to_category, 0)
    n_folders = 0
    n_other = 0

    for entry in os.listdir(folder_path):
        label = classify_document(os.path.join(folder_path, entry))

        if label == "Folder":
            n_folders += 1
            continue
        if label == "Other":
            n_other += 1
            continue

        tally[label] += 1

    # The folder count is stored under its own key, after all file categories.
    tally["Folder"] = n_folders

    return tally, n_other

# Example of using the classify_files_in_folder function
# Expand a leading "~" so that input such as "~/Downloads" resolves to the
# user's home directory instead of being passed to os.listdir verbatim.
folder_path = os.path.expanduser(input("Enter the folder path: ").strip())

if os.path.isdir(folder_path):
    category_counts, other_count = classify_files_in_folder(folder_path)

    print("Category Counts:")
    for category, count in category_counts.items():
        print(f"{category}: {count}")

    print(f"Other: {other_count}")
else:
    # os.listdir raises on an empty, missing or non-directory path; report
    # the problem in plain words instead of ending in a traceback.
    print(f"Not a directory: {folder_path!r}")


Enter the folder path:  /Users/harshalpimpalshende/Downloads/


Category Counts:
Presentation: 0
Video: 2
Audio: 1
Text: 0
PDF: 19
Python Notebook File: 2
Excel Spreadsheet: 0
Image: 3
Compressed File: 3
HTML File: 0
CSV File: 0
JSON File: 0
XML File: 0
Log File: 0
Macro-Enabled Presentation: 0
AVI Video: 0
WAV Audio: 0
Executable File: 0
Tar Archive: 0
Tar Gzipped Archive: 0
PHP Script: 0
Python Script: 0
Java Source Code: 0
C++ Source Code: 0
C/C++ Header File: 0
Shell Script: 0
Batch Script: 0
Dynamic Link Library: 0
Backup File: 0
Folder: 12
Other: 2
