In [1]:
import datetime
import fnmatch
import os
import shutil
import zipfile
from pathlib import Path

from tqdm import tqdm


In [2]:
# Configuration: what to back up and what the archive will be called.
source_directory = "."
# A second-resolution timestamp keeps successive backups from overwriting each other.
timestamp = format(datetime.datetime.now(), "%Y%m%d_%H%M%S")
zip_filename = f"deeplearning_ai_course_backup_{timestamp}.zip"

# Path components that must never end up in the archive (optional).
# Entries beginning with '*' are wildcard patterns; all others are exact names.
exclude_patterns = {
    '.git',                # version-control metadata
    '__pycache__',         # Python bytecode cache directories
    '.DS_Store',           # macOS Finder metadata
    '*.pyc',               # compiled Python files
    '.ipynb_checkpoints',  # Jupyter autosave snapshots
    'node_modules',        # JavaScript dependencies
    '.env',                # local environment / secrets file
}

# Echo the effective settings so the run is self-describing.
print(f"Source directory: {os.path.abspath(source_directory)}")
print(f"Zip filename: {zip_filename}")
print(f"Excluding patterns: {exclude_patterns}")


Source directory: /home/jovyan/work
Zip filename: deeplearning_ai_course_backup_20250705_204054.zip
Excluding patterns: {'.git', '*.pyc', '__pycache__', '.env', '.ipynb_checkpoints', '.DS_Store', 'node_modules'}


In [3]:
def should_exclude(file_path, exclude_patterns):
    """Return True if any component of ``file_path`` matches an exclude pattern.

    The path is split into its components (directories and final name) and
    every component is compared with every pattern.  A component matches when
    it is literally equal to the pattern, or when it matches the pattern as a
    shell-style glob (``*``, ``?``, ``[seq]``).  Matching is case-sensitive.

    Args:
        file_path: Path (str or path-like) to test.
        exclude_patterns: Iterable of names / glob patterns.  ``None`` or an
            empty collection means nothing is excluded.

    Returns:
        bool: True when at least one path component matches at least one
        pattern, otherwise False.
    """
    if not exclude_patterns:
        return False

    path_parts = Path(file_path).parts

    # The equality test keeps literal names that happen to contain glob
    # metacharacters (e.g. "notes[1]") matching themselves; fnmatchcase adds
    # real wildcard support anywhere in the pattern, not just a leading '*'.
    return any(
        part == pattern or fnmatch.fnmatchcase(part, pattern)
        for pattern in exclude_patterns
        for part in path_parts
    )

def get_all_files(directory, exclude_patterns=None):
    """Collect the paths of all non-excluded files under ``directory``.

    The tree is walked top-down with ``os.walk``.  Directories whose name
    matches an exclude pattern are pruned and never descended into; files
    whose name matches are skipped.

    Patterns are tested against each entry's own name only, never against the
    components of ``directory`` itself.  Scanning a root that lives beneath an
    excluded-looking name (e.g. ``/x/node_modules/pkg``) therefore still
    returns that root's files.

    Args:
        directory: Root directory to scan.
        exclude_patterns: Collection of patterns understood by
            ``should_exclude``.  ``None`` means nothing is excluded.

    Returns:
        list[str]: File paths formed as ``os.path.join(root, name)``, in the
        order ``os.walk`` yields them.
    """
    if exclude_patterns is None:
        exclude_patterns = set()

    all_files = []

    for root, dirs, files in os.walk(directory):
        # Slice-assign so the list os.walk holds is modified in place; that is
        # what stops the walk from descending into excluded directories.
        # Checking only the name is sufficient: any excluded ancestor was
        # already pruned before os.walk could reach this level.
        dirs[:] = [d for d in dirs if not should_exclude(d, exclude_patterns)]

        all_files.extend(
            os.path.join(root, name)
            for name in files
            if not should_exclude(name, exclude_patterns)
        )

    return all_files

# Build the list of backup candidates once; the cells below reuse it.
print("Scanning for files...")
files_to_zip = get_all_files(
    directory=source_directory,
    exclude_patterns=exclude_patterns,
)
print(f"Found {len(files_to_zip)} files to zip")


Scanning for files...
Found 183 files to zip


In [4]:
# Summarise what will go into the archive: total bytes and a per-extension tally.
total_size = 0
file_types = {}

for file_path in files_to_zip:
    # Only the size lookup touches the filesystem, so it is the only step guarded.
    try:
        size = os.path.getsize(file_path)
    except OSError:
        print(f"Warning: Could not access {file_path}")
        continue

    total_size += size

    # Tally by lower-cased suffix so ".PNG" and ".png" land in the same bucket.
    ext = Path(file_path).suffix.lower()
    file_types[ext] = file_types.get(ext, 0) + 1

size_in_mb = total_size / (1024 * 1024)
print(f"\nTotal size: {size_in_mb:.2f} MB")

print("\nFile types (top 10):")
# Stable sort, most frequent first; ties keep first-seen order.
ranked_types = sorted(file_types.items(), key=lambda item: item[1], reverse=True)
for ext, count in ranked_types[:10]:
    label = ext or '(no extension)'
    print(f"  {label}: {count} files")



Total size: 45.86 MB

File types (top 10):
  .py: 47 files
  .md: 26 files
  .png: 23 files
  .txt: 18 files
  .json: 17 files
  .pdf: 12 files
  (no extension): 11 files
  .ipynb: 8 files
  .jpg: 6 files
  .yaml: 3 files
