In [None]:
import datetime
import fnmatch
import os
import shutil
import zipfile
from pathlib import Path

from tqdm import tqdm


In [None]:
# Backup settings: everything a reader might want to tune lives in this cell.

# Root of the directory tree to archive.
source_directory = "."

# Take the clock reading once so every artifact named from `timestamp`
# carries the same suffix.
run_started_at = datetime.datetime.now()
timestamp = run_started_at.strftime("%Y%m%d_%H%M%S")
zip_filename = f"deeplearning_ai_course_backup_{timestamp}.zip"

# Names to leave out of the backup (optional). Each entry is compared with
# the individual components of a path; an entry starting with '*' matches
# any component ending with the rest of the entry.
exclude_patterns = {
    '.git', '__pycache__', '.DS_Store',
    '*.pyc', '.ipynb_checkpoints',
    'node_modules', '.env',
}

# Echo the effective settings so the run is self-describing.
print(f"Source directory: {os.path.abspath(source_directory)}")
print(f"Zip filename: {zip_filename}")
print(f"Excluding patterns: {exclude_patterns}")


In [None]:
def should_exclude(file_path, exclude_patterns):
    """Check if a file should be excluded based on patterns.

    Every component of ``file_path`` (each directory name and the final
    name) is compared against every pattern.

    Args:
        file_path: Path (str or path-like) of the file or directory to test.
        exclude_patterns: Iterable of pattern strings. A pattern matches a
            component when it is equal to it, or, if the pattern contains a
            glob metacharacter (``*``, ``?`` or ``[``), when it matches it
            under case-sensitive shell-style globbing. ``None`` or an empty
            collection excludes nothing.

    Returns:
        True if any path component matches any pattern, otherwise False.
    """
    if not exclude_patterns:
        return False

    path_parts = Path(file_path).parts

    for pattern in exclude_patterns:
        # Literal component match; this also keeps names that happen to
        # contain glob metacharacters matchable by their exact spelling.
        if pattern in path_parts:
            return True
        # Glob match. fnmatchcase is used instead of fnmatch so matching
        # stays case-sensitive on every platform.
        if any(char in pattern for char in '*?['):
            if any(fnmatch.fnmatchcase(part, pattern) for part in path_parts):
                return True
    return False

def get_all_files(directory, exclude_patterns=None):
    """Get all files in directory and subdirectories, excluding specified patterns.

    Args:
        directory: Root directory to walk.
        exclude_patterns: Optional collection of patterns handed to
            ``should_exclude``. ``None`` means nothing is excluded.

    Returns:
        A list of file paths, each built by joining the walked directory
        path with the file name.
    """
    if exclude_patterns is None:
        exclude_patterns = set()

    all_files = []

    for root, dirs, files in os.walk(directory):
        # Prune in place so os.walk never descends into excluded directories.
        # Only the entry's own name is tested: every ancestor below
        # `directory` has already passed this filter, and the components of
        # `directory` itself must not take part in matching (otherwise a
        # source tree located under e.g. a `node_modules` or `.env` folder
        # would yield no files at all).
        dirs[:] = [d for d in dirs if not should_exclude(d, exclude_patterns)]

        for file in files:
            if not should_exclude(file, exclude_patterns):
                all_files.append(os.path.join(root, file))

    return all_files

# Build the list of backup candidates once; later cells reuse `files_to_zip`
print("Scanning for files...")
files_to_zip = get_all_files(
    directory=source_directory,
    exclude_patterns=exclude_patterns,
)
print(f"Found {len(files_to_zip)} files to zip")


In [None]:
# Summarise what is about to be archived: total bytes and a per-extension tally
total_size = 0
file_types = {}

for file_path in files_to_zip:
    try:
        n_bytes = os.path.getsize(file_path)
    except OSError:
        # Unreadable entries are reported and left out of both totals
        print(f"Warning: Could not access {file_path}")
        continue

    total_size += n_bytes

    # Extensions are tallied case-insensitively
    suffix = Path(file_path).suffix.lower()
    file_types[suffix] = file_types.get(suffix, 0) + 1

print(f"\nTotal size: {total_size / (1024*1024):.2f} MB")
print(f"\nFile types (top 10):")

# Most frequent extensions first; only the ten largest groups are shown
ranked_types = sorted(file_types.items(), key=lambda x: x[1], reverse=True)
for ext, count in ranked_types[:10]:
    print(f"  {ext if ext else '(no extension)'}: {count} files")


In [None]:
# Create the zip file
print(f"\nCreating zip file: {zip_filename}")
print("This may take a while depending on the number and size of files...")

try:
    # Absolute path of the archive being written, used to make sure it is
    # never added to itself (possible when the scan cell was re-run after an
    # archive with this name already existed).
    zip_abspath = os.path.abspath(zip_filename)
    failed_files = []

    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED, compresslevel=6) as zipf:
        for file_path in tqdm(files_to_zip, desc="Zipping files"):
            if os.path.abspath(file_path) == zip_abspath:
                continue
            try:
                # Use relative path in the zip file
                arcname = os.path.relpath(file_path, source_directory)
                zipf.write(file_path, arcname)
            except Exception as e:
                # Best effort: one unreadable file must not abort the backup,
                # but it is remembered so the final message is accurate.
                failed_files.append(file_path)
                print(f"Warning: Could not zip {file_path}: {e}")

    if failed_files:
        print(f"\nZip file created with {len(failed_files)} file(s) skipped: {zip_filename}")
    else:
        print(f"\nZip file created successfully: {zip_filename}")

    # Show final zip file size
    zip_size = os.path.getsize(zip_filename)
    print(f"Zip file size: {zip_size / (1024*1024):.2f} MB")
    if total_size > 0:
        print(f"Compression ratio: {((total_size - zip_size) / total_size * 100):.1f}%")
    else:
        # Nothing (or only empty files) was archived; a ratio is undefined
        # and dividing would raise after the archive was already written.
        print("Compression ratio: n/a (no data to compress)")

except Exception as e:
    print(f"Error creating zip file: {e}")


In [None]:
# Optional: re-open the archive read-only and check it
print("\nVerifying zip file integrity...")
try:
    with zipfile.ZipFile(zip_filename, 'r') as archive:
        # testzip() gives back the name of the first bad member, or None
        bad_files = archive.testzip()
        if not bad_files:
            print("✓ Zip file integrity verified successfully")
        else:
            print(f"Warning: Found corrupted files in zip: {bad_files}")

        # Report entry count plus uncompressed / compressed totals
        info_list = archive.infolist()
        print(f"\nZip contents: {len(info_list)} files")
        print(f"Total uncompressed size: {sum(info.file_size for info in info_list) / (1024*1024):.2f} MB")
        print(f"Total compressed size: {sum(info.compress_size for info in info_list) / (1024*1024):.2f} MB")

except Exception as e:
    print(f"Error verifying zip file: {e}")


In [None]:
# Optional: Create a manifest file listing all included files
manifest_filename = f"manifest_{timestamp}.txt"
print(f"\nCreating manifest file: {manifest_filename}")

try:
    with open(manifest_filename, 'w', encoding='utf-8') as f:
        # Header: when, where from, and with which settings the backup ran
        f.write(f"Backup created on: {datetime.datetime.now()}\n")
        f.write(f"Source directory: {os.path.abspath(source_directory)}\n")
        f.write(f"Zip file: {zip_filename}\n")
        f.write(f"Total files: {len(files_to_zip)}\n")
        f.write(f"Total size: {total_size / (1024*1024):.2f} MB\n")
        # Sorted so the line is identical from run to run; iterating a set of
        # strings directly gives an order that can change between kernels.
        f.write(f"Excluded patterns: {', '.join(sorted(exclude_patterns))}\n")
        f.write("\n" + "="*50 + "\n")
        f.write("Files included in backup:\n")
        f.write("="*50 + "\n")

        # One line per file, path relative to the source directory
        for file_path in sorted(files_to_zip):
            rel_path = os.path.relpath(file_path, source_directory)
            try:
                size = os.path.getsize(file_path)
                f.write(f"{rel_path} ({size} bytes)\n")
            except OSError:
                f.write(f"{rel_path} (size unknown)\n")

    print(f"✓ Manifest file created: {manifest_filename}")

except Exception as e:
    print(f"Error creating manifest: {e}")


In [None]:
# Summary
# Decide from what is actually on disk rather than from leftover kernel
# variables, so a failed or skipped zip cell is not reported as a success.
zip_created = os.path.isfile(zip_filename)

print("\n" + "="*60)
print("BACKUP COMPLETE" if zip_created else "BACKUP FAILED")
print("="*60)

if zip_created:
    final_zip_size = os.path.getsize(zip_filename)
    print(f"✓ Zip file: {zip_filename}")
    if os.path.isfile(manifest_filename):
        print(f"✓ Manifest: {manifest_filename}")
    else:
        print(f"✗ Manifest was not created: {manifest_filename}")
    print(f"✓ Files backed up: {len(files_to_zip)}")
    print(f"✓ Original size: {total_size / (1024*1024):.2f} MB")
    print(f"✓ Compressed size: {final_zip_size / (1024*1024):.2f} MB")
    # The saving is undefined when nothing was archived; skip it instead of
    # dividing by zero.
    if total_size > 0:
        print(f"✓ Space saved: {((total_size - final_zip_size) / total_size * 100):.1f}%")
    print("\nYour backup is ready!")
else:
    print(f"✗ Zip file was not created: {zip_filename}")
    print("\nCheck the output of the zip creation cell for errors.")
