In [85]:
import os
import hashlib
import pytsk3
from PIL import Image
from PIL.ExifTags import TAGS
import pandas as pd

# ------------------------------
# Step 1: Create a Disk Image
# ------------------------------

def create_disk_image(source_path, output_path):
    """Create a disk image."""
    try:
        with open(source_path, 'rb') as source, open(output_path, 'wb') as dest:
            dest.write(source.read())
        print(f"Disk image created at: {output_path}")
    except Exception as e:
        print(f"Error creating disk image: {e}")

# Example Usage:
# create_disk_image('source_drive.img', 'disk_image.img')

# ------------------------------
# Step 2: Verify Data Integrity
# ------------------------------

def calculate_hash(file_path, algorithm='sha256'):
    """Calculate hash of a file."""
    hash_func = hashlib.new(algorithm)
    with open(file_path, 'rb') as file:
        while chunk := file.read(8192):
            hash_func.update(chunk)
    return hash_func.hexdigest()

# Example Usage:
# hash_value = calculate_hash('disk_image.img')
# print("SHA256 Hash:", hash_value)

# ------------------------------
# Step 3: Recover Deleted Files
# ------------------------------

def recover_deleted_files(disk_image_path, output_dir):
    """Recover deleted files from a disk image."""
    try:
        img = pytsk3.Img_Info(disk_image_path)
        fs = pytsk3.FS_Info(img)
        for file_entry in fs.open_dir('/'):
            if file_entry.info.meta and file_entry.info.meta.flags & pytsk3.TSK_FS_META_FLAG_UNALLOC:
                file_name = file_entry.info.name.name.decode()
                file_path = os.path.join(output_dir, file_name)
                with open(file_path, 'wb') as f:
                    f.write(file_entry.read_random(0, file_entry.info.meta.size))
                print(f"Recovered: {file_name}")
    except Exception as e:
        print(f"Error recovering files: {e}")

# Example Usage:
# recover_deleted_files('disk_image.img', 'recovered_files')

# ------------------------------
# Step 4: Extract Metadata
# ------------------------------

def extract_metadata(file_path):
    """Extract metadata from an image."""
    try:
        img = Image.open(file_path)
        exif_data = img._getexif()
        metadata = {TAGS.get(tag): value for tag, value in exif_data.items()} if exif_data else {}
        return metadata
    except Exception as e:
        print(f"Error extracting metadata: {e}")
        return {}

# Example Usage:
# metadata = extract_metadata('recovered_files/image.jpg')
# print(metadata)

# ------------------------------
# Step 5: Generate Report
# ------------------------------

def generate_report(data, output_file):
    """Generate a report of findings."""
    df = pd.DataFrame(data)
    df.to_csv(output_file, index=False)
    print(f"Report generated at: {output_file}")

# Example Usage:
# data = [{'File Name': 'image.jpg', 'Size': '1MB', 'Recovered': 'Yes'}]
# generate_report(data, 'report.csv')

# ------------------------------
# Main Execution Flow
# ------------------------------

if __name__ == "__main__":
    # Paths
    disk_image_path = r'C:\Users\DataScienceClass\Desktop\disk_image.'  # Correct path to store the disk image
    output_dir = r'C:\Users\DataScienceClass\project\recovered_file'  # Correct path for recovered files
    report_file = r'C:\Users\DataScienceClass\project\reports\evidence_report.csv'  # Correct path for report

    # Ensure output directories exist
    os.makedirs(output_dir, exist_ok=True)  # Ensure recovered file directory exists
    os.makedirs(os.path.dirname(report_file), exist_ok=True)  # Ensure report directory exists

    # Step 1: Create a Disk Image (Simulated)
    create_disk_image(r'C:\Users\DataScienceClass\project\image.png', disk_image_path)

    # Step 2: Verify Data Integrity
    hash_value = calculate_hash(disk_image_path)
    print("SHA256 Hash of Disk Image:", hash_value)

    # Step 3: Recover Deleted Files
    recover_deleted_files(disk_image_path, output_dir)

    # Step 4: Extract Metadata from Recovered Files
    metadata_list = []
    for root, _, files in os.walk(output_dir):
        for file_name in files:
            file_path = os.path.join(root, file_name)
            metadata = extract_metadata(file_path)
            metadata_list.append({'File Name': file_name, 'Metadata': metadata})

    # Step 5: Generate Report
    generate_report(metadata_list, report_file)

import os
import pytsk3

def recover_deleted_files(disk_image_path, output_dir):
    """Recover deleted PDF files from a disk image."""
    try:
        img = pytsk3.Img_Info(disk_image_path)
        fs = pytsk3.FS_Info(img)

        # Loop through directories in the disk image
        for file_entry in fs.open_dir('/'):
            if file_entry.info.meta and file_entry.info.meta.flags & pytsk3.TSK_FS_META_FLAG_UNALLOC:
                file_name = file_entry.info.name.name.decode()

                # Check if the file is a PDF (by file extension)
                if file_name.lower().endswith('.pdf'):
                    file_path = os.path.join(output_dir, file_name)
                    with open(file_path, 'wb') as f:
                        f.write(file_entry.read_random(0, file_entry.info.meta.size))
                    print(f"Recovered PDF: {file_name}")
    except Exception as e:
        print(f"Error recovering files: {e}")

# Example Usage:
# recover_deleted_files('disk_image.img', 'recovered_files')

Disk image created at: C:\Users\DataScienceClass\Desktop\disk_image.
SHA256 Hash of Disk Image: 7b45edbc7ec19668c89fac160ffecc3399f2ff2a17abf44468d0580bb2dcb234
Error recovering files: FS_Info_Con: (tsk3.cpp:214) Unable to open the image as a filesystem at offset: 0x00000000 with error: Possible encryption detected (High entropy (7.99))
Report generated at: C:\Users\DataScienceClass\project\reports\evidence_report.csv
