In [5]:
import fitz  # PyMuPDF
from PIL import Image
import os

def pdf_to_images_high_quality(pdf_path, output_folder, dpi=300, image_format='PNG'):
    """
    Render every page of a PDF to its own image file using PyMuPDF.

    Parameters:
    -----------
    pdf_path : str
        Path to the input PDF file
    output_folder : str
        Folder where images will be saved (created if it does not exist)
    dpi : int
        Resolution in dots per inch (300 is print quality, 150 is screen quality)
        Higher DPI = better quality but larger file size. Must be positive.
    image_format : str
        Output format: 'PNG' (lossless) or 'JPEG' (compressed). The check is
        case-insensitive; any value other than 'PNG' is written through PIL
        with quality=95, and the lower-cased value becomes the file extension.

    Returns:
    --------
    list : Paths to all generated images, in page order

    Raises:
    -------
    ValueError : If dpi is zero or negative
    """
    # Reject a meaningless resolution before touching the filesystem or the PDF.
    if dpi <= 0:
        raise ValueError(f"dpi must be a positive number, got {dpi!r}")

    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # PyMuPDF default is 72 DPI, so zoom = desired_dpi / 72
    zoom = dpi / 72
    matrix = fitz.Matrix(zoom, zoom)

    # These depend only on the arguments, so compute them once, not per page.
    save_as_png = image_format.upper() == 'PNG'
    extension = image_format.lower()

    image_paths = []

    pdf_document = fitz.open(pdf_path)
    try:
        for page_num in range(len(pdf_document)):
            page = pdf_document[page_num]

            # alpha=False: render without a transparency channel, which is what
            # the "RGB" conversion in the non-PNG branch below expects.
            pix = page.get_pixmap(matrix=matrix, alpha=False)

            # File names are 1-based and zero-padded: page_0001.png, ...
            output_filename = f"page_{page_num + 1:04d}.{extension}"
            output_path = os.path.join(output_folder, output_filename)

            if save_as_png:
                pix.save(output_path)
            else:
                # For JPEG, convert through PIL for better quality control.
                # No explicit format is passed, so PIL picks the encoder from
                # the file extension.
                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                img.save(output_path, quality=95, optimize=True)

            image_paths.append(output_path)
            print(f"Saved: {output_path}")
    finally:
        # Release the document even when rendering or saving a page fails.
        pdf_document.close()

    return image_paths


def pdf_to_images_pdf2image(pdf_path, output_folder, dpi=300, thread_count=4):
    """
    Alternative method using pdf2image (requires Poppler).
    Often produces sharper text rendering.

    All pages are converted first and only then written to disk, so every
    rendered page is held by this function at the same time.

    Parameters:
    -----------
    pdf_path : str
        Path to the input PDF file
    output_folder : str
        Folder where images will be saved (created if it does not exist)
    dpi : int
        Resolution in dots per inch. Must be positive.
    thread_count : int
        Number of worker threads handed to pdf2image (default 4, the value
        that used to be hard-coded). Must be at least 1.

    Returns:
    --------
    list : Paths to all generated images, in page order

    Raises:
    -------
    ValueError : If dpi is not positive or thread_count is below 1
    """
    # Validate before importing pdf2image so bad arguments fail fast and
    # without requiring the optional dependency.
    if dpi <= 0:
        raise ValueError(f"dpi must be a positive number, got {dpi!r}")
    if thread_count < 1:
        raise ValueError(f"thread_count must be at least 1, got {thread_count!r}")

    # Imported lazily so the rest of the notebook works without pdf2image.
    from pdf2image import convert_from_path

    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Convert PDF to images
    images = convert_from_path(
        pdf_path,
        dpi=dpi,
        fmt='png',
        thread_count=thread_count,  # Use multiple threads for faster conversion
        use_pdftocairo=True  # Better text rendering
    )

    image_paths = []

    # Save each page under a 1-based, zero-padded name: page_0001.png, ...
    for page_number, image in enumerate(images, start=1):
        output_path = os.path.join(output_folder, f"page_{page_number:04d}.png")
        image.save(output_path, 'PNG', optimize=False)
        image_paths.append(output_path)
        print(f"Saved: {output_path}")

    return image_paths


# Example usage
if __name__ == "__main__":
    # Input document and the folder that receives one image per page.
    pdf_file = "assignment.pdf"
    output_dir = "output_images"

    # Method 1: PyMuPDF renderer. 300 DPI is print quality (150-200 is enough
    # for on-screen use); PNG keeps the pages lossless.
    print("Converting PDF to images...")
    images = pdf_to_images_high_quality(pdf_file, output_dir, dpi=300, image_format='PNG')
    print(f"\nConversion complete! {len(images)} images saved.")

    # Method 2: pdf2image renderer (uncomment to use)
    # output_dir2 = "output_images_pdf2image"
    # images2 = pdf_to_images_pdf2image(pdf_file, output_dir2, dpi=300)

Converting PDF to images...
Saved: output_images\page_0001.png

Conversion complete! 1 images saved.


In [6]:
import pytesseract
from PIL import Image

import os

# Path to tesseract.exe (only needed on Windows). Guarded so that other
# platforms keep pytesseract's own default command instead of a path that
# cannot exist there.
if os.name == "nt":
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Open image. The path is assembled with os.path.join rather than written as
# "output_images\page_0001.png": that literal relied on "\p", an invalid
# escape sequence, and only resolved on Windows.
img = Image.open(os.path.join("output_images", "page_0001.png"))

# Extract text
text = pytesseract.image_to_string(img)
print(text)



Assignment I

Cyber Forensics and Cyber Crime Investigation (OAE415T)

Q1- Give detailed classification of cybercrimes.
Q2- Explain Phishing and its forms.
Q3- Explain crimes related to IPRs. Discuss their legal aspect in India.

Q4- Discuss the Indian legal remediations available for cyber stalking and cyber pornography.



In [9]:
"""
Poetic Text-to-Speech Converter
Creates expressive, poetic-sounding speech from any text
"""

import asyncio
import edge_tts
from pydub import AudioSegment
from pydub.effects import normalize
import os
import nest_asyncio

# Fix for Jupyter Notebook / environments with existing event loop.
# run_async() in this cell calls loop.run_until_complete(); presumably that is
# only possible on an already-running loop once this patch is applied.
# NOTE(review): runs at cell/import time, so it affects the whole process.
nest_asyncio.apply()


def text_to_poetic_speech(text, output_file='poetic_speech.mp3', language='en'):
    """
    Convert any text to poetic-sounding speech with dramatic tone.

    The text is synthesized with Edge TTS at a slower rate and slightly raised
    pitch, written to a private temporary file, post-processed by
    apply_poetic_effects(), and exported to ``output_file``.

    Prosody is requested through Communicate's ``rate``/``pitch`` keyword
    arguments (the same call text_to_poetic_speech_simple makes) rather than
    by sending hand-built SSML: Communicate takes plain text, and edge-tts
    dropped custom SSML support in 5.0.0, so markup passed as the text
    argument is not interpreted as markup.

    Parameters:
    -----------
    text : str
        Input text/string to convert to speech. Must not be empty or blank.
    output_file : str
        Output audio file path (.mp3)
    language : str
        'en' for English, 'hi' for Hindi. 'en-male' and 'en-uk' are also
        recognised; any other value falls back to the default English voice.

    Returns:
    --------
    str : ``output_file``, unchanged

    Raises:
    -------
    ValueError : If text is not a non-empty string

    Example:
    --------
    text_to_poetic_speech("The moon whispers secrets to the sleeping earth")
    """
    import tempfile  # local import: only this function needs it

    if not isinstance(text, str) or not text.strip():
        raise ValueError("text must be a non-empty string")

    # Best poetic voices for each language
    POETIC_VOICES = {
        'en': 'en-US-AriaNeural',      # Expressive female voice
        'hi': 'hi-IN-SwaraNeural',     # Hindi female voice
        'en-male': 'en-US-GuyNeural',  # Deep male voice
        'en-uk': 'en-GB-SoniaNeural'   # British elegant voice
    }

    voice = POETIC_VOICES.get(language, 'en-US-AriaNeural')

    async def _synthesize(target_path):
        # Slower, slightly higher delivery; same settings as the simple variant.
        communicate = edge_tts.Communicate(text, voice, rate='-15%', pitch='+5Hz')
        await communicate.save(target_path)

    # A unique temp file avoids clobbering a 'temp_speech.mp3' in the working
    # directory and lets concurrent or repeated calls coexist.
    handle, temp_file = tempfile.mkstemp(suffix='.mp3')
    os.close(handle)  # only the path is needed; edge-tts reopens it for writing

    try:
        # Generate speech
        run_async(_synthesize(temp_file))

        # Apply poetic audio effects
        apply_poetic_effects(temp_file, output_file)
    finally:
        # Clean up temp file even when synthesis or post-processing fails
        if os.path.exists(temp_file):
            os.remove(temp_file)

    print(f"✓ Poetic speech created: {output_file}")
    return output_file


def create_poetic_ssml(text, voice):
    """
    Create SSML markup to add poetic qualities:
    - Slower pace (prosody rate 0.85)
    - Pauses inserted before '.', ',', '!' and '?'
    - Slightly raised pitch (+5%)

    The input text is XML-escaped before the pause tags are inserted, so
    characters such as '&' and '<' cannot break the document. ``xml:lang`` is
    taken from the voice name ('hi-IN-SwaraNeural' -> 'hi-IN') and falls back
    to 'en-US' when the name does not have that three-part shape.

    Parameters:
    -----------
    text : str
        Plain text to wrap
    voice : str
        Voice name placed in the <voice> element

    Returns:
    --------
    str : The SSML document (with leading/trailing whitespace from the template)
    """
    from xml.sax.saxutils import escape  # stdlib; local so the cell imports stay untouched

    # Escape first so that only the <break/> tags added below are real markup.
    # The entities produced (&amp; &lt; &gt;) contain none of the punctuation
    # replaced afterwards, so the two steps cannot interfere.
    text = escape(text)

    # Add pauses at punctuation for dramatic effect
    text = text.replace('.', '<break time="800ms"/>.')
    text = text.replace(',', '<break time="400ms"/>,')
    text = text.replace('!', '<break time="700ms"/>!')
    text = text.replace('?', '<break time="700ms"/>?')

    # Derive the document language from the voice so a Hindi voice is not
    # labelled en-US.
    voice_parts = voice.split('-')
    xml_lang = '-'.join(voice_parts[:2]) if len(voice_parts) >= 3 else 'en-US'

    # Both values end up inside double-quoted attributes.
    attr_entities = {'"': '&quot;'}
    voice_attr = escape(voice, attr_entities)
    lang_attr = escape(xml_lang, attr_entities)

    # Wrap in SSML with prosody controls
    ssml = f'''
    <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{lang_attr}">
        <voice name="{voice_attr}">
            <prosody rate="0.85" pitch="+5%">
                {text}
            </prosody>
        </voice>
    </speak>
    '''

    return ssml


async def generate_speech_async(ssml_text, output_file, voice):
    """
    Synthesize ``ssml_text`` with Edge TTS and save the audio to ``output_file``.

    ``ssml_text`` is handed to ``edge_tts.Communicate`` as its text argument
    together with ``voice``; the coroutine completes once the file is saved.

    NOTE(review): confirm whether the installed edge-tts version interprets
    SSML markup passed this way or treats it as plain text.
    """
    await edge_tts.Communicate(ssml_text, voice).save(output_file)


def run_async(coro):
    """
    Helper to run async code in any environment (Jupyter, IDE, etc.)

    Runs ``coro`` to completion on the current event loop and returns its
    result. A fresh loop is created and installed as the current one when
    there is no current loop or when the current loop has already been closed
    (a closed loop would otherwise make run_until_complete raise).

    NOTE(review): when the loop is already running (e.g. inside Jupyter),
    run_until_complete presumably relies on the nest_asyncio patch applied at
    the top of this cell.

    Parameters:
    -----------
    coro : coroutine
        The coroutine object to execute

    Returns:
    --------
    Whatever ``coro`` returns; exceptions raised by it propagate unchanged.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No current event loop in this thread.
        loop = None

    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    return loop.run_until_complete(coro)


def apply_poetic_effects(input_file, output_file, tempo_factor=0.95,
                         fade_in_ms=500, fade_out_ms=1500,
                         lowpass_hz=3000, bitrate='192k'):
    """
    Post-process a speech recording and export it as MP3.

    Steps, in order:
    - Slow playback by reinterpreting the samples at a lower frame rate
      (this resampling trick also lowers the pitch, not just the tempo)
    - Volume normalization
    - Gentle fade in/out
    - Low-pass filter, which attenuates content above ``lowpass_hz`` for a
      softer sound (it does not boost low frequencies)

    No reverb is applied.

    Parameters:
    -----------
    input_file : str
        Path of the audio file to load
    output_file : str
        Path of the MP3 file to write
    tempo_factor : float
        Multiplier applied to the frame rate; 0.95 plays about 5% slower.
        Must be positive.
    fade_in_ms, fade_out_ms : int
        Fade lengths in milliseconds; each is capped at the clip length and
        skipped entirely when the capped value is 0.
    lowpass_hz : int
        Cut-off frequency passed to the low-pass filter
    bitrate : str
        MP3 bitrate passed to the exporter

    Raises:
    -------
    ValueError : If tempo_factor is not positive or a fade length is negative
    """
    if tempo_factor <= 0:
        raise ValueError(f"tempo_factor must be positive, got {tempo_factor!r}")
    if fade_in_ms < 0 or fade_out_ms < 0:
        raise ValueError("fade durations must not be negative")

    # Load audio
    audio = AudioSegment.from_file(input_file)

    # Same raw samples tagged with a lower frame rate play back slower;
    # set_frame_rate then converts back to the original rate.
    slower = audio._spawn(audio.raw_data, overrides={
        "frame_rate": int(audio.frame_rate * tempo_factor)
    }).set_frame_rate(audio.frame_rate)

    # Normalize volume for consistency
    normalized = normalize(slower)

    # Cap each fade at the clip length so a very short clip is never asked
    # for a fade longer than itself.
    clip_ms = len(normalized)
    with_fades = normalized
    fade_in_len = min(fade_in_ms, clip_ms)
    if fade_in_len > 0:
        with_fades = with_fades.fade_in(fade_in_len)
    fade_out_len = min(fade_out_ms, clip_ms)
    if fade_out_len > 0:
        with_fades = with_fades.fade_out(fade_out_len)

    # Soften the sound by attenuating frequencies above the cut-off
    warmer = with_fades.low_pass_filter(lowpass_hz)

    # Export final audio
    warmer.export(output_file, format='mp3', bitrate=bitrate)


def text_to_poetic_speech_simple(text, output_file='poetic_speech.mp3'):
    """
    Lightweight variant: Edge TTS only, with no audio post-processing.

    Punctuation is padded to encourage pauses ('.' becomes '... ', and a
    space is added after ',', '!' and '?'), then the text is synthesized with
    the 'en-US-AriaNeural' voice at rate '-15%' and pitch '+5Hz'.

    Parameters:
    -----------
    text : str
        Input text to convert
    output_file : str
        Output audio file path

    Returns:
    --------
    str : ``output_file``, unchanged
    """
    # Applied in this order; no replacement introduces a character that a
    # later one rewrites.
    pause_rewrites = (
        ('.', '... '),
        (',', ', '),
        ('!', '! '),
        ('?', '? '),
    )
    enhanced_text = text
    for mark, padded in pause_rewrites:
        enhanced_text = enhanced_text.replace(mark, padded)

    voice = 'en-US-AriaNeural'

    async def generate():
        tts = edge_tts.Communicate(enhanced_text, voice, rate='-15%', pitch='+5Hz')
        await tts.save(output_file)

    run_async(generate())
    print(f"✓ Poetic speech created: {output_file}")
    return output_file


def batch_convert_to_poetic_speech(text_list, output_folder='poetic_outputs'):
    """
    Run text_to_poetic_speech over a list of texts, one MP3 per entry.

    Files are written to ``output_folder`` (created if missing) as
    poetic_speech_1.mp3, poetic_speech_2.mp3, ... in list order. A summary
    line with the number of texts is printed at the end; nothing is returned.

    Parameters:
    -----------
    text_list : list
        List of text strings to convert
    output_folder : str
        Folder to save output files
    """
    os.makedirs(output_folder, exist_ok=True)

    # Numbering is 1-based to match the generated file names.
    for position, text in enumerate(text_list, start=1):
        destination = os.path.join(output_folder, f'poetic_speech_{position}.mp3')
        text_to_poetic_speech(text, destination)

    print(f"\n✓ Converted {len(text_list)} texts to poetic speech!")


# Example usage
if __name__ == "__main__":

    # Example 1: English text through the full pipeline (synthesis + effects)
    text1 = "The stars danced quietly in the velvet sky, whispering ancient secrets to those who dare to listen."
    text_to_poetic_speech(text1, output_file='example1.mp3')

    # Example 2: Hindi text, selecting the Hindi voice via language='hi'
    text2 = "चाँद की चांदनी में खो जाते हैं सपने, रात की गहराइयों में।"
    text_to_poetic_speech(text2, output_file='example2_hindi.mp3', language='hi')

    # Example 3: Faster method without audio effects
    text3 = "In the silence of the night, dreams take flight."
    text_to_poetic_speech_simple(text3, output_file='example3_simple.mp3')

    # Example 4: Batch conversion into the default 'poetic_outputs' folder
    poems = [
        "The river flows with memories of time.",
        "Mountains stand tall, guardians of the earth.",
        "Winds carry tales from distant lands.",
    ]
    batch_convert_to_poetic_speech(text_list=poems)

    print("\n" + "="*50)
    print("All examples generated successfully!")
    print("="*50)

✓ Poetic speech created: example1.mp3
✓ Poetic speech created: example2_hindi.mp3
✓ Poetic speech created: example3_simple.mp3
✓ Poetic speech created: poetic_outputs\poetic_speech_1.mp3
✓ Poetic speech created: poetic_outputs\poetic_speech_2.mp3
✓ Poetic speech created: poetic_outputs\poetic_speech_3.mp3

✓ Converted 3 texts to poetic speech!

All examples generated successfully!
