In [1]:
import os
import shutil
import logging
import json
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt

In [2]:
# Exceptions
import builtins


class FileNotFoundError(builtins.FileNotFoundError):
    """Raised when the input file requested by the user cannot be found.

    NOTE: the class keeps its original name, which shadows Python's built-in
    ``FileNotFoundError`` for all code that runs after this definition. To
    limit the damage it derives from the built-in, so handlers written for
    the built-in exception (or for ``OSError``) also catch this one. The
    reverse does not hold: a built-in ``FileNotFoundError`` raised by
    ``open()`` is *not* an instance of this subclass.

    Args:
        message: Human-readable description; also stored on ``.message``.
    """

    def __init__(self, message="Input file not found. Please provide a valid file path."):
        self.message = message
        # A single positional argument keeps ``str(exc)`` equal to the message.
        super().__init__(self.message)

class InvalidInputDataError(Exception):
    """Signals that the data handed over for processing is not usable.

    Args:
        message: Human-readable description; also stored on ``.message``.
    """

    def __init__(self, message="Invalid input data encountered during processing."):
        # Initialise the base exception first, then expose the text as an attribute.
        super().__init__(message)
        self.message = message

class DiskSpaceFullError(Exception):
    """Signals that there is not enough free disk space to write the output.

    Args:
        message: Human-readable description; also stored on ``.message``.
    """

    def __init__(self, message="Insufficient disk space to write the output file."):
        # Initialise the base exception first, then expose the text as an attribute.
        super().__init__(message)
        self.message = message

In [3]:
# Reading the input file
def read_input_file(file_path):
    """Return the entire content of a UTF-8 text file.

    Args:
        file_path: Path of the file to read.

    Returns:
        str: The complete file content.

    Raises:
        FileNotFoundError: If ``file_path`` is not an existing regular file.
            Raised with no arguments, so the exception's default message
            applies.
    """
    # isfile() instead of exists(): a directory "exists" but cannot be read as
    # text, and would otherwise surface as an unrelated error from open()
    # rather than as the "provide a valid file path" error.
    if not os.path.isfile(file_path):
        raise FileNotFoundError()
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()

In [4]:
# Counting words and calculating character frequencies
def process_text(content):
    """Summarise a text as a word count plus per-letter frequencies.

    Words are the whitespace-separated tokens of ``content``. Letter
    frequencies are counted on the lower-cased text and only include
    characters for which ``str.isalpha`` is true.

    Args:
        content: The text to analyse.

    Returns:
        dict: ``{"word_count": int, "char_frequency": {letter: count}}``.

    Raises:
        InvalidInputDataError: If ``content`` is not a ``str``.
    """
    if isinstance(content, str):
        # Lower-case first so upper- and lower-case letters share one bucket.
        letters = filter(str.isalpha, content.lower())
        return {
            "word_count": len(content.split()),
            "char_frequency": dict(Counter(letters)),
        }
    raise InvalidInputDataError()

In [5]:
# Saving the results to the output file
def save_output(output_file, result, min_free_bytes=1048576):
    """Write ``result`` to ``output_file`` as indented JSON.

    Before writing, the free space of the filesystem that holds the output
    file's directory is checked against ``min_free_bytes``.

    Args:
        output_file: Destination path of the JSON file (created or overwritten).
        result: JSON-serialisable object to store.
        min_free_bytes: Minimum free space, in bytes, required on the target
            filesystem. Defaults to 1048576 (1 MiB).

    Raises:
        DiskSpaceFullError: If less than ``min_free_bytes`` is available.
            The error is logged before being re-raised.
    """
    # Probe the directory that will receive the file, not "/": the output may
    # live on a different volume than the root filesystem.
    target_dir = os.path.dirname(os.path.abspath(output_file))
    try:
        free = shutil.disk_usage(target_dir).free
        if free < min_free_bytes:
            raise DiskSpaceFullError()

        with open(output_file, 'w', encoding='utf-8') as file:
            json.dump(result, file, indent=4)

    except DiskSpaceFullError as e:
        logging.error(f"Error writing to file {output_file}: {e}")
        raise

In [6]:
# Generating a word cloud from the text
def generate_word_cloud(text_data, output_image_path):
    """Render a word cloud for ``text_data``, save it to disk and display it.

    Args:
        text_data: Text from which the word cloud is generated.
        output_image_path: Path of the image file to write.
    """
    cloud_options = dict(width=800, height=400, background_color='white')
    cloud = WordCloud(**cloud_options).generate(text_data)

    # Write the image file first; the on-screen figure is drawn afterwards.
    cloud.to_file(output_image_path)

    plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')  # hide the axes around the image
    plt.show()

In [1]:
# Pipeline: read the input file, compute the statistics, store them as JSON,
# then render the word cloud. Each failure is logged and printed.
try:
    # Both paths are typed in by the user at run time.
    input_file = input("Enter the input file path: ")
    output_file = input("Enter the output file path: ")
    # NOTE(review): absolute path tied to one user's machine; the word cloud
    # image is always written to this location.
    wordcloud_image_path = "/Users/maitripatel/Desktop/Codes/Python/Advanced_Python_Programming/Class-8/Data.png"

    content = read_input_file(input_file)
    result = process_text(content)

    save_output(output_file, result)
    print(f"Processing complete! Results saved to {output_file}")

    generate_word_cloud(content, wordcloud_image_path)
    print(f"Word cloud image successfully saved to '{wordcloud_image_path}'.")

except (FileNotFoundError, InvalidInputDataError, DiskSpaceFullError) as known_error:
    # The three pipeline-specific errors are reported identically.
    logging.error(known_error)
    print(known_error)

except Exception as unexpected_error:
    # Anything else is reported with an "unexpected" prefix.
    logging.error(f"Unexpected error: {unexpected_error}")
    print(f"An unexpected error occurred: {unexpected_error}")

NameError: name 'InvalidInputDataError' is not defined

In [2]:
import os
import shutil
import logging
import json
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Configure logging
log_file = 'textprocessing.log'

# Set up the root logger
logging.basicConfig(
    level=logging.DEBUG,  # Minimum log level to capture for both file and console
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Create a file handler to log to a separate file
file_handler = logging.FileHandler(log_file, mode='w')  # Overwrite log file on each run
file_handler.setLevel(logging.DEBUG)  # Capture all levels for the log file

# Create a console handler to log to the console (stdout)
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.DEBUG)  # Capture all levels for the console

# Create a logging format for both the file and console handler
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)
console_handler.setFormatter(formatter)

# Add both handlers to the root logger
logging.getLogger().addHandler(file_handler)
logging.getLogger().addHandler(console_handler)

# Custom exceptions
import builtins


class FileNotFoundError(builtins.FileNotFoundError):
    """Raised when the input file requested by the user cannot be found.

    NOTE: the class keeps its original name, which shadows Python's built-in
    ``FileNotFoundError`` for all code that runs after this definition. To
    limit the damage it derives from the built-in, so handlers written for
    the built-in exception (or for ``OSError``) also catch this one. The
    reverse does not hold: a built-in ``FileNotFoundError`` raised by
    ``open()`` is *not* an instance of this subclass.

    Args:
        message: Human-readable description; also stored on ``.message``.
    """

    def __init__(self, message="Input file not found. Please provide a valid file path."):
        self.message = message
        # A single positional argument keeps ``str(exc)`` equal to the message.
        super().__init__(self.message)

class InvalidInputDataError(Exception):
    """Signals that the data handed over for processing is not usable.

    Args:
        message: Human-readable description; also stored on ``.message``.
    """

    def __init__(self, message="Invalid input data encountered during processing."):
        # Initialise the base exception first, then expose the text as an attribute.
        super().__init__(message)
        self.message = message

class DiskSpaceFullError(Exception):
    """Signals that there is not enough free disk space to write the output.

    Args:
        message: Human-readable description; also stored on ``.message``.
    """

    def __init__(self, message="Insufficient disk space to write the output file."):
        # Initialise the base exception first, then expose the text as an attribute.
        super().__init__(message)
        self.message = message

# Reading the input file
def read_input_file(file_path):
    """Return the entire content of a UTF-8 text file, logging each step.

    Args:
        file_path: Path of the file to read.

    Returns:
        str: The complete file content.

    Raises:
        FileNotFoundError: If ``file_path`` is not an existing regular file.
            Raised with no arguments, so the exception's default message
            applies. An ERROR record is logged first.
    """
    logging.debug(f"Attempting to read input file from: {file_path}")
    # isfile() instead of exists(): a directory "exists" but cannot be read as
    # text, and would otherwise surface as an unrelated error from open()
    # rather than as the "provide a valid file path" error.
    if not os.path.isfile(file_path):
        logging.error(f"FileNotFoundError: The file at {file_path} does not exist.")
        raise FileNotFoundError()

    logging.info(f"Reading file content from {file_path}")
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    logging.debug(f"File read successfully: {file_path}")
    return content

# Counting words and calculating character frequencies
def process_text(content):
    """Summarise a text as a word count plus per-letter frequencies.

    Words are the whitespace-separated tokens of ``content``. Letter
    frequencies are counted on the lower-cased text and only include
    characters for which ``str.isalpha`` is true. Progress and the computed
    figures are written to the log.

    Args:
        content: The text to analyse.

    Returns:
        dict: ``{"word_count": int, "char_frequency": {letter: count}}``.

    Raises:
        InvalidInputDataError: If ``content`` is not a ``str`` (an ERROR
            record is logged first).
    """
    logging.debug("Starting text processing...")
    if not isinstance(content, str):
        logging.error("InvalidInputDataError: Content is not of type string.")
        raise InvalidInputDataError()

    word_count = len(content.split())
    # Lower-case first so upper- and lower-case letters share one bucket.
    # Kept as a Counter here because its repr is what appears in the log line.
    char_frequency = Counter(filter(str.isalpha, content.lower()))
    logging.info(f"Word count: {word_count}, Character frequencies: {char_frequency}")

    summary = {"word_count": word_count, "char_frequency": dict(char_frequency)}
    logging.debug("Text processing complete.")
    return summary

# Saving output to file
def save_output(output_file, result, min_free_bytes=1048576):
    """Write ``result`` to ``output_file`` as indented JSON, logging each step.

    Before writing, the free space of the filesystem that holds the output
    file's directory is checked against ``min_free_bytes``.

    Args:
        output_file: Destination path of the JSON file (created or overwritten).
        result: JSON-serialisable object to store.
        min_free_bytes: Minimum free space, in bytes, required on the target
            filesystem. Defaults to 1048576 (1 MiB).

    Raises:
        DiskSpaceFullError: If less than ``min_free_bytes`` is available.
            The error is logged before being re-raised.
    """
    logging.debug(f"Attempting to save output to {output_file}")
    # Probe the directory that will receive the file, not "/": the output may
    # live on a different volume than the root filesystem.
    target_dir = os.path.dirname(os.path.abspath(output_file))
    try:
        free = shutil.disk_usage(target_dir).free
        logging.info(f"Disk space available: {free} bytes")

        if free < min_free_bytes:
            logging.critical("DiskSpaceFullError: Not enough disk space to write the output.")
            raise DiskSpaceFullError()

        with open(output_file, 'w', encoding='utf-8') as file:
            json.dump(result, file, indent=4)

        logging.info(f"Results successfully saved to {output_file}")

    except DiskSpaceFullError as e:
        logging.error(f"Error writing to file {output_file}: {e}")
        raise

# Generating a word cloud from the text
def generate_word_cloud(text_data, output_image_path):
    """Render a word cloud for ``text_data``, save it to disk and display it.

    Any exception raised along the way is logged and then re-raised unchanged.

    Args:
        text_data: Text from which the word cloud is generated.
        output_image_path: Path of the image file to write.
    """
    logging.debug("Starting word cloud generation...")
    cloud_options = dict(width=800, height=400, background_color='white')
    try:
        cloud = WordCloud(**cloud_options).generate(text_data)

        # Write the image file first; the on-screen figure is drawn afterwards.
        cloud.to_file(output_image_path)
        logging.info(f"Word cloud image successfully saved to '{output_image_path}'.")

        plt.figure(figsize=(10, 5))
        plt.imshow(cloud, interpolation='bilinear')
        plt.axis('off')  # hide the axes around the image
        plt.show()

    except Exception as failure:
        logging.error(f"Error generating word cloud: {failure}")
        raise

# Main script execution
# Pipeline: read the input file, compute the statistics, store them as JSON,
# then render the word cloud. Each failure is logged and printed.
try:
    logging.info("Starting the main script execution...")

    # Both paths are typed in by the user at run time.
    input_file = input("Enter the input file path: ")
    output_file = input("Enter the output file path: ")
    # NOTE(review): absolute path tied to one user's machine; the word cloud
    # image is always written to this location.
    wordcloud_image_path = "/Users/maitripatel/Desktop/Codes/Python/Advanced_Python_Programming/Class-8/Data.png"

    # Load the text and derive the word/letter statistics from it
    content = read_input_file(input_file)
    result = process_text(content)

    # Persist the statistics as JSON
    save_output(output_file, result)
    logging.info(f"Processing complete! Results saved to {output_file}")

    # Render the word cloud and store it as an image
    generate_word_cloud(content, wordcloud_image_path)
    logging.info(f"Word cloud image successfully saved to '{wordcloud_image_path}'.")

except (FileNotFoundError, InvalidInputDataError, DiskSpaceFullError) as known_error:
    # The three pipeline-specific errors are reported identically.
    logging.error(known_error)
    print(known_error)

except Exception as unexpected_error:
    # Anything else is reported with an "unexpected" prefix.
    logging.error(f"Unexpected error: {unexpected_error}")
    print(f"An unexpected error occurred: {unexpected_error}")

logging.info("Script execution finished.")


2024-10-07 15:28:43,530 - INFO - Starting the main script execution...
2024-10-07 15:28:43,530 - INFO - Starting the main script execution...
2024-10-07 15:28:46,342 - DEBUG - Attempting to read input file from: hciuweuc
2024-10-07 15:28:46,342 - DEBUG - Attempting to read input file from: hciuweuc
2024-10-07 15:28:46,345 - ERROR - FileNotFoundError: The file at hciuweuc does not exist.
2024-10-07 15:28:46,345 - ERROR - FileNotFoundError: The file at hciuweuc does not exist.
2024-10-07 15:28:46,381 - ERROR - Input file not found. Please provide a valid file path.
2024-10-07 15:28:46,381 - ERROR - Input file not found. Please provide a valid file path.
2024-10-07 15:28:46,394 - INFO - Script execution finished.
2024-10-07 15:28:46,394 - INFO - Script execution finished.


Input file not found. Please provide a valid file path.
