# Lab Activity 5: File Handling in Python


## **Name:** Tarun Hariharan

## **Roll No:** 2024113009

#### **Note:**

- You must implement exception handling using try and except blocks whenever reading or writing. You will be awarded zero marks if you do not implement exception handling.

- When reading and writing files, always specify the encoding as `utf-8` to avoid issues with special characters.  

### Reverse File [2]

Write a function `reverse_file` that reads a file and writes its contents in reverse order to a new file. The function should take two arguments: the name of the input file and the name of the output file. The function should return `True` if the operation was successful and `False` otherwise.

#### Example:
```python
# Your output should look like this
success = reverse_file('input.txt', 'output.txt')
print(success)  # Output: True
```

#### Enter your Roll Number as an Integer:

In [3]:
# Roll number as an integer; its string form names the submission directory used by the cells below.
ROLL_NUMBER = 2024113009

In [4]:
import os

# Create the submission directory named after the roll number; exist_ok=True keeps re-runs from failing.
os.makedirs(str(ROLL_NUMBER), exist_ok=True)

In [11]:
def reverse_file(input_filename, output_filename):
    """
    Write the character-reversed contents of one text file to another.

    The whole input is read as UTF-8 and reversed character by character,
    so the last character of the input becomes the first character of the
    output. The result is written as UTF-8 to the output file.

    Args:
        input_filename (str): Path of the file to read.
        output_filename (str): Path of the file that receives the reversed text.

    Returns:
        bool: True when the output was written, False when reading or
        writing raised an exception (the error is printed).
    """
    try:
        with open(input_filename, mode='r', encoding='utf-8') as source:
            text = source.read()

        # A negative-step slice walks the string from its last character to its first.
        flipped = text[::-1]

        with open(output_filename, mode='w', encoding='utf-8') as target:
            target.write(flipped)
    except Exception as err:
        print(f"Error happened: {err}")
        return False
    return True

def main():
    """Reverse ``moby_dick.txt`` into the submission directory and check that the output exists."""
    source_path = 'moby_dick.txt'
    target_path = f'{ROLL_NUMBER}/moby_dick_reverse.txt'

    # The submission directory and the input text must already be in place.
    assert os.path.exists(str(ROLL_NUMBER)), 'Submission directory not created'
    assert os.path.exists(source_path), 'Input file does not exist'

    reverse_file(source_path, target_path)

    assert os.path.exists(target_path), "Output File not created"

main()

### Count Characters [2]

Write a function `count_characters` that takes as input a file. The function should return a dictionary with the count of all characters present in the file. The character counting should be case-sensitive.

**Note**: Use of the `Counter` class from the `collections` module **will result in a zero**. You should implement the character counting logic yourself.

#### Example:
```python
# Your output should look like this
# {'a': 12345, 'b': 6789, 'A': 2345, 'B': 789, ...}
```

In [None]:
import pickle


def count_characters(filename):
    """
    Tally how many times every character appears in a text file.

    The file is decoded as UTF-8 and scanned line by line. Counting is
    case-sensitive and includes whitespace such as newlines.

    Args:
        filename (str): Path of the file to scan.

    Returns:
        dict: Maps each character found to its number of occurrences, with
        keys in order of first appearance. An empty dict is returned (and
        the error printed) if the file cannot be read.
    """
    tally = {}
    try:
        with open(filename, mode='r', encoding='utf-8') as handle:
            for line in handle:
                for ch in line:
                    if ch in tally:
                        tally[ch] += 1
                    else:
                        tally[ch] = 1
    except Exception as err:
        print(f"Error happened: {err}")
        return {}
    return tally
    


def main():
    """Count the characters of ``random_50mb.txt``, pickle the counts and print them by frequency."""
    input_path = 'random_50mb.txt'
    pickle_path = f'{ROLL_NUMBER}/count_dict.pkl'

    assert os.path.exists(str(ROLL_NUMBER)), 'Submission directory not created'
    assert os.path.exists(input_path), 'Input file does not exist'

    count_dict = count_characters(input_path)

    with open(pickle_path, 'wb') as sink:
        pickle.dump(count_dict, sink)

    assert os.path.exists(pickle_path), "Output File not created"

    # Most frequent characters first; equal counts keep their first-seen order.
    by_frequency = sorted(count_dict.items(), key=lambda pair: pair[1], reverse=True)
    print(by_frequency)

main()


[('Z', 561338), ('2', 560684), (';', 560407), ('t', 560375), ('(', 560310), ('S', 560150), ('{', 560076), (')', 560059), ('E', 559993), ('<', 559958), ('F', 559931), ('|', 559926), ('N', 559921), (']', 559902), ('o', 559899), ('H', 559886), ('1', 559879), ('J', 559838), ('s', 559833), ('w', 559785), ('^', 559775), ('$', 559752), ('r', 559728), ('V', 559722), ('+', 559708), ('Q', 559706), ('0', 559691), ('`', 559689), ('K', 559664), ('v', 559651), ('g', 559651), ('C', 559649), ('*', 559623), ('Y', 559562), ('.', 559553), ('U', 559513), ('/', 559500), ('}', 559495), ('4', 559446), ('m', 559439), ('O', 559433), ('I', 559407), ('#', 559358), ('@', 559350), ('[', 559282), ('T', 559273), ('z', 559269), ('B', 559265), ('~', 559261), ('y', 559244), ('j', 559206), ('!', 559202), (':', 559186), ('5', 559186), ('i', 559178), ('9', 559141), ('7', 559136), ('A', 559123), ('\n', 559083), ('h', 559060), ('l', 559051), ('k', 559027), ('D', 558980), ('M', 558969), ('c', 558872), ('p', 558836), ('_', 55

### Extract Email Addresses [3]

Extract all the unique email addresses as a list from the `email_exchanges_big.txt` file.

You'll need to use regular expressions to extract the email addresses from the text file. You can use the `re` module, compile a regular expression pattern using the `re.compile` function, and use the `findall` method to extract all the email addresses.

In [None]:
import re
import pickle

def extract_unique_emails(filename):
    """
    Extract unique email addresses from a text file.

    The file is decoded as UTF-8 and searched line by line, so an address
    is only found when it sits entirely on one line.

    Args:
        filename (str): The name of the file to read.

    Returns:
        list: The unique email addresses found in the file, in order of
        first appearance (deterministic across runs). An empty list is
        returned, and the error printed, if the file cannot be read.
    """
    # Compiled once up front instead of handing the raw pattern to re on every line.
    email_pattern = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')

    # A dict acts as an insertion-ordered set. A plain set would return the
    # addresses in an order that changes between interpreter runs.
    seen = {}
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            for line in f:
                for address in email_pattern.findall(line):
                    seen[address] = None
    except Exception as err:
        print(f"Error happened: {err}")
        return []
    return list(seen)
    


def main():
    """Extract the unique email addresses and pickle them in sorted order."""
    input_path = 'email_exchanges_big.txt'
    pickle_path = f'{ROLL_NUMBER}/emails.pkl'

    assert os.path.exists(str(ROLL_NUMBER)), 'Submission directory not created'
    assert os.path.exists(input_path), 'Input file does not exist'

    emails = extract_unique_emails(input_path)

    # Sorting makes the pickled list independent of the extraction order.
    sorted_emails = sorted(emails)

    with open(pickle_path, 'wb') as sink:
        pickle.dump(sorted_emails, sink)

    assert os.path.exists(pickle_path), "Output File not created"


main()


### Find Most Common Words [3]
Write a function `find_most_common_words` that takes as input a file and an integer `n`. The function should return a list of `(word, count)` tuples for the `n` most common words in the file, in descending order of word frequency. The function should count the occurrences of words without regard to their case. So, "The", "the", and "THE" would all contribute to the count for the word "the". To implement this, you would typically convert all words to lowercase (or uppercase) before counting their occurrences.

**Note**: 
- Use of the `Counter` class from the `collections` module **will result in a zero**. You should implement the word counting logic yourself.
- In the returned list, words should be in lowercase.

#### Example:
```python
# Your output should look like this
print(find_most_common_words('sample.txt', 10))
# [('the', 10),
#  ('be', 8),
#  ('to', 6),
#  ('of', 6),
#  ('and', 5),
#  ('a', 4),
#  ('in', 4),
#  ('that', 3),
#  ('have', 2),
#  ('i', 2)]

# Your output should look like this
print(find_most_common_words('sample.txt', 5))
# [('the', 10),
#  ('be', 8),
#  ('to', 6),
#  ('of', 6),
#  ('and', 5)]
```

In [None]:
import re

def find_most_common_words(filename, n):
    """
    Find the most common words in a text file.

    Words are maximal runs of word characters (the ``\\w`` class: letters,
    digits and underscore). Each line is lowercased before matching, so
    "The", "the" and "THE" all count towards "the".

    Args:
        filename (str): The name of the file to read.
        n (int): The number of most common words to return. Zero or a
            negative value yields an empty list.

    Returns:
        list of tuples: ``(word, count)`` pairs sorted by count in
        descending order, with ties kept in order of first appearance.
        The list contains at most `n` tuples. An empty list is returned,
        and the error printed, if the file cannot be read.
    """
    # Compiled once; the same pattern is applied to every line.
    word_pattern = re.compile(r'\b\w+\b')
    frequencies = {}
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            for line in f:
                for word in word_pattern.findall(line.lower()):
                    frequencies[word] = frequencies.get(word, 0) + 1

        # sorted() is stable, so equally frequent words stay in first-seen order.
        ranked = sorted(frequencies.items(), key=lambda item: item[1], reverse=True)

        # A negative slice bound would drop entries from the END of the ranking
        # (n=-1 returned everything except the rarest word), so clamp it to zero.
        # None keeps its slice meaning of "no limit".
        limit = n if n is None else max(n, 0)
        return ranked[:limit]
    except Exception as err:
        print(f"Error happened: {err}")
        return []

def main():
    """Compute the top 10 and top 100 words of ``war_and_peace.txt`` and pickle both lists."""

    def check_and_save(words, pickle_path):
        # Every returned word must be lowercase before the list is persisted.
        for word, _ in words:
            assert word.islower(), "Words should be in lowercase"

        with open(pickle_path, 'wb') as sink:
            pickle.dump(words, sink)

        assert os.path.exists(pickle_path), "Output File not created"

    most_common_words = find_most_common_words('war_and_peace.txt', 10)
    print(most_common_words)
    check_and_save(most_common_words, f'{ROLL_NUMBER}/most_common_words.pkl')

    most_common_words_100 = find_most_common_words('war_and_peace.txt', 100)
    check_and_save(most_common_words_100, f'{ROLL_NUMBER}/most_common_words_100.pkl')

main()

[('the', 34545), ('and', 22226), ('to', 16675), ('of', 14889), ('a', 10550), ('he', 10001), ('in', 8979), ('that', 8190), ('his', 7984), ('was', 7359)]



### Merge Two Files [5]

Write a function `merge_files` that reads two text files and merges their contents into a third file. Each line from both files should be alternately written to the output file. If one file has more lines than the other, append the remaining lines at the end. The function should take three arguments: the names of the two input files and the name of the output file. It should return `True` if the operation was successful and `False` otherwise.

#### Example:
```python
# Your output should look like this
success = merge_files('file1.txt', 'file2.txt', 'merged.txt')
print(success)  # Output: True
```

You must implement exception handling using try and except blocks.
    

In [21]:
import os

def merge_files(file1, file2, output_file):
    """
    Reads two text files and merges their contents alternately into a third file.

    Lines are taken in turn (first file, second file, first file, ...). If
    one file has more lines than the other, the remaining lines are
    appended. All files are read and written as UTF-8.

    A line that lacks a trailing newline (typically the last line of an
    input) gets one added when another line follows it in the output, so
    two source lines are never fused into one. The very last output line
    is written exactly as it was read.

    Args:
        file1 (str): The name of the first input file.
        file2 (str): The name of the second input file.
        output_file (str): The name of the output file to write the merged content.

    Returns:
        bool: True if the operation is successful, False otherwise (the
        error is printed).
    """
    try:
        with open(file1, 'r', encoding='utf-8') as f1, open(file2, 'r', encoding='utf-8') as f2:
            first_lines = f1.readlines()
            second_lines = f2.readlines()

        # Interleave the part both files have in common...
        merged = []
        for left, right in zip(first_lines, second_lines):
            merged.append(left)
            merged.append(right)

        # ...then append whatever the longer file still has (one slice is always empty).
        shared = min(len(first_lines), len(second_lines))
        merged.extend(first_lines[shared:])
        merged.extend(second_lines[shared:])

        # Terminate every line that is followed by another one, leaving the
        # final line untouched. The slicing also works for an empty list.
        terminated = [
            line if line.endswith('\n') else line + '\n'
            for line in merged[:-1]
        ]
        terminated.extend(merged[-1:])

        with open(output_file, 'w', encoding='utf-8') as out:
            out.writelines(terminated)
        return True
    except Exception as err:
        print(f"Error happened: {err}")
        return False

    

def main():
    """Merge ``file1.txt`` and ``file2.txt`` into the submission directory and report the result."""
    submission_dir = str(ROLL_NUMBER)
    os.makedirs(submission_dir, exist_ok=True)
    assert os.path.exists(submission_dir), 'Submission directory not created'

    file1 = 'file1.txt'
    file2 = 'file2.txt'
    assert os.path.exists(file1), 'Input file 1 does not exist'
    assert os.path.exists(file2), 'Input file 2 does not exist'

    output_file = f'{ROLL_NUMBER}/merged.txt'
    success = merge_files(file1, file2, output_file)
    print(success)

    assert os.path.exists(output_file), "Output File not created"

main()

True



### Log File Analyzer [5]

Write a function `analyze_logs` that processes a server log file and extracts useful statistics. The log file contains lines in the format:

```
[TIMESTAMP] - [LOG_LEVEL] - [MESSAGE]
```

Example log file (`server.log`):
```
[2025-02-17 12:30:45] - INFO - Server started successfully
[2025-02-17 12:32:10] - ERROR - Database connection failed
[2025-02-17 12:35:22] - WARNING - High memory usage detected
[2025-02-17 12:40:05] - INFO - User logged in
[2025-02-17 12:42:50] - ERROR - Failed to process payment
```

#### **Task**

Implement `analyze_logs(log_file)`, which:

- Counts occurrences of each log level (INFO, ERROR, WARNING)
- Finds the most recent log entry of each log level
- Handles exceptions (e.g., file not found, format errors)
- Returns a dictionary with log statistics

#### Example Output:
```
{
    'counts': {'INFO': 2, 'ERROR': 2, 'WARNING': 1},
    'last_logs': {
        'INFO': '[2025-02-17 12:40:05] - INFO - User logged in',
        'ERROR': '[2025-02-17 12:42:50] - ERROR - Failed to process payment',
        'WARNING': '[2025-02-17 12:35:22] - WARNING - High memory usage detected'
    }
}

```

You must implement exception handling using try and except blocks.


In [25]:
import os
import re
from collections import defaultdict
import pickle

def analyze_logs(log_file):
    """
    Processes a server log file and extracts useful information.

    Each non-blank line must look like ``[TIMESTAMP] - LEVEL - MESSAGE``,
    where LEVEL is INFO, ERROR or WARNING. Blank lines are skipped. Any
    other line is treated as a format error and aborts the analysis.

    Args:
        log_file (str): The name of the log file to read (decoded as UTF-8).

    Returns:
        dict: On success, a dictionary with the following keys:
            - 'counts': A dictionary with the count of each log level
              (INFO, ERROR, WARNING). Levels that never occur stay at 0.
            - 'last_logs': A dictionary mapping each log level that
              occurred to its last entry in the file, as the full line
              without the trailing newline.
        An empty dictionary is returned, and the error printed, if the
        file cannot be read or contains a malformed line.

    Note:
        "Most recent" means last in file order; timestamps are not
        compared, so the log is assumed to be written chronologically.
    """
    entry_pattern = re.compile(r"\[(.*?)\] - (INFO|ERROR|WARNING) - (.+)")
    stats = {"counts": {"INFO": 0, "ERROR": 0, "WARNING": 0}, "last_logs": {}}
    try:
        with open(log_file, 'r', encoding='utf-8') as f:
            for line_number, raw_line in enumerate(f, start=1):
                # Drop the line terminator so stored entries match the log text exactly.
                entry = raw_line.rstrip('\n')

                # A blank line (e.g. at the end of the file) is not a malformed entry.
                if not entry.strip():
                    continue

                matched = entry_pattern.match(entry)
                if matched is None:
                    raise ValueError(
                        f"The format of the log entries is incorrect (line {line_number})"
                    )

                level = matched.group(2)
                stats["counts"][level] += 1
                # Later lines overwrite earlier ones, leaving the last entry per level.
                stats["last_logs"][level] = entry
        return stats
    except Exception as err:
        print(f"Error happened: {err}")
        return {}





    

def main():
    """Analyze ``server.log``, validate the shape of the result, print it and pickle it."""
    # Create submission directory
    submission_dir = str(ROLL_NUMBER)
    os.makedirs(submission_dir, exist_ok=True)
    assert os.path.exists(submission_dir), 'Submission directory not created'

    log_stats = analyze_logs("server.log")

    print(log_stats)

    # Both top-level keys must be present...
    for key, complaint in (
        ("counts", "Counts key missing"),
        ("last_logs", "Last logs key missing"),
    ):
        assert key in log_stats, complaint

    # ...and every log level needs a recorded last entry.
    for level, complaint in (
        ("INFO", "INFO key missing"),
        ("ERROR", "ERROR key missing"),
        ("WARNING", "WARNING key missing"),
    ):
        assert level in log_stats["last_logs"], complaint

    if not log_stats:
        return

    print("Log Level Counts:", log_stats["counts"])
    print("Last Log Entries:")
    for level, entry in log_stats["last_logs"].items():
        print(f"{level}: {entry}")

    pickle_path = f'{ROLL_NUMBER}/log.pkl'
    with open(pickle_path, 'wb') as sink:
        pickle.dump(log_stats, sink)

    assert os.path.exists(pickle_path), "Output File not created"

main()



Last Log Entries:

INFO: [2026-04-28 04:30:05] - INFO - Configuration updated

ERROR: [2026-04-28 04:34:08] - ERROR - Application crashed

