In [None]:
import email
from email.parser import Parser
import os
import shutil
import emails_dictionary
import csv

In [None]:
def _payloads_are_equal(message1, message2):
    """Compare the bodies of two parsed messages, descending into MIME parts.

    For a multipart message ``get_payload()`` returns a list of sub-message
    objects; comparing those lists with ``==`` compares the sub-messages by
    identity, so two separately parsed but identical multipart emails would
    never be considered equal. The parts are therefore compared pairwise.

    Args:
        message1: A parsed ``email.message.Message`` (or sub-part).
        message2: The message (or sub-part) to compare it with.

    Returns:
        True when both payloads are equal strings, or when both are multipart
        with the same number of parts and every pair of parts has the same
        content type and an equal payload; False otherwise.
    """
    payload1 = message1.get_payload()
    payload2 = message2.get_payload()
    is_multipart1 = isinstance(payload1, list)
    is_multipart2 = isinstance(payload2, list)

    # A multipart body can never equal a plain-string body.
    if is_multipart1 != is_multipart2:
        return False
    if not is_multipart1:
        return payload1 == payload2
    if len(payload1) != len(payload2):
        return False
    return all(
        part1.get_content_type() == part2.get_content_type()
        and _payloads_are_equal(part1, part2)
        for part1, part2 in zip(payload1, payload2)
    )


def emails_are_equal(content1, content2):
    """Tell whether two raw email texts represent the same message.

    Two emails are considered equal when their ``Date``, ``From``, ``To`` and
    ``Subject`` headers match and their bodies match. All other headers
    (e.g. ``Message-ID`` or folder-specific headers) are ignored.

    Args:
        content1: Full text of the first email (headers and body).
        content2: Full text of the second email (headers and body).

    Returns:
        True if the four compared headers and the payloads are equal,
        False otherwise.
    """
    parser = Parser()
    email1 = parser.parsestr(content1)
    email2 = parser.parsestr(content2)

    headers_to_compare = ('Date', 'From', 'To', 'Subject')
    if any(email1[header] != email2[header] for header in headers_to_compare):
        return False

    return _payloads_are_equal(email1, email2)

In [None]:
def is_email_address_in_to_field(to_field, email_address_list):
    """Tell whether any recipient of a ``To`` header is a known address.

    The header is interpreted in two complementary ways so that a recipient
    is found regardless of how the header was written:

    * split on commas and stripped, which covers plain address lists
      including folded headers (``a@x.com,\\n\\tb@x.com``) and lists without
      a space after the comma;
    * parsed with ``email.utils.getaddresses``, which extracts the bare
      address from entries such as ``John Doe <john@x.com>``.

    Args:
        to_field: Raw value of the ``To`` header. ``None`` or an empty string
            is treated as "no recipients".
        email_address_list: Collection of addresses to look for. Matching is
            exact (case-sensitive).

    Returns:
        True if at least one recipient is in ``email_address_list``,
        False otherwise.
    """
    # Local import: only this function needs it.
    from email.utils import getaddresses

    if not to_field:
        return False

    candidates = {token.strip() for token in to_field.split(',')}
    candidates.update(address for _name, address in getaddresses([to_field]))
    # Trailing commas or unparseable entries produce empty strings; they are
    # not addresses and must not match anything.
    candidates.discard('')

    return any(candidate in email_address_list for candidate in candidates)

In [None]:
def create_all_sent_and_all_received_directories(person, email_addresses):
    """Gather a person's sent and received emails into two flat directories.

    Every folder located directly under ``<cwd>/maildir/<person>`` is scanned
    (one level deep; nested sub-folders are not visited). Each file found is
    parsed as an email and copied to:

    * ``all_sent`` when its ``From`` header is one of ``email_addresses``;
    * ``all_received`` otherwise, when one of its ``To`` recipients is one of
      ``email_addresses``.

    Files matching neither rule are left alone. Originals are never moved or
    modified.

    Copies are named ``<folder>_<file>``. Different folders commonly contain
    files with the same name, and copying them under their bare name would
    make them silently overwrite one another in the destination directory.

    Args:
        person: Name of the person's directory inside ``maildir``.
        email_addresses: Collection of email addresses belonging to ``person``.

    Raises:
        OSError: If the person's maildir cannot be listed or a file cannot be
            read or copied (e.g. ``FileNotFoundError`` when it does not exist).
    """
    rootdir = os.path.join(os.getcwd(), 'maildir', person)

    # Snapshot the folders before the destination directories are created.
    items = os.listdir(rootdir)

    all_sent_dir = os.path.join(rootdir, 'all_sent')
    all_received_dir = os.path.join(rootdir, 'all_received')
    os.makedirs(all_sent_dir, exist_ok=True)
    os.makedirs(all_received_dir, exist_ok=True)

    parser = Parser()

    for item in items:
        # Never re-scan the output of a previous run.
        if item in ('all_sent', 'all_received'):
            continue

        item_path = os.path.join(rootdir, item)
        if not os.path.isdir(item_path):
            continue

        for sub_item in os.listdir(item_path):
            sub_item_path = os.path.join(item_path, sub_item)
            if not os.path.isfile(sub_item_path):
                continue

            # Undecodable bytes are dropped rather than aborting the scan.
            with open(sub_item_path, 'r', errors='ignore') as file:
                message = parser.parsestr(file.read())

            # Prefix with the source folder so equal file names coming from
            # different folders do not collide.
            copy_name = item + '_' + sub_item

            sender = message['From']
            if sender is not None and sender.strip() in email_addresses:
                shutil.copy(sub_item_path, os.path.join(all_sent_dir, copy_name))
            elif message['To'] and is_email_address_in_to_field(message['To'], email_addresses):
                shutil.copy(sub_item_path, os.path.join(all_received_dir, copy_name))


In [None]:
def _payload_fingerprint(message):
    """Return a hashable representation of a parsed message's body.

    A multipart body (a list of sub-messages) becomes a tuple of
    ``(content type, fingerprint)`` pairs, one per part; any other body is
    returned unchanged.
    """
    payload = message.get_payload()
    if isinstance(payload, list):
        return tuple(
            (part.get_content_type(), _payload_fingerprint(part))
            for part in payload
        )
    return payload


def _email_fingerprint(content):
    """Return a hashable key identifying an email by Date, From, To, Subject and body."""
    message = Parser().parsestr(content)
    headers = tuple(message[name] for name in ('Date', 'From', 'To', 'Subject'))
    return headers + (_payload_fingerprint(message),)


def drop_duplicated_emails(directory):
    """Delete duplicate emails from ``directory``, keeping one copy of each.

    Two files are duplicates when their ``Date``, ``From``, ``To`` and
    ``Subject`` headers and their bodies are equal. Of each group of
    duplicates, the file that comes first in ``os.listdir(directory)`` is
    kept and the others are removed. Sub-directories are ignored.

    Each file is read and parsed exactly once: its fingerprint is looked up
    in a set of fingerprints already seen, instead of comparing every file
    against every other file.

    Args:
        directory: Path of the directory to de-duplicate in place.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be read
            or removed.
    """
    seen_fingerprints = set()

    for item in os.listdir(directory):
        item_path = os.path.join(directory, item)
        if not os.path.isfile(item_path):
            continue

        # Undecodable bytes are dropped rather than aborting the pass.
        with open(item_path, 'r', errors='ignore') as file:
            fingerprint = _email_fingerprint(file.read())

        if fingerprint in seen_fingerprints:
            # An identical email was already kept; the file is closed at this
            # point, so it can be removed safely on every platform.
            os.remove(item_path)
        else:
            seen_fingerprints.add(fingerprint)

In [None]:
# For every known person: build the all_sent / all_received folders from
# their maildir, then remove duplicate emails from each of the two folders.
for person in emails_dictionary.emails:
    email_addresses = emails_dictionary.emails[person]
    create_all_sent_and_all_received_directories(person, email_addresses)

    person_maildir = os.getcwd() + '/maildir/' + person
    for folder_suffix in ('/all_sent', '/all_received'):
        drop_duplicated_emails(person_maildir + folder_suffix)

    print(person + ' done')

In [None]:
# Print, for every person, how many emails ended up in their all_sent and
# all_received folders. A person is reported only when both folders can be
# listed.
for person in emails_dictionary.emails:
    all_sent_dir = os.path.join(os.getcwd() + '/maildir/' + person, 'all_sent')
    all_received_dir = os.path.join(os.getcwd() + '/maildir/' + person, 'all_received')
    try:
        all_sent_items = os.listdir(all_sent_dir)
        all_received_items = os.listdir(all_received_dir)
    except OSError:
        # Folder missing or unreadable: skip this person. Only filesystem
        # errors are skipped, so interrupts and programming errors still
        # surface instead of being silently swallowed.
        continue
    print(person + ' all_sent: ' + str(len(all_sent_items)))
    print(person + ' all_received: ' + str(len(all_received_items)))

In [None]:
# Per-person email counts: {person: {'sent': <int>, 'received': <int>}}.
# A person is recorded only when both of their folders can be listed.
email_stats = {}

for person in emails_dictionary.emails:
    all_sent_dir = os.path.join(os.getcwd() + '/maildir/' + person, 'all_sent')
    all_received_dir = os.path.join(os.getcwd() + '/maildir/' + person, 'all_received')
    try:
        all_sent_items = os.listdir(all_sent_dir)
        all_received_items = os.listdir(all_received_dir)
    except OSError:
        # Folder missing or unreadable: leave this person out. Only
        # filesystem errors are skipped, so interrupts and programming
        # errors still surface instead of being silently swallowed.
        continue
    email_stats[person] = {'sent': len(all_sent_items), 'received': len(all_received_items)}

In [None]:
# Export the counts held in email_stats as a CSV table with one row per
# person, preceded by a header row.
filename = "email_stats.csv"

# newline='' lets the csv module control line endings itself.
with open(filename, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['person', 'sent', 'received'])
    writer.writerows(
        [person, stats['sent'], stats['received']]
        for person, stats in email_stats.items()
    )