# Eml Parser
### Written by TJ Nel

This script will take a .eml file and parse out information based on that email file.

In [3]:
eml_loc = input("What .eml file or directory would you like to analyze? ")

What .eml file or directory would you like to analyze? eml


This is the main script logic for parsing the email

In [4]:
import mailparser
import os
import re
import pandas as pd

from os import listdir
from os.path import isfile, join
from urllib.parse import urlparse

file_list = []
COLUMN_NAMES = ["Date", "Subject", "To", "From", "Body", "Attachments", "URLs"]
email_df = pd.DataFrame(columns=COLUMN_NAMES)

def parse_eml(eml_file):
    attachment_names = []
    msg = mailparser.parse_from_file(eml_file)
    url_list = [ urlparse(url).geturl() for url in re.findall(r'https?://\S+', msg.body)]
    print("Email Parsed from: {} to: {} about {} on {} with {} attachments and {} urls\n".format(msg.From_, msg.To, msg.Subject, msg.date, len(msg.attachments), len(url_list)))
    for attachment in msg.attachments:
        attachment_names.append(attachment["filename"])
    email_df.loc[len(email_df)] = [pd.to_datetime(msg.date, errors = 'coerce'), msg.Subject, msg.To, msg.From_, msg.body, ", ".join(attachment_names) , ", ".join(url_list)]
    

if eml_loc:
    if os.path.isdir(eml_loc):
        file_list = ['{}/{}/{}'.format(os.getcwd(),eml_loc,f) for f in listdir(eml_loc) if isfile(os.path.join(eml_loc, f))]
    else:
        file_list.append(eml_loc) 
        
    for eml_file in file_list:
        print(eml_file)
        parse_eml(eml_file)
else:
    print("You need to enter a .eml file or directory!")

/Users/tjadanel/Documents/Projects/Python/EmailParsing/eml/attachment_content_disposition.eml
Email Parsed from: [('', 'foo@example.com')] to: [('', 'blah@example.com')] about testing on 2005-06-06 20:21:22 with 1 attachments and 0 urls

/Users/tjadanel/Documents/Projects/Python/EmailParsing/eml/raw_email_bad_time.eml
Email Parsed from: [('Атиковa', 'yusuf75thu@auracom.net')] to: [('', 'abcdefg@AAAAAAAAA.net')] about [0]: XXXXXXX XXXXX XXXXX ! on 3609-06-30 09:33:50 with 0 attachments and 1 urls

/Users/tjadanel/Documents/Projects/Python/EmailParsing/eml/example.eml
Email Parsed from: [('Example', 'from@example.com')] to: [('', 'to@example.com')] about test! on 2010-03-27 12:11:21 with 0 attachments and 1 urls

/Users/tjadanel/Documents/Projects/Python/EmailParsing/eml/attachment_pdf_lf.eml
Email Parsed from: [('Test Tester', 'xxxx@xxxx.com')] to: [('', 'xxxx@xxxx.com'), ('', 'xxxx@xxxx.com')] about Another PDF with 🎉 Unicode chars in it 🍿 on 2005-05-10 17:26:39 with 1 attachments and 

In [5]:
email_df

Unnamed: 0,Date,Subject,To,From,Body,Attachments,URLs
0,2005-06-06 20:21:22,testing,"[(, blah@example.com)]","[(, foo@example.com)]",This is the first part.,api.rb,
1,NaT,[0]: XXXXXXX XXXXX XXXXX !,"[(, abcdefg@AAAAAAAAA.net)]","[(Атиковa, yusuf75thu@auracom.net)]",Filter2: This message has been scanned for vir...,,"http://www.mailscanner.info/""><b>MailScanner</..."
2,2010-03-27 12:11:21,test!,"[(, to@example.com)]","[(Example, from@example.com)]","Test\n--- mail_boundary ---\n<html xmlns:v=""ur...",,"http://www.w3.org/TR/REC-html40"">"
3,2005-05-10 17:26:39,Another PDF with 🎉 Unicode chars in it 🍿,"[(, xxxx@xxxx.com), (, xxxx@xxxx.com)]","[(Test Tester, xxxx@xxxx.com)]","Just attaching another PDF, here, to see what ...",broken.pdf,
4,2008-11-22 04:04:59,Testing 123,"[(Mikel Lindsaar, raasdnil@gmail.com)]","[(Mikel Lindsaar, test@lindsaar.net)]",Plain email.\n\nHope it works well!\n\nMikel,,
5,NaT,Formao FrenetikPolis: Mega Campanha Final Vero...,"[(, martin@internet.ao), (, iris@internet.ao),...","[(Formação Frenetikpolis, info@formacaofreneti...",TEST,,
6,2009-09-19 15:49:36,illegal copy of our patient education software,"[(, abuser@r.ru)]","[(Andrey Kuznetsov, ak@g.com)]",,,
7,2010-09-22 07:30:53,eBay Bid - 2008 Ford Super Duty F-350 DRW King...,"[(, e-f5f4@app.ar-example.com)]","[(, no-reply@crm.el-example.org)]",Body Text,,
8,2005-05-02 22:07:05,"Re: Test:""漢字""mid""漢字""tail","[(, jamis@37signals.com)]","[(Jamis Buck, jamis@37signals.com)]","대부분의 마찬가지로, 우리는 하나님을 믿습니다.\r\n\r\n제 이름은 Jamis입니다.",,
9,2010-10-15 03:25:06,,"[(, ), (, user-example@aol.com), (, e-s-a-s-22...","[(, anonymous@i.tp.host)]",CONTACT:\n\n\n\n\nCOMMENT:\n\n\nPAGE THEY WERE...,,


In [None]:
eml_output = input("What would you like to name this output file (csv)? ")

In [135]:
try:
    email_df.to_csv(eml_output, encoding='utf-8', index=False)
    print("Complete!")
except:
    print("Something went wrong")


Complete!
