In [1]:
import pandas as pd
import numpy as np
df = pd.read_csv("/Users/peter/Library/CloudStorage/OneDrive-Personal/Documents/MDS_UBC/DATA_533/emails.csv")




In [2]:

df_sample = df.sample(n=100000, random_state=42)

In [4]:
import re

def clean_first_message(raw_text):
    if not isinstance(raw_text, str):
        return ""

    # Normalize newlines
    text = raw_text.replace("\r", "")

    # 1) Strip any initial header block (Message-ID, Date, etc.)
    parts = text.split("\n\n", 1)
    body = parts[1] if len(parts) == 2 else text

    # 2) Cut at first internal header-like marker
    lower = body.lower()
    markers = ["\nfrom:", "\nto:", "\ncc:", "\nsubject:", "to:", "phone:"]

    cut_pos = len(body)
    for m in markers:
        pos = lower.find(m)
        if pos != -1:
            cut_pos = min(cut_pos, pos)

    body = body[:cut_pos]

    # 3) Remove tabs
    body = body.replace("\t", " ")

    # 4) Normalize whitespace
    body = body.replace("\n", " ")
    body = re.sub(r"\s+", " ", body)

    # 5) Strip
    body = body.strip()

    # 6) Limit to 250 words
    words = body.split()
    body = " ".join(words[:250])

    return body

In [5]:

df_sample["clean_message"] = df_sample["message"].astype(str).apply(clean_first_message)


In [6]:
df_sample.head(50)

Unnamed: 0,file,message,clean_message
427616,shackleton-s/sent/1912.,Message-ID: <21013688.1075844564560.JavaMail.e...,Bill: Thanks for the info. I also spoke with J...
108773,farmer-d/logistics/1066.,Message-ID: <22688499.1075854130303.JavaMail.e...,"Aimee, Please check meter #1591 Lamay gas lift..."
355471,parks-j/deleted_items/202.,Message-ID: <27817771.1075841359502.JavaMail.e...,GCCA Crawfish and rip-off raffle & over-priced...
457837,stokley-c/chris_stokley/iso/client_rep/41.,Message-ID: <10695160.1075858510449.JavaMail.e...,"<<Keoni.zip>> Chris, per your request here are..."
124910,germany-c/all_documents/1174.,Message-ID: <27819143.1075853689038.JavaMail.e...,I'm trying to change the Receipt Meter on deal...
403283,scott-s/_sent_mail/244.,Message-ID: <10142547.1075846737160.JavaMail.e...,What if we replace Section 2 with something li...
293966,love-p/discussion_threads/113.,Message-ID: <18212904.1075858229814.JavaMail.e...,---------------------- Forwarded by Phillip M ...
478830,taylor-m/australia_trading/8.,Message-ID: <14840674.1075860237113.JavaMail.e...,"Dear Mark, As per our discussion at the law co..."
295428,love-p/sent_items/765.,Message-ID: <22170097.1075862178026.JavaMail.e...,got your message last night. What is up? Bet y...
137822,giron-d/deleted_items/170.,Message-ID: <23520008.1075852220995.JavaMail.e...,"Hello Darron, Just wanted to let you know that..."


In [7]:
from sentence_transformers import SentenceTransformer
import numpy as np

model = SentenceTransformer("all-MiniLM-L6-v2")

texts = df_sample["clean_message"].astype(str).tolist()

embeddings = model.encode(
    texts,
    batch_size=64,
    show_progress_bar=True,
    convert_to_numpy=True,
    normalize_embeddings=True  
)


print(embeddings.shape)

Batches:   0%|          | 0/1563 [00:00<?, ?it/s]

(100000, 384)


In [8]:
from sklearn.decomposition import PCA

# embeddings: (n_samples, d)
pca = PCA(n_components=50, random_state=42)
X = pca.fit_transform(embeddings)



In [9]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np

best_score = -1
best_k = None
scores = {}

for k in range(2, 15):
    km = KMeans(n_clusters=k, random_state=42, n_init=10)
    labels = km.fit_predict(X)
    score = silhouette_score(X, labels)
    scores[k] = score
    print(f"k={k}, silhouette={score:.4f}")
    if score > best_score:
        best_score = score
        best_k = k

print("Best k:", best_k, "with silhouette:", best_score)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md



k=2, silhouette=0.0728


KeyboardInterrupt: 

In [11]:
import hdbscan
clusterer = hdbscan.HDBSCAN(
    min_cluster_size=400,
    metric='euclidean'
)
labels = clusterer.fit_predict(X)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

In [12]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np

df_sample["cluster"] = labels


mask = labels != -1
if mask.sum() > 1 and len(set(labels[mask])) > 1:
    sil = silhouette_score(X[mask], labels[mask])
    print("Silhouette (no noise):", sil)
else:
    print("Not enough clustered points for silhouette.")

Silhouette (no noise): 0.37169939279556274


In [14]:
import numpy as np

# Count clusters (excluding noise = -1)
unique, counts = np.unique(labels[labels != -1], return_counts=True)

# Combine into list of (cluster_id, size)
cluster_sizes = list(zip(unique, counts))

# Sort by size descending
cluster_sizes_sorted = sorted(cluster_sizes, key=lambda x: -x[1])

# Print
print("Cluster sizes (largest → smallest):")
for cid, size in cluster_sizes_sorted:
    print(f"Cluster {cid}: {size} points")

Cluster sizes (largest → smallest):
Cluster 1: 8365 points
Cluster 2: 4479 points
Cluster 0: 1711 points


In [21]:

pd.set_option('display.max_colwidth', None)
df_sample[df_sample['cluster'] == 0].head(100)


Unnamed: 0,file,message,clean_message,cluster
448503,solberg-g/deleted_items/331.,"Message-ID: <26758243.1075841571375.JavaMail.evans@thyme>\nDate: Tue, 22 Jan 2002 11:37:15 -0800 (PST)\nFrom: pete.davis@enron.com\nTo: pete.davis@enron.com\nSubject: Start Date: 1/22/02; HourAhead hour: 14;\nCc: albert.meyers@enron.com, bill.williams@enron.com, craig.dean@enron.com, \n\tgeir.solberg@enron.com, john.anderson@enron.com, \n\tmark.guzman@enron.com, michael.mier@enron.com, pete.davis@enron.com, \n\tryan.slinger@enron.com\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nBcc: albert.meyers@enron.com, bill.williams@enron.com, craig.dean@enron.com, \n\tgeir.solberg@enron.com, john.anderson@enron.com, \n\tmark.guzman@enron.com, michael.mier@enron.com, pete.davis@enron.com, \n\tryan.slinger@enron.com\nX-From: Schedule Crawler<pete.davis@enron.com>@ENRON\nX-To: Davis, Pete </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PDAVIS1>\nX-cc: Meyers, Albert </O=ENRON/OU=NA/CN=RECIPIENTS/CN=BMEYERS>, Williams III, Bill </O=ENRON/OU=NA/CN=RECIPIENTS/CN=BWILLIA5>, Dean, Craig </O=ENRON/OU=NA/CN=RECIPIENTS/CN=CDEAN2>, Solberg, Geir </O=ENRON/OU=NA/CN=RECIPIENTS/CN=GSOLBER>, Anderson, John </O=ENRON/OU=NA/CN=RECIPIENTS/CN=JANDERS3>, Guzman, Mark </O=ENRON/OU=NA/CN=RECIPIENTS/CN=MGUZMAN3>, Mier, Michael </O=ENRON/OU=NA/CN=RECIPIENTS/CN=MMIER>, Davis, Pete </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PDAVIS1>, Slinger, Ryan </O=ENRON/OU=NA/CN=RECIPIENTS/CN=RSLINGER>\nX-bcc: \nX-Folder: \ExMerge - Solberg, Geir\Deleted Items\nX-Origin: SOLBERG-G\nX-FileName: geir solberg 6-26-02.PST\n\n\n\nStart Date: 1/22/02; HourAhead hour: 14; No ancillary schedules awarded. No variances detected.\n\n LOG MESSAGES:\n\nPARSING FILE -->> O:\Portland\WestDesk\California Scheduling\ISO Final Schedules\2002012214.txt",Start Date: 1/22/02; HourAhead hour: 14; No ancillary schedules awarded. No variances detected. LOG MESSAGES: PARSING FILE -->> O:\Portland\WestDesk\California Scheduling\ISO Final Schedules\2002012214.txt,0
282009,linder-e/all_documents/226.,"Message-ID: <25828483.1075841002337.JavaMail.evans@thyme>\nDate: Fri, 6 Apr 2001 02:05:00 -0700 (PDT)\nFrom: pete.davis@enron.com\nTo: pete.davis@enron.com\nSubject: Start Date: 4/6/01; HourAhead hour: 9; <CODESITE>\nCc: bert.meyers@enron.com, bill.williams.iii@enron.com, craig.dean@enron.com, \n\tdporter3@enron.com, eric.linder@enron.com, geir.solberg@enron.com, \n\tjbryson@enron.com, leaf.harasin@enron.com, \n\tmonika.causholli@enron.com, mark.guzman@enron.com, \n\tpete.davis@enron.com, ryan.slinger@enron.com\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nBcc: bert.meyers@enron.com, bill.williams.iii@enron.com, craig.dean@enron.com, \n\tdporter3@enron.com, eric.linder@enron.com, geir.solberg@enron.com, \n\tjbryson@enron.com, leaf.harasin@enron.com, \n\tmonika.causholli@enron.com, mark.guzman@enron.com, \n\tpete.davis@enron.com, ryan.slinger@enron.com\nX-From: Schedule Crawler<pete.davis@enron.com>\nX-To: pete.davis@enron.com\nX-cc: bert.meyers@enron.com, bill.williams.III@enron.com, Craig.Dean@enron.com, dporter3@enron.com, Eric.Linder@enron.com, Geir.Solberg@enron.com, jbryson@enron.com, leaf.harasin@enron.com, monika.causholli@enron.com, mark.guzman@enron.com, pete.davis@enron.com, ryan.slinger@enron.com\nX-bcc: \nX-Folder: \eric linder 6-28-02\Notes Folders\All documents\nX-Origin: LINDER-E\nX-FileName: eric linder 6-28-02.nsf\n\nStart Date: 4/6/01; HourAhead hour: 9; No ancillary schedules awarded. No \nvariances detected.\n\n LOG MESSAGES:\n\nPARSING FILE -->> O:\Portland\WestDesk\California Scheduling\ISO Final \nSchedules\2001040609.txt\n\n---- Energy Import/Export Schedule ----\n*** Final schedule not found for preferred schedule.\n Details:\n\n TRANS_TYPE: FINAL\n SC_ID: ECTRT\n MKT_TYPE: 2\n TRANS_DATE: 4/6/01\n TIE_POINT: PVERDE_5_DEVERS\n INTERCHG_ID: EPMI_CISO_SCOUT\n ENGY_TYPE: WHEEL\n*** Final schedule not found for preferred schedule.\n Details:\n\n TRANS_TYPE: FINAL\n SC_ID: ECTRT\n MKT_TYPE: 2\n TRANS_DATE: 4/6/01\n TIE_POINT: SLVRPK_7_SPP\n INTERCHG_ID: EPMI_CISO_SCOUT\n ENGY_TYPE: WHEEL\n\n",Start Date: 4/6/01; HourAhead hour: 9; No ancillary schedules awarded. No variances detected. LOG MESSAGES: PARSING FILE -->> O:\Portland\WestDesk\California Scheduling\ISO Final Schedules\2001040609.txt ---- Energy Import/Export Schedule ---- *** Final schedule not found for preferred schedule. Details: TRANS_TYPE: FINAL SC_ID: ECTRT MKT_TYPE: 2 TRANS_DATE: 4/6/01 TIE_POINT: PVERDE_5_DEVERS INTERCHG_ID: EPMI_CISO_SCOUT ENGY_TYPE: WHEEL *** Final schedule not found for preferred schedule. Details: TRANS_TYPE: FINAL SC_ID: ECTRT MKT_TYPE: 2 TRANS_DATE: 4/6/01 TIE_POINT: SLVRPK_7_SPP INTERCHG_ID: EPMI_CISO_SCOUT ENGY_TYPE: WHEEL,0
87667,dean-c/inbox/864.,"Message-ID: <14461595.1075852158409.JavaMail.evans@thyme>\nDate: Tue, 9 Oct 2001 04:38:16 -0700 (PDT)\nFrom: pete.davis@enron.com\nTo: pete.davis@enron.com\nSubject: Start Date: 10/9/01; HourAhead hour: 7; <CODESITE>\nCc: bert.meyers@enron.com, bill.williams@enron.com, craig.dean@enron.com, \n\teric.linder@enron.com, geir.solberg@enron.com, kate.symes@enron.com, \n\tleaf.harasin@enron.com, monika.causholli@enron.com, \n\tmark.guzman@enron.com, pete.davis@enron.com, ryan.slinger@enron.com\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nBcc: bert.meyers@enron.com, bill.williams@enron.com, craig.dean@enron.com, \n\teric.linder@enron.com, geir.solberg@enron.com, kate.symes@enron.com, \n\tleaf.harasin@enron.com, monika.causholli@enron.com, \n\tmark.guzman@enron.com, pete.davis@enron.com, ryan.slinger@enron.com\nX-From: Davis, Pete </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PDAVIS1>\nX-To: Davis, Pete </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PDAVIS1>\nX-cc: Meyers, Bert </O=ENRON/OU=NA/CN=RECIPIENTS/CN=BMEYERS>, Williams III, Bill </O=ENRON/OU=NA/CN=RECIPIENTS/CN=BWILLIA5>, Dean, Craig </O=ENRON/OU=NA/CN=RECIPIENTS/CN=CDEAN2>, Linder, Eric </O=ENRON/OU=NA/CN=RECIPIENTS/CN=ELINDER>, Solberg, Geir </O=ENRON/OU=NA/CN=RECIPIENTS/CN=GSOLBER>, Symes, Kate </O=ENRON/OU=NA/CN=RECIPIENTS/CN=KSYMES>, Harasin, Leaf </O=ENRON/OU=NA/CN=RECIPIENTS/CN=LHARASIN>, Causholli, Monika </O=ENRON/OU=NA/CN=RECIPIENTS/CN=MCAUSHOL>, Guzman, Mark </O=ENRON/OU=NA/CN=RECIPIENTS/CN=MGUZMAN3>, Davis, Pete </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PDAVIS1>, Slinger, Ryan </O=ENRON/OU=NA/CN=RECIPIENTS/CN=RSLINGER>, smerris@enron.com\nX-bcc: \nX-Folder: \CDEAN2 (Non-Privileged)\Dean, Craig\Inbox\nX-Origin: DEAN-C\nX-FileName: CDEAN2 (Non-Privileged).pst\n\n\n\nStart Date: 10/9/01; HourAhead hour: 7; No ancillary schedules awarded. No variances detected. \n\n LOG MESSAGES:\n\nPARSING FILE -->> O:\Portland\WestDesk\California Scheduling\ISO Final Schedules\2001100907.txt\n\nError retrieving HourAhead price data - process continuing...",Start Date: 10/9/01; HourAhead hour: 7; No ancillary schedules awarded. No variances detected. LOG MESSAGES: PARSING FILE -->> O:\Portland\WestDesk\California Scheduling\ISO Final Schedules\2001100907.txt Error retrieving HourAhead price data - process continuing...,0
448525,solberg-g/deleted_items/351.,"Message-ID: <19831488.1075841571882.JavaMail.evans@thyme>\nDate: Mon, 21 Jan 2002 17:36:35 -0800 (PST)\nFrom: pete.davis@enron.com\nTo: pete.davis@enron.com\nSubject: Schedule Crawler: HourAhead Failure\nCc: albert.meyers@enron.com, bill.williams@enron.com, craig.dean@enron.com, \n\tgeir.solberg@enron.com, john.anderson@enron.com, \n\tmark.guzman@enron.com, michael.mier@enron.com, pete.davis@enron.com, \n\tryan.slinger@enron.com\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nBcc: albert.meyers@enron.com, bill.williams@enron.com, craig.dean@enron.com, \n\tgeir.solberg@enron.com, john.anderson@enron.com, \n\tmark.guzman@enron.com, michael.mier@enron.com, pete.davis@enron.com, \n\tryan.slinger@enron.com\nX-From: Schedule Crawler<pete.davis@enron.com>@ENRON\nX-To: Davis, Pete </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PDAVIS1>\nX-cc: Meyers, Albert </O=ENRON/OU=NA/CN=RECIPIENTS/CN=BMEYERS>, Williams III, Bill </O=ENRON/OU=NA/CN=RECIPIENTS/CN=BWILLIA5>, Dean, Craig </O=ENRON/OU=NA/CN=RECIPIENTS/CN=CDEAN2>, Solberg, Geir </O=ENRON/OU=NA/CN=RECIPIENTS/CN=GSOLBER>, Anderson, John </O=ENRON/OU=NA/CN=RECIPIENTS/CN=JANDERS3>, Guzman, Mark </O=ENRON/OU=NA/CN=RECIPIENTS/CN=MGUZMAN3>, Mier, Michael </O=ENRON/OU=NA/CN=RECIPIENTS/CN=MMIER>, Davis, Pete </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PDAVIS1>, Slinger, Ryan </O=ENRON/OU=NA/CN=RECIPIENTS/CN=RSLINGER>\nX-bcc: \nX-Folder: \ExMerge - Solberg, Geir\Deleted Items\nX-Origin: SOLBERG-G\nX-FileName: geir solberg 6-26-02.PST\n\n\n\nStart Date: 1/21/02; HourAhead hour: 20; HourAhead schedule download failed. Manual intervention required.\n\n LOG MESSAGES:\n\nPARSING FILE -->> O:\Portland\WestDesk\California Scheduling\ISO Final Schedules\2002012120.txt\nError: dbCaps97Data: Cannot perform this operation on a closed database\n!!!Unknown database.\nAlias: dbCaps97Data\n!!!Unknown database.\nAlias: dbCaps97Data\n!!!Unknown database.\nAlias: dbCaps97Data\nError: dbCaps97Data: Cannot perform this operation on a closed database\n!!!Unknown database.\nAlias: dbCaps97Data\n!!!Unknown database.\nAlias: dbCaps97Data\n!!!Unknown database.\nAlias: dbCaps97Data\nError: dbCaps97Data: Cannot perform this operation on a closed database\n!!!Unknown database.\nAlias: dbCaps97Data\n!!!Unknown database.\nAlias: dbCaps97Data\nError: dbCaps97Data: Cannot perform this operation on a closed database\nError: dbCaps97Data: Cannot perform this operation on a closed database\nError: dbCaps97Data: Cannot perform this operation on a closed database\n!!!Unknown database.\nAlias: dbCaps97Data\n!!!Unknown database.\nAlias: dbCaps97Data\n!!!Unknown database.\nAlias: dbCaps97Data\n!!!Unknown database.\nAlias: dbCaps97Data\n!!!Unknown database.\nAlias: dbCaps97Data\n!!!Unknown database.\nAlias: dbCaps97Data\nError: dbCaps97Data: Cannot perform this operation on a closed database",Start Date: 1/21/02; HourAhead hour: 20; HourAhead schedule download failed. Manual intervention required. LOG MESSAGES: PARSING FILE -->> O:\Portland\WestDesk\California Scheduling\ISO Final Schedules\2002012120.txt Error: dbCaps97Data: Cannot perform this operation on a closed database !!!Unknown database. Alias: dbCaps97Data !!!Unknown database. Alias: dbCaps97Data !!!Unknown database. Alias: dbCaps97Data Error: dbCaps97Data: Cannot perform this operation on a closed database !!!Unknown database. Alias: dbCaps97Data !!!Unknown database. Alias: dbCaps97Data !!!Unknown database. Alias: dbCaps97Data Error: dbCaps97Data: Cannot perform this operation on a closed database !!!Unknown database. Alias: dbCaps97Data !!!Unknown database. Alias: dbCaps97Data Error: dbCaps97Data: Cannot perform this operation on a closed database Error: dbCaps97Data: Cannot perform this operation on a closed database Error: dbCaps97Data: Cannot perform this operation on a closed database !!!Unknown database. Alias: dbCaps97Data !!!Unknown database. Alias: dbCaps97Data !!!Unknown database. Alias: dbCaps97Data !!!Unknown database. Alias: dbCaps97Data !!!Unknown database. Alias: dbCaps97Data !!!Unknown database. Alias: dbCaps97Data Error: dbCaps97Data: Cannot perform this operation on a closed database,0
87031,dean-c/inbox/264.,"Message-ID: <520673.1075852143202.JavaMail.evans@thyme>\nDate: Thu, 18 Oct 2001 12:57:24 -0700 (PDT)\nFrom: pete.davis@enron.com\nTo: pete.davis@enron.com\nSubject: Schedule Crawler: HourAhead Failure <CODESITE>\nCc: bert.meyers@enron.com, bill.williams@enron.com, craig.dean@enron.com, \n\teric.linder@enron.com, geir.solberg@enron.com, kate.symes@enron.com, \n\tleaf.harasin@enron.com, mark.guzman@enron.com, pete.davis@enron.com, \n\tryan.slinger@enron.com\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nBcc: bert.meyers@enron.com, bill.williams@enron.com, craig.dean@enron.com, \n\teric.linder@enron.com, geir.solberg@enron.com, kate.symes@enron.com, \n\tleaf.harasin@enron.com, mark.guzman@enron.com, pete.davis@enron.com, \n\tryan.slinger@enron.com\nX-From: Davis, Pete </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PDAVIS1>\nX-To: Davis, Pete </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PDAVIS1>\nX-cc: Meyers, Bert </O=ENRON/OU=NA/CN=RECIPIENTS/CN=BMEYERS>, Williams III, Bill </O=ENRON/OU=NA/CN=RECIPIENTS/CN=BWILLIA5>, Dean, Craig </O=ENRON/OU=NA/CN=RECIPIENTS/CN=CDEAN2>, Linder, Eric </O=ENRON/OU=NA/CN=RECIPIENTS/CN=ELINDER>, Solberg, Geir </O=ENRON/OU=NA/CN=RECIPIENTS/CN=GSOLBER>, Symes, Kate </O=ENRON/OU=NA/CN=RECIPIENTS/CN=KSYMES>, Harasin, Leaf </O=ENRON/OU=NA/CN=RECIPIENTS/CN=LHARASIN>, Guzman, Mark </O=ENRON/OU=NA/CN=RECIPIENTS/CN=MGUZMAN3>, Davis, Pete </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PDAVIS1>, Slinger, Ryan </O=ENRON/OU=NA/CN=RECIPIENTS/CN=RSLINGER>, smerris@enron.com\nX-bcc: \nX-Folder: \CDEAN2 (Non-Privileged)\Dean, Craig\Inbox\nX-Origin: DEAN-C\nX-FileName: CDEAN2 (Non-Privileged).pst\n\n\n\nStart Date: 10/18/01; HourAhead hour: 15; HourAhead schedule download failed. Manual intervention required.",Start Date: 10/18/01; HourAhead hour: 15; HourAhead schedule download failed. Manual intervention required.,0
...,...,...,...,...
148550,guzman-m/discussion_threads/1552.,"Message-ID: <19729454.1075840679515.JavaMail.evans@thyme>\nDate: Wed, 18 Apr 2001 00:43:00 -0700 (PDT)\nFrom: pete.davis@enron.com\nTo: pete.davis@enron.com\nSubject: Start Date: 4/18/01; HourAhead hour: 8; <CODESITE>\nCc: bert.meyers@enron.com, bill.williams.iii@enron.com, craig.dean@enron.com, \n\tdporter3@enron.com, eric.linder@enron.com, geir.solberg@enron.com, \n\tjbryson@enron.com, leaf.harasin@enron.com, \n\tmonika.causholli@enron.com, mark.guzman@enron.com, \n\tpete.davis@enron.com, ryan.slinger@enron.com, \n\tsteven.merris@enron.com\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nBcc: bert.meyers@enron.com, bill.williams.iii@enron.com, craig.dean@enron.com, \n\tdporter3@enron.com, eric.linder@enron.com, geir.solberg@enron.com, \n\tjbryson@enron.com, leaf.harasin@enron.com, \n\tmonika.causholli@enron.com, mark.guzman@enron.com, \n\tpete.davis@enron.com, ryan.slinger@enron.com, \n\tsteven.merris@enron.com\nX-From: Schedule Crawler<pete.davis@enron.com>\nX-To: pete.davis@enron.com\nX-cc: bert.meyers@enron.com, bill.williams.III@enron.com, Craig.Dean@enron.com, dporter3@enron.com, Eric.Linder@enron.com, Geir.Solberg@enron.com, jbryson@enron.com, leaf.harasin@enron.com, monika.causholli@enron.com, mark.guzman@enron.com, pete.davis@enron.com, ryan.slinger@enron.com, steven.merris@enron.com\nX-bcc: \nX-Folder: \mark guzman 6-28-02\Notes Folders\Discussion threads\nX-Origin: GUZMAN-M\nX-FileName: mark guzman 6-28-02.nsf\n\nStart Date: 4/18/01; HourAhead hour: 8; No ancillary schedules awarded. No \nvariances detected.\n\n LOG MESSAGES:\n\nPARSING FILE -->> O:\Portland\WestDesk\California Scheduling\ISO Final \nSchedules\2001041808.txt",Start Date: 4/18/01; HourAhead hour: 8; No ancillary schedules awarded. No variances detected. LOG MESSAGES: PARSING FILE -->> O:\Portland\WestDesk\California Scheduling\ISO Final Schedules\2001041808.txt,0
148908,guzman-m/discussion_threads/1878.,"Message-ID: <27804206.1075840688435.JavaMail.evans@thyme>\nDate: Fri, 27 Apr 2001 11:41:00 -0700 (PDT)\nFrom: pete.davis@enron.com\nTo: pete.davis@enron.com\nSubject: Start Date: 4/27/01; HourAhead hour: 19; <CODESITE>\nCc: bert.meyers@enron.com, bill.williams.iii@enron.com, craig.dean@enron.com, \n\tdporter3@enron.com, eric.linder@enron.com, geir.solberg@enron.com, \n\tjbryson@enron.com, leaf.harasin@enron.com, \n\tmonika.causholli@enron.com, mark.guzman@enron.com, \n\tpete.davis@enron.com, ryan.slinger@enron.com, \n\tsteven.merris@enron.com\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nBcc: bert.meyers@enron.com, bill.williams.iii@enron.com, craig.dean@enron.com, \n\tdporter3@enron.com, eric.linder@enron.com, geir.solberg@enron.com, \n\tjbryson@enron.com, leaf.harasin@enron.com, \n\tmonika.causholli@enron.com, mark.guzman@enron.com, \n\tpete.davis@enron.com, ryan.slinger@enron.com, \n\tsteven.merris@enron.com\nX-From: Schedule Crawler<pete.davis@enron.com>\nX-To: pete.davis@enron.com\nX-cc: bert.meyers@enron.com, bill.williams.III@enron.com, Craig.Dean@enron.com, dporter3@enron.com, Eric.Linder@enron.com, Geir.Solberg@enron.com, jbryson@enron.com, leaf.harasin@enron.com, monika.causholli@enron.com, mark.guzman@enron.com, pete.davis@enron.com, ryan.slinger@enron.com, steven.merris@enron.com\nX-bcc: \nX-Folder: \mark guzman 6-28-02\Notes Folders\Discussion threads\nX-Origin: GUZMAN-M\nX-FileName: mark guzman 6-28-02.nsf\n\nStart Date: 4/27/01; HourAhead hour: 19; No ancillary schedules awarded. No \nvariances detected.\n\n LOG MESSAGES:\n\nPARSING FILE -->> O:\Portland\WestDesk\California Scheduling\ISO Final \nSchedules\2001042719.txt",Start Date: 4/27/01; HourAhead hour: 19; No ancillary schedules awarded. No variances detected. LOG MESSAGES: PARSING FILE -->> O:\Portland\WestDesk\California Scheduling\ISO Final Schedules\2001042719.txt,0
337699,meyers-a/deleted_items/283.,"Message-ID: <24875395.1075841295956.JavaMail.evans@thyme>\nDate: Sun, 27 Jan 2002 00:57:02 -0800 (PST)\nFrom: pete.davis@enron.com\nTo: pete.davis@enron.com\nSubject: Schedule Crawler: HourAhead Failure\nCc: albert.meyers@enron.com, bill.williams@enron.com, craig.dean@enron.com, \n\tgeir.solberg@enron.com, john.anderson@enron.com, \n\tmark.guzman@enron.com, michael.mier@enron.com, pete.davis@enron.com, \n\tryan.slinger@enron.com\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nBcc: albert.meyers@enron.com, bill.williams@enron.com, craig.dean@enron.com, \n\tgeir.solberg@enron.com, john.anderson@enron.com, \n\tmark.guzman@enron.com, michael.mier@enron.com, pete.davis@enron.com, \n\tryan.slinger@enron.com\nX-From: Davis, Pete </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PDAVIS1>\nX-To: Davis, Pete </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PDAVIS1>\nX-cc: Meyers, Albert </O=ENRON/OU=NA/CN=RECIPIENTS/CN=BMEYERS>, Williams III, Bill </O=ENRON/OU=NA/CN=RECIPIENTS/CN=BWILLIA5>, Dean, Craig </O=ENRON/OU=NA/CN=RECIPIENTS/CN=CDEAN2>, Solberg, Geir </O=ENRON/OU=NA/CN=RECIPIENTS/CN=GSOLBER>, Anderson, John </O=ENRON/OU=NA/CN=RECIPIENTS/CN=JANDERS3>, Guzman, Mark </O=ENRON/OU=NA/CN=RECIPIENTS/CN=MGUZMAN3>, Mier, Michael </O=ENRON/OU=NA/CN=RECIPIENTS/CN=MMIER>, Davis, Pete </O=ENRON/OU=NA/CN=RECIPIENTS/CN=PDAVIS1>, Slinger, Ryan </O=ENRON/OU=NA/CN=RECIPIENTS/CN=RSLINGER>\nX-bcc: \nX-Folder: \ExMerge - Meyers, Albert\Deleted Items\nX-Origin: MEYERS-A\nX-FileName: bert meyers 6-25-02.PST\n\n\n\nStart Date: 1/27/02; HourAhead hour: 3; HourAhead schedule download failed. Manual intervention required.",Start Date: 1/27/02; HourAhead hour: 3; HourAhead schedule download failed. Manual intervention required.,0
282757,linder-e/all_documents/903.,"Message-ID: <24280884.1075841019085.JavaMail.evans@thyme>\nDate: Sun, 29 Apr 2001 16:40:00 -0700 (PDT)\nFrom: pete.davis@enron.com\nTo: pete.davis@enron.com\nSubject: Start Date: 4/29/01; HourAhead hour: 24; <CODESITE>\nCc: bert.meyers@enron.com, bill.williams.iii@enron.com, craig.dean@enron.com, \n\tdporter3@enron.com, eric.linder@enron.com, geir.solberg@enron.com, \n\tjbryson@enron.com, leaf.harasin@enron.com, \n\tmonika.causholli@enron.com, mark.guzman@enron.com, \n\tpete.davis@enron.com, ryan.slinger@enron.com, \n\tsteven.merris@enron.com\nMime-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nContent-Transfer-Encoding: 7bit\nBcc: bert.meyers@enron.com, bill.williams.iii@enron.com, craig.dean@enron.com, \n\tdporter3@enron.com, eric.linder@enron.com, geir.solberg@enron.com, \n\tjbryson@enron.com, leaf.harasin@enron.com, \n\tmonika.causholli@enron.com, mark.guzman@enron.com, \n\tpete.davis@enron.com, ryan.slinger@enron.com, \n\tsteven.merris@enron.com\nX-From: Schedule Crawler<pete.davis@enron.com>\nX-To: pete.davis@enron.com\nX-cc: bert.meyers@enron.com, bill.williams.III@enron.com, Craig.Dean@enron.com, dporter3@enron.com, Eric.Linder@enron.com, Geir.Solberg@enron.com, jbryson@enron.com, leaf.harasin@enron.com, monika.causholli@enron.com, mark.guzman@enron.com, pete.davis@enron.com, ryan.slinger@enron.com, steven.merris@enron.com\nX-bcc: \nX-Folder: \eric linder 6-28-02\Notes Folders\All documents\nX-Origin: LINDER-E\nX-FileName: eric linder 6-28-02.nsf\n\nStart Date: 4/29/01; HourAhead hour: 24; No ancillary schedules awarded. No \nvariances detected.\n\n LOG MESSAGES:\n\nPARSING FILE -->> O:\Portland\WestDesk\California Scheduling\ISO Final \nSchedules\2001042924.txt",Start Date: 4/29/01; HourAhead hour: 24; No ancillary schedules awarded. No variances detected. LOG MESSAGES: PARSING FILE -->> O:\Portland\WestDesk\California Scheduling\ISO Final Schedules\2001042924.txt,0
