In [30]:
import pandas as pd
import numpy as np
import re
import nltk

# Plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Import natural language processing libraries
import nltk
from sklearn.feature_extraction.text import CountVectorizer

from sklearn.preprocessing import StandardScaler

from sklearn.cluster import DBSCAN, KMeans

from sklearn.metrics import silhouette_score

In [2]:
def custom_tokenizer(text):
    # allow alphabetical characters and spaces only
    processed_text = re.sub(r'[^a-zA-Z\s]', '', text)
    # Filter out additional spaces
    processed_text = re.sub(r'\s\s*', ' ', processed_text)
    # Filter out weird symbols i.e. "1x2"
    processed_text = re.sub(r'\d*x\d*', '', processed_text)
    
    # split sentence into words
    listofwords = processed_text.split(' ')
    listofstemmed_words = []
    
    # remove stopwords and any tokens that are just empty strings
    for word in listofwords:
        if (not word in ENGLISH_STOP_WORDS) and (word!=''):
            # Stem words
            stemmed_word = stemmer.stem(word)
            listofstemmed_words.append(stemmed_word)

    return listofstemmed_words

In [16]:
# import the nltk stopwords
nltk.download('stopwords')
from nltk.corpus import stopwords 

ENGLISH_STOP_WORDS = stopwords.words('english')
stemmer = nltk.stem.PorterStemmer()

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/rosswillett/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [17]:
df = pd.read_csv('./data/enron_external.csv')

In [18]:
df['content'].isna().sum()

38

In [19]:
df = df[~df['content'].isna()]

In [20]:
df['content'].isna().sum()

0

In [21]:
word_vectorizer = CountVectorizer(
    tokenizer=custom_tokenizer,
    min_df=0.1,
)

In [10]:
word_vectorizer.fit(df['content'])
word_vectorized = word_vectorizer.transform(df['content'])



In [12]:
word_df = pd.DataFrame(
    data=word_vectorized.toarray(),
    columns=word_vectorizer.get_feature_names_out(),
)

In [13]:
word_df

Unnamed: 0,addit,address,also,attach,avail,back,best,busi,buy,call,...,visit,want,way,web,wednesday,week,well,work,would,year
0,0,0,1,0,0,2,0,0,0,3,...,1,0,0,0,0,0,1,1,1,0
1,0,0,0,0,0,0,0,2,1,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,1,...,3,0,0,3,0,0,1,0,2,1
3,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,1,0,0,1,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85919,0,0,0,0,1,0,0,0,0,0,...,0,0,0,1,1,0,0,0,0,0
85920,0,0,0,0,2,0,0,1,0,1,...,1,0,0,0,0,0,0,0,0,0
85921,0,0,0,1,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
85922,1,0,0,1,0,0,0,0,0,1,...,0,0,0,0,0,2,0,0,0,0


In [14]:
word_df.to_csv('./data/enron_external_stem_vectorized.csv')

In [8]:
word_df = pd.read_csv('./data/enron_external_stem_vectorized.csv')

In [9]:
scaler = StandardScaler()
word_df_scaled = scaler.fit_transform(word_df)

In [38]:
# Initialize
kmeans = KMeans(n_clusters=3)

# Fit
kmeans.fit(word_df_scaled)

# Predict
km_cluster_labels = kmeans.predict(word_df_scaled)



In [39]:
df['category'] = km_cluster_labels

In [44]:
for cluster in np.unique(km_cluster_labels):
    display(df[df['category'] == cluster].sample(1))

Unnamed: 0,from,to,subject,content,category
66018,prebonenergy@corporateecards.com,sandra.f.brawner@enron.com,Prebon Yamane's Energy Holiday Party,"Hi Sandra, You are cordially invited to attend Prebon Yamanes Energy Holiday Party. Please view your invitation and RSVP NOW its as easy as clicking on the URL below http://cards.corporateecards.com/view.aspIDIttSTUX8794W91773 AOL users need to copy & paste the URL into their web browser. This invite is supported by Netscape and Internet Explorer 3.0 and above. If you are having difficulties viewing or responding to the invite please click REPLY to RSVP or follow the instructions below: You will need to copy and paste the address into your World Wide Web browser. 1. Make sure you only have one web browser open. 2. Highlight the address by dragging the cursor across the URL make sure you get the whole address. 3. Copy and paste the URL into your web browser. 4. Hit enter.",0


Unnamed: 0,from,to,subject,content,category
77988,suzanne_nimocks@mckinsey.com,skean@enron.com,California Power Crisis Update (No. 10),"We have been pulling together these weeklysometimes more often summaries for internal purposes. Would you find it helpful to be on the distribution list Hope you are doing well. Look forward to touching base soon. ----- Forwarded by Suzanne Nimocks/HOU/NorthAmerica/MCKINSEY on 03/28/2001 03:41 AM ----- Memorandum TO: Pru Sheppard BCC: Suzanne Nimocks FROM: Pru Sheppard B. Venki Venkateshwara DATE: March 27, 2001 California Power Crisis Update No. 10 DEVELOPMENTS THIS WEEK, 3/23/2001 The weeks highlights include: Continued indications that the issue of market power and possible remedies for it is likely to remain a high profile issue in California and elsewhere both retroactively and prospectively An ironical situation with respect to QFs in which QF power under contract is effectively being released into the market at higher prices A court order requiring Reliant to continue to sell power to the ISO even if it is not being paid in a full and timely manner Another Stage 3 emergency and rolling blackouts Market power There are continued indications that the issue of market power will not be settled simply. This week there was a lengthy and politically influential front page story in the New York Times about FERCs passive approach to policing generators Critics Say U.S. Energy Agency Is Weak in Oversight of Utilities. The story was by Jeff Gerth and Joseph Kahn. Jeff Gerths 1992 story on the Whitewater deal is viewed by journalists to have been the origin of what eventually became a multi-year investigation of Bill Clinton. The key issues are familiar: Does market power exist to a degree that warrants remedies such as price caps, refunds, and so on If so, what is the basis for asserting that market power exists and what is the remedy See the discussion in the New York Times article on the good hours vs. bad hours approach and the associated political decision not to deal with good hours. Can market power be used as leverage to eventually settle generator bills in C...",1


Unnamed: 0,from,to,subject,content,category
64680,issuealert@scientech.com,issuealerthtml@listserv.scientech.com,Some Reasons Why Duke Energy Was Named Company of the Year,"SPONSORS INFORMATION PRODUCTS CONFERENCES Center for Public Utilties The Basics: Practical Skills for a Changing Utility December 10, 2001 Some Reasons Why Duke Energy Was Named Company of the Year By Will McNamara Director, Electric Industry Analysis News item from PR Newswire Duke Energy NYSE: DUK was named Energy Company of the Year at the 2001 Financial Times Global Energy Awards ceremonies held last week in New York. The awards, sponsored by publishing and information company Platts and the professional services firm Deloitte Touche Tohmatsu, recognized Duke Energy for its diversified operations and its successful overall growth during the past year. Analysis: 2001 may turn out to be the most tumultuous year in the history of the energy industry, and certainly the most volatile since deregulation began about four years ago. However, in the course of a year that has witnessed the bankruptcy filings of two leading energy companies Pacific Gas & Electric Co. and Enron, Duke Energys success during the same time frame is all the more impressive. In addition to earnings that consistently meet or exceed expectations, Duke has carved out a unique niche in the energy merchant space, the same market in which its competitor Enron has now failed. The pending acquisition of Vancouver-based Westcoast Energy should also add to Dukes already-diverse portfolio, give it a lock on Canadas seemingly abundant gas reserve fields, and expand the companys North American pipeline capacity. However, in my estimation, the real significance of Dukes performance is the strategy that is working behind the scenes at this company. While more energy companies continue to announce massive restructuring plans-dividing up their regulated and unregulated businesses-Duke remains intact. Further, while other companies become more focused on a primary fuel source natural gas, coal or nuclear, Duke remains broadly diversified. What this means is that, once again, Duke Energy is bucking trends and ...",2


In [13]:
# Instantiate
dbscan_model = DBSCAN(eps=7, min_samples=50)

# Fit and get labels
cluster_labels = dbscan_model.fit_predict(word_df_scaled)

In [14]:
np.unique(cluster_labels)

array([-1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [24]:
pd.set_option('display.max_colwidth', 2000)

In [22]:
df['category'] = cluster_labels

In [29]:
for cluster in np.unique(cluster_labels):
    display(df[df['category'] == cluster].sample(1))

Unnamed: 0,from,to,subject,content,category
37065,orders@shutterfly.com,ebass@enron.com,Your Shutterfly order #000026422747-8000012,"Hello Eric, Thank you for ordering from Shutterfly Please keep a copy of this email as a record of the dollar amount that will be credited towards your free prints, in Shutterflys Give What You Take holiday promotion. Remember, from November 15 to December 31, 2000 when you spend a total of $10 up to $25 you will earn 5 free 4x6 prints. When you spend a total of $25 or more during that time, you will earn 15 free 4x6 prints. Your credits will be applied to your account on January 8, 2001. See the website for details of the promotion. Below is a summary of your Shutterfly order. We will notify you by email when your order is shipped. Please note: Due to the highly automated nature of our system, we are unable to cancel or make changes to orders once they have been placed. ---------------------------------------------------------------------- Order Total $3.47 ---------------------------------------------------------------------- To: Eric Bass 2302 Travis St. 8225 Houston, TX 77006 United States of America 27 4x6 Print 4x6 $13.23 Shipping USPS1PRNT01 $2.49 Discount SF40 - $12.25 Tax TAXUSA $0.00 ---------------------------------------------------------------------- ------------ Total $3.47 ------------ Note: Large orders and orders containing multiple print sizes may be shipped in more than one envelope. Cards will be shipped separately. We wont charge your credit card until all of the items in your order have been shipped. If you need to get in touch with us about your order, please send an email to ordersshutterfly.com or just reply to this message. TELL A FRIEND -- GET FREE PRINTS Why not help spread the word about Shutterflys services and features Well reward you with 10 free 4x6 prints up to 100 total for every person you refer who opens their own free Shutterfly account. Your friends will also get free prints for signing up. To tell a friend about Shutterfly, click here: http://www.shutterfly.com/jump/signin.jspre/info/taflanding.jsp Thank you for using Shut...",-1


Unnamed: 0,from,to,subject,content,category
3025,vjwindsor@hotmail.com,vkamins@enron.com,From Vicky Windsor,"Dear Vince, How are you Well I hope. I hope you don,t mind me writing to you. You may remember that 5 months ago I left Risk Publications and moved to a charity. Having been here for only a few months, I have decided that this job is not for me I miss the buzz of a corporate office and I have decided to move back into the corporate sector. Because of my previous experience and knowledge of the energy sector, I am very interested in moving into this area. I have always thought that it would be great to work for Enron because it is such a dynamic company and I am planning to approach the London office to discuss any opportunities, which might be available. I am particularly interested in product marketing and research, although I am very open-minded at the moment. I wondered whether you could recommend the right person to speak to in London. I know that you are incredibly busy, but any help you can give me would be fantastic Vince. Thanks and best regards, Vicky Windsor Get Your Private, Free E-mail from MSN Hotmail at http://www.hotmail.com. Share information about yourself, create your own public profile at http://profiles.msn.com.",0


Unnamed: 0,from,to,subject,content,category
70521,navigator@nisource.com,navigator@nisource.com,"TCO- CAPACITY ALLOC-TIMELY NOMINATION CYCLE OCT. 13-15, 2001","COLUMBIA GAS TRANSMISSION CORPORATION NOTICE TO ALL INTERESTED PARTIES OCTOBER 12, 2001 Notice ID: 3179 5 - NO RESPONSE REQUIRED SUBJECT: CAPACITY ALLOCATION-TIMELY NOMINATION CYCLE FOR OCTOBER 13-15, 2001 PLEASE NOTE THAT ALL ALLOCATIONS ARE SUBJECT TO CHANGE BASED ON THE RECEIPT OF ANY NEW NOMINATIONS OR INTRADAY NOMINATIONS. RECEIPTS: TENNESSEE: UNIONVILLE: OCTOBER 13, 2001 - New IPP customers received equal shares of available capacity or their October 13, 2001 nominated quantities, whichever was less. UNIONVILLE: OCTOBER 14, 2001 - There is no capacity for new IPP customers. There is no capacity for IPP increases. UNIONVILLE: OCTOBER 15, 2001 - There is no capacity for new IPP customers. Flowing IPP customers nominating increases received 101.46% of their October 14, 2001 flowing volumes or their October 15, 2001 nominated quantities, whichever was less. PAULDING: There is no capacity for IPP customers. There is no capacity for secondary firm customers. DELIVERIES: ALGONQUIN: HANOVER: There is no capacity for IPP customers. There is no capacity for secondary firm customers. Excess MDWQ Available ISS Withdrawals Available SIT Withdrawals Available Imbalance Drawdowns Available PAL Lends/Unparks Available Excess MDIQ Not Available ISS Injections Not Available SIT Injections Not Available Imbalance Paybacks Not Available PAL Parks/Loan Paybacks Not Available Call Gas Control 24 hours in advance at 304 357-2606 to request approval. Columbia Gas Transmission has completed allocating all of its receipt and delivery points for the gas days of October 13-15, 2001. You may now view the quantities you are authorized to flow on October 13-15, 2001. On the Internet NAVIGATOR, view Nominations, Scheduled Quantity, Scheduled Quantity for Service Requestor-Summary, Nominations to be Viewed, and select Reduced Nominations. The quantities confirmed to flow for your account are available on the Internet NAVIGATOR after 5:30 p.m. Eastern Time on Friay, October 12, 2001. If yo...",1


Unnamed: 0,from,to,subject,content,category
70119,navigator@nisource.com,navigator@nisource.com,"TCO - REV.CAPACITY UPDATE EFFECTIVE FOR OCTOBER 29, 2001","COLUMBIA GAS TRANSMISSION CORPORATION NOTICE TO ALL INTERESTED PARTIES OCTOBER 26, 2001 Notice ID: 3216 5 - NO RESPONSE REQUIRED SUBJECT: REVISED-CAPACITY UPDATE EFFECTIVE FOR MONDAY, OCTOBER 29, 2001 CHANGES ARE INDICATED WITH AN Effective Monday, October 29, 2001, capacities will be as follows: Excess MDWQ Available ISS Withdrawals Available SIT Withdrawals Available Imbalance Drawdowns Available Excess MDIQ Available ISS Injections Available SIT Injections Available Imbalance Paybacks Available PAL Lends/Unparks Available PAL Parks/Loan Paybacks Available Call Gas Control 24 hours in advance at 304 357-2606 to request approval. Non-firm receipt capacity will be as follows: TENNESSEE: Brinker B12 20,000 Broad Run B9 300,000 Cambridge B10 20,000 Dungannon B11 20,000 Highland B17 0 NOTE: GAS RECEIVED AT HIGHLAND MUST BE DELIVERED IN THE NORTHERN PORTION OF MARKET AREA 38 OR THE NORTHWEST LATERAL OF MARKET AREA 36, DIRECTLY NORTH OF HIGHLAND. Milford B18 20,000 North Greenwood B22 0 Unionville B15 50,000 NOTE: EFFECTIVE THURSDAY, AUGUST 16, 2001, ANY SHIPPER UTILIZING A CONTRACT THAT HAS A PRIMARY RECEIPT POINTS WITH THE FOLLOWING POINTS, MUST UTILIZE THESE POINTS: Brinker B12 Cambridge B10 Dungannon B11 Highland B17 Milford B18 Unionville B15 TEXAS EASTERN: Delmont C16 0 Eagle C22 20,000 Hooker C9 20,000 Pennsburg C23 20,000 Windridge C12 20,000 NATIONAL FUEL: Independence M1 0 Ellwood City L1 15,000 TRANSCO: Downingtown E3 2,500 Emporia I E13 60,000 Rockville E2 0 Dranesville E1 0 EQUITABLE GAS: Hi Hat F3 0 KENTUCKY WEST VIRGINIA KYWV: Beaver Creek H1 0 CNR PIPELINE: Boldman CNR02 0 Conoway CNR03 0 Johns Creek CNR08 0 Canada CNR09 0 Canada CNR10 0 Stafford CNR11 0 Thacker/Majestic CNR12 0 Briar Mtn. CNR13 0 Huff Creek CNR14 0 CONOCO: 0 Grant P1 0 NOTE: ANY APPALACHIAN PRODUCTION FLOWING DIRECTLY INTO COLUMBIAS LINE KA BETWEEN COLUMBIAS BOLDMAN COMPRESSOR STATION AND COLUMBIAS HUFF CREEK COMPRESSOR STATION AS WELL AS PRODUCTION FLOWING INTO COLUMBIAS LINES SM-11...",2


Unnamed: 0,from,to,subject,content,category
11266,dkinney@columbiaenergygroup.com,"kevin.ruscitti@enron.com, chris.germany@enron.com",CES Daily Requirements for 5/31/00 and after,"The attached file contains our daily volume requirements for 5/31 and the following days as listed . The file there are three worksheets labelled as follows: Total Reqs: Contains the volumes that CES is requesting from Enron at each of the delivery points and dates indicated in the columns labelled NOM. The Enron schedulers will want to check this sheet to see how much gas is needed at a specific point. The AGL volumes are not timely and should be ignored until further notice. CES schedulers note: the attached file is archived on P:\Energy Ops\Enron\May00\Daily\ .xls, so you dont have to save a duplicate copy elsewhere on the network. Daily Change : shows the change increase /decrease - in the daily volume needed from Enron at the designated citygate delivery point relative to the supply volumes arranged previously with Enron prior to the current date. For the current gas day, the change is an intra-day change and is calculated using the previous days volume request for the current day e.g., If the current gas day is 3/28, then the change is computed with respect to the 3/27 request for 3/28. Note that this procedure assumes that the prior day supply change was in fact effectuated. For tomorrows gas day and any subsequent ones shown, the change is computed as the latest Dth Nomination requirement less the First-of-the-month supply volume plus or minus any adjustments made since the first-of-the-month to the FOM supply for the future days shown. e.g., If today is 3/28, then the change for 3/29, 3/30, etc. is computed with respect to the first-of-the-month volumes for those days adjusted for any additional purchases or sell-backs since the first-of-the-month thru 3/27. This sheet should be useful in determining prices for daily changes in gas volumes that we take or turn back. The column labelled ENA Daily Swing /- vs FOM volume shows the absolute magnitude of the daily swings allowed by ENA at pre-specified prices. Prices for Swing volumes exceeding these magnitu...",3


Unnamed: 0,from,to,subject,content,category
74467,memberservices@travelocity.com,pkeavey@enron.com,Travelocity.com fare watcher update,"Saturday, November 17, 2001 Dear Peter, Travelocity Fare Watcher, your personal airfare tracking service, has found fare changes in 3 of your markets. Note: Fares change quickly and sell out fast. Visit Travelocity now to take advantage of this great opportunity Click on this link or paste the URL into your browser: http://dps1.travelocity.com:80/fwemindiv.ctlfw8N0G2&ServiceTRAVELOCITY&LANGEN HAVE YOUR TRAVEL NEEDS CHANGED Looking to go someplace new Be sure to update your Fare Watcher subscription to include your favorite destinations. FARES IAH-LGA FROM: Houston-Bush Intercontinental IAH TO: New York LaGuardia LGA PRICE: NEW PRICE WAS DIFFERENCE --------- ------- ---------- $208.00 $262.00 -$54.00 LGA-IAH FROM: New York LaGuardia LGA TO: Houston-Bush Intercontinental IAH PRICE: NEW PRICE WAS DIFFERENCE --------- ------- ---------- $208.00 $262.00 -$54.00 IAH-JFK FROM: Houston-Bush Intercontinental IAH TO: New York Kennedy JFK PRICE: NEW PRICE WAS DIFFERENCE --------- ------- ---------- $208.00 $262.00 -$54.00 GREAT HOTEL DEALS Travelocity.com offers Value Rates at great hotels in: New York LaGuardia LGA starting at just $69 Click: http://dps1.travelocity.com:80/hotsrqst.ctlmode2&airportLGA&ratecatV Houston-Bush Intercontinental IAH starting at just $55 Click: http://dps1.travelocity.com:80/hotsrqst.ctlmode2&airportIAH&ratecatV New York Kennedy JFK starting at just $69 Click: http://dps1.travelocity.com:80/hotsrqst.ctlmode2&airportJFK&ratecatV Thank you for choosing Travelocity as your one-stop travel resource Sincerely, The Travelocity Team Travelocity.com - Go Virtually Anywhere http://www.travelocity.com/ ---------------------------------------------------------------- VACATION DEALS Hot deals to the Caribbean, Las Vegas, and more http://leisure.travelocity.com/RealDeals/0,2942,TRAVELOCITYVAC,00.html CRUISE DEALS Dont miss a look at our incredible Cruise Deals Updated daily, find the voyage that suits your budget http://leisure.travelocity.com/RealDeals/0,29...",4


Unnamed: 0,from,to,subject,content,category
16917,rrga-l@list.rtowest.org,rrga-l@list.rtowest.org,FW:,"Please remove my name Jack Anderson, jandersonclarkpud.com from your list. Thank you. -----Original Message----- From: Dalia, Keith A - TOS-DITT1 mailto:kadaliaBPA.GOV Sent: Tuesday, May 15, 2001 4:40 PM To: RRGA-LLIST.RTOWEST.ORG Subject: You have been added to the RRGA-L mailing list RTO West Congestion Model Content Group by Keith Dalia . Please save this message for future reference, especially if this is the first time you subscribe to an electronic mailing list. If you ever need to leave the list, you will find the necessary instructions below. Perhaps more importantly, saving a copy of this message and of all future subscription notices from other mailing lists in a special mail folder will give you instant access to the list of mailing lists that you are subscribed to. This may prove very useful the next time you go on vacation and need to leave the lists temporarily so as not to fill up your mailbox while you are away You should also save the welcome messages from the list owners that you will occasionally receive after subscribing to a new list. To send a message to all the people currently subscribed to the list, just send mail to RRGA-LLIST.RTOWEST.ORG. This is called sending mail to the list, because you send mail to a single address and LISTSERV makes copies for all the people who have subscribed. This address RRGA-LLIST.RTOWEST.ORG is also called the list address. You must never try to send any command to that address, as it would be distributed to all the people who have subscribed. All commands must be sent to the LISTSERV address, LISTSERVLIST.RTOWEST.ORG. It is very important to understand the difference between the two, but fortunately it is not complicated. The LISTSERV address is like a FAX number that connects you to a machine, whereas the list address is like a normal voice line connecting you to a person. If you make a mistake and dial the FAX number when you wanted to talk to someone on the phone, you will quickly realize that you used ...",5


Unnamed: 0,from,to,subject,content,category
15732,pljacklin@stoel.com,"abuckley@wutc.wa.gov, alan@epud.org, ali.rodol@ci.seattle.wa.us, \n\tarchive-power-ratecase@bpa.gov, aw_turner@pgn.com, bassl@sce.com, \n\tbessex@cowlitzpud.org, bferranti@mbllp.com, \n\tbpadiscovery@merkellaw.com, ccopatrny@e-z.net, crow@millernash.com, \n\tcub@teleport.com, cwagers@dcpud.org, dan.meek@usa.net, \n\tdarcy@tonkon.com, darkills@popud.com, dbkinnard@pplmt.com, \n\tdfaulk@puget.com, discoveryparalegal@bpa.gov, dldorrell@stoel.com, \n\tdmacgregor@morganlewis.com, doney@doneylaw.com, \n\tdoug_brawley@pngc.com, dparrish@prmllp.com, dws@keywaycorp.com, \n\teauverde@hotmail.com, eboyd@ppcpdx.org, efinklea@energyadvocates.com, \n\telisackf@aol.com, energlaw@aol.com, eric.larson@oremetwahchang.com, \n\terick_johnson@pngc.com, ghuhta@cowlitzpud.org, \n\thuse@eesconsulting.com, ias@dvclaw.com, jack.speer@alcoa.com, \n\tjacksonc@ida.net, janet.prewitt@state.or.us, jdeason@chbh.com, \n\tjeffn@subutil.com, jhartso@enron.com, jlh@mc-power.com, \n\tjohk@critfc.org, john.yanov@eweb.eugene.or.us, johncameron@dwt.com, \n\tjolynnr@mresearch.com, jpw@duncanallen.com, jrb_ora@televar.com, \n\tjsaven@pacifier.com, jschlect@avistacorp.com, \n\tkevin.clark@ci.seattle.wa.us, kknitte@gcpud.org, \n\tkmoxness@cencoast.com, kpom@mail.pacifier.com, kyle@ppcpdx.org, \n\tlawyer@teleport.com, lhamilton@avistaenergy.com, \n\tlpeters@pacifier.com, lwolv@worldaccessnet.com, lyn_williams@pgn.com, \n\tmarc.hellman@state.or.us, marss@perkinscoie.com, matts1@atg.wa.gov, \n\tmershanlaw@aol.com, mhain@ect.enron.com, michaelearly@earthlink.net, \n\tmike.macdougall@powerex.com, mmetzler@ci.tacoma.wa.us, \n\tmschaff@worldnet.att.net, mthomas@mtpower.com, paul@kinergyllc.com, \n\tpete.forsyth@kaiseral.com, pete@ashland.or.us, \n\tpeter@richardsonandoleary.com, pfox@bracepatt.com, \n\tpljacklin@stoel.com, pmurphy@mbllp.com, pnichols@idahopower.com, \n\tpobenchain@idahopower.com, pspi@dynegy.com, pvickery@tpud.org, \n\trachel@rnp.org, ratecase@pngc.com, rates@ppcpdx.org, \n\trbstrong@painehamble...",WP-02 Data Response,"requestnumber: DS-PL:001 firstname: Pamela lastname: Jacklin e-mail: pljacklinstoel.com exhibitwp-02-e-: JCG-01 and 02 pagenumbers: requesttext: Please disclose and/or produce copies of any and all communications, from October 16, 2000 to the present, between any BPA representative and any representative of any other party, concerning any of the following subjects: a. The general structure of the Cost Recovery Adjustment Clauses CRACs described in the testimony of the Joint Customers b. Any differences between the form of the CRACs described in the testimony of the Joint Customers and the CRACs contained in BPAs Amended Proposal, or those contained in BPAs Supplemental Proposal c. The detailed mechanics of implementing the CRACs described in the testimony of the Joint Customers d. Revenue effects of the CRACs described in the testimony of the Joint Customers, any modifications or e. Any aspect of the Partial Stipulation and Settlement Agreement and the Parties Proposal attached thereto. For purposes of this Request, disclose and/or produce has the meanings described below. With respect to oral communications, disclose and/or produce means to describe the time, date, approximate duration, and substantive content of the communication the identity and organizational affiliation of the communicating individuals whether the communication occurred in person or by telephone the location of the communication if in person the identity of any witnesses to the communications, including all individuals in the room if the communication occurred by speakerphone and whether the communication was made in the presence of or after coordination with BPA counsel if so, identify such counsel. For purposes of this Request, you need not disclose and/or produce oral communications that occurred during meetings noticed in compliance with Rate Case Rule 1010.7d. With respect to written communications, the Request includes communications in any recorded form, whether on paper, electronic ...",6


Unnamed: 0,from,to,subject,content,category
40175,exchangeinfo@nymex.com,sara.shackleton@enron.com,"(01-15) Cinergy, Entergy, Palo Verde, and COB Options Expiration","Notice No. 01-15 January 19, 2001 TO: All NYMEX Members/Member Firms All NYMEX Clearing Members All NYMEX Floor Traders All NYMEX Operations Managers FROM: George Henderson, Vice President RE:Options Expiration Operational Procedures for the Trading Floor and Clearing Members The expiration date for the February 2001 options contract for Cinergy NOG1, Entergy OTG1, Palo Verde VOG1 and Cob WOG1 is Thursday, January 25, 2001. GENERAL OPERATIONAL PROCEDURES All Clearing Members and Qualified Floor Traders that carried an options position as of the close of business day prior to the expiration day, or engaged in trading activity on Expiration Day in the expiring options contract will be required to have a knowledgeable, duly authorized representative present at their normal work station promptly at 5:30 p.m. until released by the Exchange staff as specified below. All adjustments and/or corrections, must be accompanied by relevant supporting documentation prior to being incorporated into expiration processing, in essence making the expiration processing an extension of the afternoon trade resolution procedures. All input to the NYMEX Clearing Department will conclude no later than 30 minutes after floor representatives are released. Exchange Clearing 299-2110, Floor Trade Correction 299-2068 and 299-2169 personnel, as well as a representative of the Floor Committee will be available to assist with the processing of notices of Exercise and Abandonment, position transfers, trade corrections and other questions or problems you may have. CLEARING DEPARTMENT OPERATIONAL PROCEDURES The Option Expiration process is a screen based process for which all information is provided on the screens on C21 terminals. No Option Expiration Reports will be provided. The following screens will assist you through the Option Expiration process: MEMBER TRADE INQUIRY Contains real-time top day trade information, trade information for the previous 4 business days and trades adjusted for the ...",7


Unnamed: 0,from,to,subject,content,category
46903,alerts@alerts.equityalert.com,alewis@ect.enron.com,Your News Alert for BRCM,"IMAGE IMAGE IMAGE IMAGE IMAGE IMAGE IMAGE Upgrades IMAGE DownGrades IMAGE Coverage Initiated IMAGE Stock Splits IMAGE Buybacks IMAGE Pos Pre-Announce IMAGE Neg Pre-Announce IMAGE IMAGE IMAGE IMAGE IMAGE Unsubscribe IMAGE Update my Membership / Profile IMAGE Forgot Username / Password IMAGE Add / Edit Alerts IMAGE View My Alerts IMAGE IMAGE IMAGE IMAGE IMAGE IMAGE IMAGE IMAGE IMAGE As requested, your News Alert for BRCM follows from EquityAlert.com. Form 144: Filing to Sell 100000 Shares of BROADCOM CORP COM A BRCM Jan 16, 2002 Vickers Stock Research via COMTEX -- Document Processing Date: January 16, 2002 Filer: NICHOLAS FAMILY TR94H&SN Relation: Stock Name: BROADCOM CORP COM A Stock CUSIP: 111320107 Stock Symbol: BRCM Exchange: NASDAQ Transaction date: November 05, 2001 Shares for sale: 100000 Value held: $4002500 Broker: MORGAN STANLEY, DEAN WITTER Copyright c 2002, Vickers Stock Research Corporation. All rights reserved. -0- IMAGE IMPORTANT NOTICE AND DISCLAIMER REGARDING THIS COURTESY EMAIL At your request, as a subscriber to our service, this email alert is being sent to you as a courtesy and is for information purposes only. We are a financial news re-distributor. We are not an investment advisory and do not purport to tell or suggest which companies you should monitor or which securities you should purchase or sell. In addition, not withstanding our policy of prohibiting employees from buying or selling securities of an advertising company for a period of 20 days following dissemination of the advertisement, we may not be able to effectively monitor our employees to ensure compliance with the same. Consequently, there may be sales and/or purchases of such securities by our employees prior to, during and immediately following the dissemination of the advertisement. Please note that 1 this email may not contain the full text of the press release issued by, or the research or other reports regarding, the Monitored Company and 2 the text of the advertisement,...",8


Unnamed: 0,from,to,subject,content,category
50338,emaildelivery@businesswire.com,tx-industrial-info-res@businesswire.com,"TVA Starts Construction of the Nation's First Energy Storage Plant,\n in an Advisory by Industrialinfo.com","Welcome to eMail News Delivery, a service from Business Wire. Here is your Industrial Information Resources Inc. news release. If you have received this in error please send a message to: eMailDeliverybusinesswire.com with the following command in the body of the message: unsubscribe TX-INDUSTRIAL-INFO-RES If you have questions about this service, please contact your Business Wire Account Executive or servicebusinesswire.com BW2047 OCT 24,2001 4:46 PACIFIC 07:46 EASTERN BWTX-INDUSTRIAL-INFO-RES TVA Starts Construction of the Nations First Energy Storage Plant, in an Advisory by Industrialinfo.com Business Editors & Energy Writers HOUSTON--BUSINESS WIRE--Oct. 24, 2001--The following is an advisory by Industrialinfo.com Industrial Information Resources, Inc Houston, Texas. The Tennessee Valley Authority NYSE:TVBNYSE:TVC recently started construction of the nations first large scale power storage facility at a two acre site off of Highway 373 just east of the Air Force Base near Columbus, MS. Construction of the $25 million facility is expected to take 18-24 months to complete, creating roughly 60 jobs before the project is finished in 2003. This innovative energy-storage plant is designed to improve power reliability and customer service, have limited environmental impact and contribute to economic growth for consumers in Mississippi and throughout the Tennessee Valley, says TVA Chairman Glenn McCullough Jr. TVA continues to set the pace for energy production and demonstrate its role as a national leader in the use of cutting-edge, 21st-Century technologies. The plant was designed by Regenesys Technologies Limited, part of Innogy plc UK and utilizes regenerative fuel cell technology. During periods of low electrical demand the cells are charged with electricity generated at other locations. The stored energy is then released during times of demand. The power storage facility is designed to store up to 120 megawatt-hours of energy and will be capable of providing p...",9


Unnamed: 0,from,to,subject,content,category
68019,clearbot@mailblaster.clearstation.com,kensey_subscriber@mailman.enron.com,"(ClearStation) Recommendation : ATVI (Short) by kensey, Tue Oct 23\n 2001","IMAGE IMAGE IMAGE IMAGE IMAGE Quote & 3-Point ViewProfile & FundamentalsKey RatiosAnalyst InfoEarnings EstimatesInsider TradingInteractive Graph ToolGraphs In BulkDiscussionRecommendationsNews Articles IMAGE IMAGE IMAGE IMAGE IMAGE IMAGE IMAGE IMAGE IMAGE IMAGE IMAGE IMAGE IMAGE Recommend IMAGE IMAGE IMAGE IMAGE IMAGE kenseys RecommendList Reply Unsubscribe IMAGE IMAGE DO NOT reply to this email. Replies should be made via the underlined Reply link above. IMAGE kensey has recommended ATVI Short at Oct 23 2001 8:05AM Activision Inc NASDAQ:ATVI Symbol Last Time Change High Low Volume ATVI 38.72 11:54AM 2.06 38.75 36.75 994,700 Community Take 30 Long / 9 Short IMAGE kensey said: Tue Oct 23 08:04:00 2001 near resistance and overbought. cover north of 38. kensey Click here to see the full recommendation with annotated graph IMAGE IMAGE IMAGE IMAGE kenseys RecommendList Reply Unsubscribe IMAGE IMAGE ClearStation is not an investment advisory service, nor a registered investment advisor or broker-dealer and does not purport to tell or suggest which securities members should buy or sell for themselves. Members should always check with their licensed financial advisor and their tax advisor to determine the suitability of any investment. ClearStation, Inc. is a wholly-owned subsidiary of ETRADE Group, Inc. and an affiliate of ETRADE Securities, Inc. and ETRADE Bank. ETRADE Securities, Inc. and ETRADE Bank are wholly-owned subsidiaries of ETRADE Group, Inc. ETRADE Bank deposits are insured up to $100,000 by the FDIC. ETRADE Bank and ETRADE Securities, Inc. are separate but affiliated companies. ETRADE brokerage accounts are maintained by ETRADE Securities, Inc. Products offered by ETRADE Securities, Inc. are not insured by the FDIC, are not deposits or obligations of ETRADE Bank, are subject to investment risk, including possible loss of principal invested. ClearStation is a registered trademark of ClearStation, Inc. ETRADE is a registered trademark of ETRADE Securities, I...",10


In [12]:
silhouette_score(word_df_scaled, cluster_labels)

0.04806517549673259

In [None]:
# Instantiate an empty list of silhouette scores and cluster counts
silhouette_scores = []
n_clusters = []
eps = [0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.5, 0.75]

# Iterate over eps
for e in eps:
    
    # Instantiate
    dbscan_model = DBSCAN(eps=e, min_samples=8)

    # Fit
    cluster_labels = dbscan_model.fit_predict(word_df_scaled)
    
    # Find non-noise points
    non_noise = cluster_labels[cluster_labels!=-1]
    
    # Get silhouette score and number of clusters
    n_cluster = len(np.unique(non_noise))
    
    if n_cluster > 1:
        silhouette = silhouette_score(word_df_scaled, cluster_labels)
    else:
        silhouette = np.NaN
    
    # Append to list
    n_clusters.append(n_cluster)
    silhouette_scores.append(silhouette)

In [None]:
# Put in a dataframe
results_df = pd.DataFrame({'eps':eps, 'n_clusters':n_clusters, 'silhouette':silhouette_scores})
results_df