In [1]:
import requests
import pandas
from dateutil import parser
host = 'http://18.188.56.207:9200/'
requests.get(host + '_cat/indices/enron').content

b'yellow open enron jbf7NNxzRySs9hlNbHp-4Q 1 1 251734 27940 572.8mb 572.8mb\n'

In [2]:
doc = {
    "query" : {
        "match_all" : {}
    }
}
import json
r=requests.get(host + 'enron/_search', data=json.dumps(doc), headers={'Content-Type':'application/json'})
print(len(r.json()['hits']['hits']))

10


In [3]:
def elasticsearch_results_to_df(results):
    '''
    A function that will take the results of a requests.get 
    call to Elasticsearch and return a pandas.DataFrame object 
    with the results 
    '''
    hits = results.json()['hits']['hits']
    data = pandas.DataFrame([i['_source'] for i in hits], index = [i['_id'] for i in hits])
    data['date'] = data['date'].apply(parser.parse)
    return(data)

def print_df_row(row):
    '''
    A function that will take a row of the data frame and print it out
    '''
    print('____________________')
    print('RE: %s' % row.get('subject',''))
    print('At: %s' % row.get('date',''))
    print('From: %s' % row.get('sender',''))
    print('To: %s' % row.get('recipients',''))
    print('CC: %s' % row.get('cc',''))
    print('BCC: %s' % row.get('bcc',''))
    print('Body:\n%s' % row.get('text',''))
    print('____________________')


In [4]:
# Put elasticsearch results into a pandas.DataFrame object
df = elasticsearch_results_to_df(r)
print(df)
print_df_row(df.iloc[0])

                        date   
28 2002-01-14 22:55:37+00:00  \
29 2002-01-10 12:38:33+00:00   
30 2000-04-12 11:51:00+00:00   
31 2001-10-23 05:49:55+00:00   
32 2001-01-10 14:41:00+00:00   
33 2002-03-05 12:54:21+00:00   
34 2000-11-07 12:16:00+00:00   
35 2002-01-25 22:14:42+00:00   
36 2001-10-29 21:44:33+00:00   
37 2001-10-12 21:08:23+00:00   

                                                 text   
28  Steve, \n\nI know that we are meeting with Mik...  \
30  FYI\n---------------------- Forwarded by Shell...   
31  \n\n_____________________DAVID COURSEY________...   
32  Content-Transfer-Encoding: 7bit\nReturn-Path: ...   
33  \n[IMAGE] Forums Discuss these points in the F...   
34  Why is the fact that I'm moving down there mak...   
35  \n\n-----Original Message-----\nFrom: KSchier@...   
36  Hi John,\n\nHere is the information you were l...   
37  When: Thursday, October 18, 2001 9:00 AM-11:00...   

                                     sender   
28                kimberly.wa

In [5]:
# Query For a full text match in the "text" field
# Uses the "match" query: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html
doc = {
    "query": {
        "match" : {
            "text" : "important reporting"
        } 
    },
    "from" : 0, # Starting message to return. 
    "size" : 2000, # Return this many messages. Can't be more than 10,000
}
r=requests.get(host + 'enron/_search',
               data=json.dumps(doc), headers={'Content-Type':'application/json'})
r.raise_for_status()
print("Found %s messages matching the query, of " % r.json()['hits']['total'])
df = elasticsearch_results_to_df(r)
print("Returned %s messages" % df.shape[0])
print_df_row(df.iloc[0])

Found {'value': 10000, 'relation': 'gte'} messages matching the query, of 
Returned 2000 messages
____________________
RE: RE: Reporting replication issue is now fixed
At: 2001-10-09 17:11:18+00:00
From: lynn.blair@enron.com
To: jennifer.lowry@enron.com  group.dl-ets@enron.com
CC: nan
BCC: nan
Body:
	Jennifer, how long has this been a problem?  Is there a concern we have caused
	customers problems in nominating due to bad information?  Thanks. Lynn

 -----Original Message-----
From: 	Lowry, Jennifer   
Sent:	Tuesday, October 09, 2001 10:11 AM
To:	DL-ETS TMS Modification Group
Subject:	Reporting replication issue is now fixed


Yesterday we noticed a problem where reports were not reporting on the correct cycle, or were not picking up information between cycles.  As it turns out, an important table was not being replicated from the application database to the reporting database.  

I was told that this problem has been fixed, and on first inspection of the tables, everything looks corre

In [None]:
# Query For a text match in the "text" or "subject" fields. Uses the multi-match query:
# https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html
doc = {
  "query": {
    "multi_match" : {
      "query":    "settlement", 
      "fields": [ "subject", "text" ] 
    }
  }
}
r=requests.get(host + 'enron/_search',
               data=json.dumps(doc), headers={'Content-Type':'application/json'})
r.raise_for_status()
print("Found %s messages matching the query, of " % r.json()['hits']['total'])
df = elasticsearch_results_to_df(r)
print("Returned %s messages" % df.shape[0])
print_df_row(df.iloc[9])

Found {'value': 281, 'relation': 'eq'} messages matching the query, of 
Returned 10 messages
____________________
RE: RE: ENS - Draft Form of Acceptance
At: 2001-11-27 09:18:22+00:00
From: krzysztof.forycki@enron.com
To: ed.cattigan@enron.com  jarek.astramowicz@enron.com  adam.overfield@enron.com  anne.edgley@enron.com  lloyd.wantschek@enron.com  james.derrick@enron.com
CC: jacek.glowacki@enron.com  sophie.martin@enron.com  christian.hackett@enron.com  carys.willott@enron.com  brian.muscroft@enron.com  e..parrish@enron.com
BCC: jacek.glowacki@enron.com  sophie.martin@enron.com  christian.hackett@enron.com  carys.willott@enron.com  brian.muscroft@enron.com  e..parrish@enron.com
Body:
I agree with Ed proposal to accept the settlement.

Kris

-----Original Message-----
From: Cattigan, Ed 
Sent: 26 November 2001 15:30
To: Astramowicz, Jarek; Overfield, Adam; Forycki, Krzysztof; Edgley,
Anne; Wantschek, Lloyd; Derrick Jr., James
Cc: Glowacki, Jacek; Martin, Sophie; Hackett, Christian; Willo

In [None]:
# "OR" query for two phrase matches. Generally you get fancy query parsing with this:
# https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html
doc = {
    "query": {
        "query_string" : {
            "default_field" : "text",
            "query" : "(reach a settlement) OR (continue to pursue)" 
        }
    }
}
r=requests.get(host + 'enron/_search',
               data=json.dumps(doc), headers={'Content-Type':'application/json'})
r.raise_for_status()
print("Found %s messages matching the query" % r.json()['hits']['total']['value'])
df = elasticsearch_results_to_df(r)
print("Returned %s messages" % df.shape[0])
print_df_row(df.iloc[0])

Found 10000 messages matching the query, of 
Returned 10 messages
____________________
RE: Settlement Conversation Recap
At: 2002-05-02 18:26:52+00:00
From: michael.bridges@enron.com
To: koikosp@talgov.com
CC: chris.germany@enron.com
BCC: chris.germany@enron.com
Body:
Hi Pete,

Wanted to recap our conversation yesterday, update you on our timing and get you my contact information.

Regarding a possible settlement, it appears to me that this is a concept that you have considered and are willing to pursue.  We will have a valuation of the contract for you Monday.  It is my suggestion that you review the proposal, make sure that you agree with the outstanding transactions and complete your own valuation.  Once we have agreed on the universe of transactions, you and I will discuss and finalize a settlement amount that we are comfortable will be approved by the creditor committee and bankruptcy judge.

From this point, Enron will begin the process of filing a motion for settlement and sched

In [None]:
df.iloc[9]

date                                  2001-05-31 11:21:00+00:00
recipients    lisa.mellencamp@enron.com  mary.heinitz@enron....
sender                                      kate.cole@enron.com
subject                     FW: Sale of East Coast Power L.L.C.
text          Please note that an additional subsidiary was ...
Name: 21281, dtype: object

In [None]:
# Do a count of all documents in the database by month
doc = {
    "aggs" : {
        "aggregation_var_name" : {
            "date_histogram" : {
                "field" : "date",
                "interval" : "month"
            }
        }
    }
}
r=requests.get(host + 'enron/_search',
               data=json.dumps(doc), headers={'Content-Type':'application/json'})
r.raise_for_status()
def aggregation_to_df(response,var_name='aggregation_var_name'):
    r = response.json()
    r['aggregations'][var_name]
    df = pandas.DataFrame(r['aggregations'][var_name]['buckets'])
    df['date'] = df['key_as_string'].apply(parser.parse)
    df = df[(df['date'] >= '1999-01-1') & (df['date'] < '2002-07-01')]
    df = df[['date','doc_count']]
    return df
df = aggregation_to_df(r)
print(df)

                         date  doc_count
228 1999-01-01 00:00:00+00:00          8
229 1999-02-01 00:00:00+00:00          1
230 1999-03-01 00:00:00+00:00          7
231 1999-04-01 00:00:00+00:00          2
232 1999-05-01 00:00:00+00:00         35
233 1999-06-01 00:00:00+00:00         24
234 1999-07-01 00:00:00+00:00         45
235 1999-08-01 00:00:00+00:00         41
236 1999-09-01 00:00:00+00:00         74
237 1999-10-01 00:00:00+00:00         67
238 1999-11-01 00:00:00+00:00         64
239 1999-12-01 00:00:00+00:00        131
240 2000-01-01 00:00:00+00:00        200
241 2000-02-01 00:00:00+00:00        245
242 2000-03-01 00:00:00+00:00        261
243 2000-04-01 00:00:00+00:00        289
244 2000-05-01 00:00:00+00:00        328
245 2000-06-01 00:00:00+00:00        512
246 2000-07-01 00:00:00+00:00        449
247 2000-08-01 00:00:00+00:00        641
248 2000-09-01 00:00:00+00:00        711
249 2000-10-01 00:00:00+00:00        796
250 2000-11-01 00:00:00+00:00       1089
251 2000-12-01 0

In [None]:
# Do a count of all documents matching a query by month
doc = {
    "query": {
        "match" : {
            "text" : "important reporting"
        }
    },
    "aggs" : {
        "aggregation_var_name" : {
            "date_histogram" : {
                "field" : "date",
                "interval" : "month"
            }
        }
    }
}
r=requests.get(host + 'enron/_search',
               data=json.dumps(doc), headers={'Content-Type':'application/json'})
r.raise_for_status()
df = aggregation_to_df(r)
print(df)

                         date  doc_count
228 1999-01-01 00:00:00+00:00          2
229 1999-02-01 00:00:00+00:00          0
230 1999-03-01 00:00:00+00:00          0
231 1999-04-01 00:00:00+00:00          0
232 1999-05-01 00:00:00+00:00          4
233 1999-06-01 00:00:00+00:00          0
234 1999-07-01 00:00:00+00:00          3
235 1999-08-01 00:00:00+00:00          1
236 1999-09-01 00:00:00+00:00          2
237 1999-10-01 00:00:00+00:00          0
238 1999-11-01 00:00:00+00:00          5
239 1999-12-01 00:00:00+00:00          6
240 2000-01-01 00:00:00+00:00          6
241 2000-02-01 00:00:00+00:00         14
242 2000-03-01 00:00:00+00:00         16
243 2000-04-01 00:00:00+00:00          4
244 2000-05-01 00:00:00+00:00         14
245 2000-06-01 00:00:00+00:00         19
246 2000-07-01 00:00:00+00:00         21
247 2000-08-01 00:00:00+00:00         35
248 2000-09-01 00:00:00+00:00         31
249 2000-10-01 00:00:00+00:00         39
250 2000-11-01 00:00:00+00:00         52
251 2000-12-01 0

In [None]:
# Search an exact match in a specific feild
doc = {
    "query": {
        "match" : {
            "recipients" : "stephen.schwarzbach@enron.com"
        } 
    },
}
r=requests.get(host + 'enron/_search',
               data=json.dumps(doc), headers={'Content-Type':'application/json'})
r.raise_for_status()
print("Found %s messages matching the query, of " % r.json()['hits']['total']['value'])
df = elasticsearch_results_to_df(r)
print("Returned %s messages" % df.shape[0])
print_df_row(df.iloc[0])

Found 3 messages matching the query, of 
Returned 3 messages
____________________
RE: Data Request
At: 2001-03-23 22:13:53+00:00
From: thomas.meers@enron.com
To: stephen.schwarzbach@enron.com
CC: tracy.geaccone@enron.com
BCC: tracy.geaccone@enron.com
Body:

Steve,

The attached files contain the headcount and G&A costs information you requested based on the 2001 Plan.  My apologies for not getting this to you sooner.  Please note that this information is representative of the three Clean Fuels companies but is based on my interpretation of what you wanted included within a particular line item.  Feel free to call me with any questions or comments you may have regarding this data.

Thanks,
	Tom Meers   X54899



  
____________________
