# Origin file

In [1]:
#!/usr/bin/python

""" 
    Starter code for exploring the Enron dataset (emails + finances);
    loads up the dataset (pickled dict of dicts).

    The dataset has the form:
    enron_data["LASTNAME FIRSTNAME MIDDLEINITIAL"] = { features_dict }

    {features_dict} is a dictionary of features associated with that person.
    You should explore features_dict as part of the mini-project,
    but here's an example to get you started:

    enron_data["SKILLING JEFFREY K"]["bonus"] = 5600000
    
"""

import pickle

# Pickle streams are binary data, so open the file in "rb" mode; the context
# manager closes the handle as soon as the dataset has been loaded.
with open("../final_project/final_project_dataset.pkl", "rb") as dataset_file:
    enron_data = pickle.load(dataset_file)





# Number of people in the data set

In [6]:
# enron_data is keyed by person name, so its size is the number of people.
people_count = len(enron_data)
print("num of people: %d" % people_count)

num of people: 146


# Number of features

In [18]:
import pprint

# Inspect the feature dict of one person to see which features are recorded.
# next(iter(...)) returns the first key the dict yields without indexing into
# keys(), so it also works where keys() is a non-indexable view (Python 3).
firstKey = next(iter(enron_data))
features = enron_data[firstKey]
pprint.pprint(features)
print("num of features: %d" % len(features))


{'bonus': 600000,
 'deferral_payments': 'NaN',
 'deferred_income': 'NaN',
 'director_fees': 'NaN',
 'email_address': 'mark.metts@enron.com',
 'exercised_stock_options': 'NaN',
 'expenses': 94299,
 'from_messages': 29,
 'from_poi_to_this_person': 38,
 'from_this_person_to_poi': 1,
 'loan_advances': 'NaN',
 'long_term_incentive': 'NaN',
 'other': 1740,
 'poi': False,
 'restricted_stock': 585062,
 'restricted_stock_deferred': 'NaN',
 'salary': 365788,
 'shared_receipt_with_poi': 702,
 'to_messages': 807,
 'total_payments': 1061827,
 'total_stock_value': 585062}
num of features: 21


# Number of POIs from the dataset

In [26]:
# Names of everyone whose record is flagged as a person of interest.
# The flag is compared against True explicitly: a bare truthiness test would
# also accept the 'NaN' placeholder string used for missing values.
POIs = [name for name, record in enron_data.items() if record['poi'] == True]
pprint.pprint(POIs)
print("num of POIs: %d" % len(POIs))
        

['HANNON KEVIN P',
 'COLWELL WESLEY',
 'RIEKER PAULA H',
 'KOPPER MICHAEL J',
 'SHELBY REX',
 'DELAINEY DAVID W',
 'LAY KENNETH L',
 'BOWEN JR RAYMOND M',
 'BELDEN TIMOTHY N',
 'FASTOW ANDREW S',
 'CALGER CHRISTOPHER F',
 'RICE KENNETH D',
 'SKILLING JEFFREY K',
 'YEAGER F SCOTT',
 'HIRKO JOSEPH',
 'KOENIG MARK E',
 'CAUSEY RICHARD A',
 'GLISAN JR BEN F']
num of POIs: 18


# Number of existing POIs that we know

In [31]:
# Read the POI name list and keep only the lines that start with '(';
# all other lines of the file are ignored.
# Note: this rebinds POIs, replacing any list stored under that name earlier.
with open('../final_project/poi_names.txt', 'r') as f:
    content = f.readlines()

POIs = [line for line in content if line.startswith('(')]

pprint.pprint(POIs)
print("Number of existing POIs: %d" % len(POIs))

['(y) Lay, Kenneth\n',
 '(y) Skilling, Jeffrey\n',
 '(n) Howard, Kevin\n',
 '(n) Krautz, Michael\n',
 '(n) Yeager, Scott\n',
 '(n) Hirko, Joseph\n',
 '(n) Shelby, Rex\n',
 '(n) Bermingham, David\n',
 '(n) Darby, Giles\n',
 '(n) Mulgrew, Gary\n',
 '(n) Bayley, Daniel\n',
 '(n) Brown, James\n',
 '(n) Furst, Robert\n',
 '(n) Fuhs, William\n',
 '(n) Causey, Richard\n',
 '(n) Calger, Christopher\n',
 '(n) DeSpain, Timothy\n',
 '(n) Hannon, Kevin\n',
 '(n) Koenig, Mark\n',
 '(y) Forney, John\n',
 '(n) Rice, Kenneth\n',
 '(n) Rieker, Paula\n',
 '(n) Fastow, Lea\n',
 '(n) Fastow, Andrew\n',
 '(y) Delainey, David\n',
 '(n) Glisan, Ben\n',
 '(n) Richter, Jeffrey\n',
 '(n) Lawyer, Larry\n',
 '(n) Belden, Timothy\n',
 '(n) Kopper, Michael\n',
 '(n) Duncan, David\n',
 '(n) Bowen, Raymond\n',
 '(n) Colwell, Wesley\n',
 '(n) Boyle, Dan\n',
 '(n) Loehr, Christopher\n']
Number of existing POIs: 35


# Total stock value of James Prentice

In [35]:
# Fetch the person's record first, then print the single feature of interest.
prentice_record = enron_data['PRENTICE JAMES']
print(prentice_record['total_stock_value'])

1095040


# Number of emails from Wesley Colwell to POIs

In [37]:
# Fetch the person's record first, then print the single feature of interest.
colwell_record = enron_data['COLWELL WESLEY']
print(colwell_record['from_this_person_to_poi'])

11


# What’s the value of stock options exercised by Jeffrey K Skilling?

In [39]:
# Fetch the person's record first, then print the single feature of interest.
skilling_record = enron_data['SKILLING JEFFREY K']
print(skilling_record['exercised_stock_options'])

19250000


# Who took the most money? How much?

In [43]:
# Dataset keys of the three executives whose total payments are compared.
chairMan = 'LAY KENNETH L'
CFO = 'FASTOW ANDREW S'
CEO = 'SKILLING JEFFREY K'

# Total payments in the order CEO, CFO, chairman; the largest is printed last.
moneyTheyTook = [
    enron_data[name]['total_payments'] for name in (CEO, CFO, chairMan)
]
print(moneyTheyTook)
print(max(moneyTheyTook))

[8682716, 2424083, 103559793]
103559793


# People with salary and email address

In [57]:
# A feature counts as missing when it holds the placeholder string 'NaN'.
# (name, salary) pairs for everyone with a recorded salary.
peopleWithSalary = [
    (name, record['salary'])
    for name, record in enron_data.items()
    if record['salary'] != 'NaN'
]
print("number of ppl with salary: %d" % len(peopleWithSalary))

# (name, email_address) pairs for everyone with a recorded e-mail address.
# The second element is the address itself, matching the filter and the
# list's name; only the length is printed, so the count is unaffected.
peopleWithEmail = [
    (name, record['email_address'])
    for name, record in enron_data.items()
    if record['email_address'] != 'NaN'
]
print("number of ppl with email: %d" % len(peopleWithEmail))

number of ppl with salary: 95
number of ppl with email: 111


# People without payment data

In [74]:
# [name, 'NaN'] pairs for everyone whose 'total_payments' is the 'NaN'
# missing-value placeholder.
peopleWithoutPayment = [
    [name, record['total_payments']]
    for name, record in enron_data.items()
    if record['total_payments'] == 'NaN'
]
pprint.pprint(peopleWithoutPayment)

# print is written in call form, like every other cell, so the line parses
# on both Python 2 and Python 3. float() keeps the division from truncating
# under Python 2.
# NOTE: the printed value is a fraction of the whole dataset (0..1), not a
# number scaled to percent.
missing_share = float(len(peopleWithoutPayment)) / len(enron_data)
print("Percentage of people without payment: " + str(missing_share))

[['CORDES WILLIAM R', 'NaN'],
 ['LOWRY CHARLES P', 'NaN'],
 ['CHAN RONNIE', 'NaN'],
 ['WHALEY DAVID A', 'NaN'],
 ['CLINE KENNETH W', 'NaN'],
 ['LEWIS RICHARD', 'NaN'],
 ['MCCARTY DANNY J', 'NaN'],
 ['POWERS WILLIAM', 'NaN'],
 ['PIRO JIM', 'NaN'],
 ['WROBEL BRUCE', 'NaN'],
 ['MCDONALD REBECCA', 'NaN'],
 ['SCRIMSHAW MATTHEW', 'NaN'],
 ['GATHMANN WILLIAM D', 'NaN'],
 ['GILLIS JOHN', 'NaN'],
 ['MORAN MICHAEL P', 'NaN'],
 ['LOCKHART EUGENE E', 'NaN'],
 ['SHERRICK JEFFREY B', 'NaN'],
 ['FOWLER PEGGY', 'NaN'],
 ['CHRISTODOULOU DIOMEDES', 'NaN'],
 ['HUGHES JAMES A', 'NaN'],
 ['HAYSLETT RODERICK J', 'NaN']]
Percentage of people without payment: 0.143835616438


# Add new entries

In [82]:
# Template record for a synthetic person of interest: every feature holds the
# 'NaN' missing-value placeholder, except 'poi', which is set to True below.
new_feature = dict.fromkeys(
    [
        'bonus',
        'deferral_payments',
        'deferred_income',
        'director_fees',
        'email_address',
        'exercised_stock_options',
        'expenses',
        'from_messages',
        'from_poi_to_this_person',
        'from_this_person_to_poi',
        'loan_advances',
        'long_term_incentive',
        'other',
        'poi',
        'restricted_stock',
        'restricted_stock_deferred',
        'salary',
        'shared_receipt_with_poi',
        'to_messages',
        'total_payments',
        'total_stock_value',
    ],
    'NaN',
)
new_feature['poi'] = True

# Register the synthetic entries 'POI2' .. 'POI10'.
# Each key receives its own copy of the template: binding the same dict
# object to every key would make a later change to one entry show up in all
# of them. A shallow copy is enough because every value is a string or bool.
# 'POI1' is deliberately not assigned in this cell; the recorded output
# further down already lists it, so it was presumably added by an earlier run.
for poi_index in range(2, 11):
    enron_data['POI%d' % poi_index] = dict(new_feature)

# Number of people and people without payment

In [84]:
# Bare expression: the notebook shows its value as the cell result, i.e. the
# number of entries currently in enron_data.
len(enron_data)

156

In [87]:
# Rebuild the [name, 'NaN'] list of people whose 'total_payments' is the
# 'NaN' missing-value placeholder, then report how many there are.
peopleWithoutPayment = [
    [name, record['total_payments']]
    for name, record in enron_data.items()
    if record['total_payments'] == 'NaN'
]
pprint.pprint(peopleWithoutPayment)
missing_count = len(peopleWithoutPayment)
print("num of people without payment: %d " % missing_count)

[['CORDES WILLIAM R', 'NaN'],
 ['POI3', 'NaN'],
 ['POI9', 'NaN'],
 ['LOWRY CHARLES P', 'NaN'],
 ['POI5', 'NaN'],
 ['POI6', 'NaN'],
 ['CHAN RONNIE', 'NaN'],
 ['WHALEY DAVID A', 'NaN'],
 ['CLINE KENNETH W', 'NaN'],
 ['LEWIS RICHARD', 'NaN'],
 ['MCCARTY DANNY J', 'NaN'],
 ['POWERS WILLIAM', 'NaN'],
 ['POI8', 'NaN'],
 ['PIRO JIM', 'NaN'],
 ['WROBEL BRUCE', 'NaN'],
 ['MCDONALD REBECCA', 'NaN'],
 ['SCRIMSHAW MATTHEW', 'NaN'],
 ['GATHMANN WILLIAM D', 'NaN'],
 ['POI2', 'NaN'],
 ['GILLIS JOHN', 'NaN'],
 ['POI4', 'NaN'],
 ['MORAN MICHAEL P', 'NaN'],
 ['POI7', 'NaN'],
 ['LOCKHART EUGENE E', 'NaN'],
 ['SHERRICK JEFFREY B', 'NaN'],
 ['FOWLER PEGGY', 'NaN'],
 ['CHRISTODOULOU DIOMEDES', 'NaN'],
 ['HUGHES JAMES A', 'NaN'],
 ['HAYSLETT RODERICK J', 'NaN'],
 ['POI1', 'NaN'],
 ['POI10', 'NaN']]
num of people without payment: 31 
