In [1]:
import unicodecsv
# Read every enrollment row into a list of dicts (one dict per CSV row).
# The file is opened in binary mode ('rb'): unicodecsv decodes the raw bytes
# itself, and text mode can mangle line endings on some platforms.
with open('Udacity - Data Analysis/enrollments.csv', 'rb') as csvFile:
    reader = unicodecsv.DictReader(csvFile)
    enrollments = list(reader)

enrollments[0]

{u'account_key': u'448',
 u'cancel_date': u'2015-01-14',
 u'days_to_cancel': u'65',
 u'is_canceled': u'True',
 u'is_udacity': u'True',
 u'join_date': u'2014-11-10',
 u'status': u'canceled'}

In [2]:
# Read every daily-engagement row into a list of dicts (one dict per CSV row).
# The file is opened in binary mode ('rb'): unicodecsv decodes the raw bytes
# itself, and text mode can mangle line endings on some platforms.
with open('Udacity - Data Analysis/daily_engagement.csv', 'rb') as csvFile:
    reader = unicodecsv.DictReader(csvFile)
    daily_engagement = list(reader)
daily_engagement[0]

{u'acct': u'0',
 u'lessons_completed': u'0.0',
 u'num_courses_visited': u'1.0',
 u'projects_completed': u'0.0',
 u'total_minutes_visited': u'11.6793745',
 u'utc_date': u'2015-01-09'}

In [3]:
# Read every project-submission row into a list of dicts (one dict per CSV row).
# The file is opened in binary mode ('rb'): unicodecsv decodes the raw bytes
# itself, and text mode can mangle line endings on some platforms.
with open('Udacity - Data Analysis/project_submissions.csv', 'rb') as csvFile:
    reader = unicodecsv.DictReader(csvFile)
    project_submissions = list(reader)
project_submissions[0]

{u'account_key': u'256',
 u'assigned_rating': u'UNGRADED',
 u'completion_date': u'2015-01-16',
 u'creation_date': u'2015-01-14',
 u'lesson_key': u'3176718735',
 u'processing_state': u'EVALUATED'}

In [4]:
from datetime import datetime as dt

def parse_date(date):
    """Turn a 'YYYY-MM-DD' string into a datetime.

    An empty string stands for a missing value and yields None.
    Any other text that does not match the format raises ValueError
    (from strptime).
    """
    if date != '':
        return dt.strptime(date, '%Y-%m-%d')
    return None
    
def parse_bool(par):
    """Convert the CSV text 'True' / 'False' into a real boolean.

    bool() must not be used on the raw text: every non-empty string is
    truthy, so bool('False') would be True.

    par -- the string read from the CSV, or an already-converted bool.
    Returns True only for the exact text 'True'; any other string
    (including '' and 'False') gives False. A bool argument is returned
    unchanged so the conversion can safely be applied twice.
    """
    if isinstance(par, bool):
        return par
    return par == 'True'

def parse_int(par):
    """Convert a value to int, treating missing values as None.

    par -- a numeric string, a number, '' or None.
    Returns None for '' and for None (so the result of another parser
    that already returned None can be passed straight through instead
    of failing inside int()); otherwise returns int(par).
    Raises ValueError if par is a string that is not an integer literal.
    """
    if par is None or par == '':
        return None
    return int(par)
    
def parse_float(par):
    """Convert a numeric string to float; an empty string yields None."""
    return float(par) if par != '' else None
    



In [5]:
# Which parser to apply to each enrollment column, in the order applied.
enrollment_parsers = (
    ('days_to_cancel', parse_int),
    ('cancel_date', parse_date),
    ('is_canceled', parse_bool),
    ('is_udacity', parse_bool),
    ('join_date', parse_date),
)

# Replace the raw CSV strings with typed values, in place.
for enrollment in enrollments:
    for field, convert in enrollment_parsers:
        enrollment[field] = convert(enrollment[field])

enrollments[0]

{u'account_key': u'448',
 u'cancel_date': datetime.datetime(2015, 1, 14, 0, 0),
 u'days_to_cancel': 65,
 u'is_canceled': True,
 u'is_udacity': True,
 u'join_date': datetime.datetime(2014, 11, 10, 0, 0),
 u'status': u'canceled'}

In [6]:
# Replace the raw CSV strings with typed values, in place.
for engagement in daily_engagement:
    # Counts arrive as float-looking text (e.g. '1.0'), so they go through
    # parse_float first and are then truncated to int.
    for count_field in ('lessons_completed',
                        'num_courses_visited',
                        'projects_completed'):
        engagement[count_field] = parse_int(parse_float(engagement[count_field]))
    engagement['total_minutes_visited'] = parse_float(
        engagement['total_minutes_visited'])
    engagement['utc_date'] = parse_date(engagement['utc_date'])
daily_engagement[0]

{u'acct': u'0',
 u'lessons_completed': 0,
 u'num_courses_visited': 1,
 u'projects_completed': 0,
 u'total_minutes_visited': 11.6793745,
 u'utc_date': datetime.datetime(2015, 1, 9, 0, 0)}

In [7]:
# Convert both date columns of every submission from text to datetime, in place.
for project in project_submissions:
    for date_field in ('completion_date', 'creation_date'):
        project[date_field] = parse_date(project[date_field])
project_submissions[0]

{u'account_key': u'256',
 u'assigned_rating': u'UNGRADED',
 u'completion_date': datetime.datetime(2015, 1, 16, 0, 0),
 u'creation_date': datetime.datetime(2015, 1, 14, 0, 0),
 u'lesson_key': u'3176718735',
 u'processing_state': u'EVALUATED'}

In [8]:
# Row counts of the three tables.
enrollments_total, daily_engagement_total, project_submissions_total = (
    len(enrollments),
    len(daily_engagement),
    len(project_submissions),
)
print("Total enrollments %s" % enrollments_total)
print("Total engagements %s" % daily_engagement_total)
print("Total submissions %s" % project_submissions_total)

Total enrollments 1640
Total engagements 136240
Total submissions 3642


In [9]:
# Distinct account keys that appear in the enrollments table.
acct_enrollments = set(enrollment['account_key'] for enrollment in enrollments)

# Distinct account keys in the engagement table (the column is named 'acct' there).
acct_engagement = set(engagement['acct'] for engagement in daily_engagement)
    
# Distinct account keys that appear in the project submissions table.
acct_projects = set(project['account_key'] for project in project_submissions)

In [10]:
# Number of distinct accounts seen in each table.
print("Unique enrollments %s" % len(acct_enrollments))
print("Unique engagements %s" % len(acct_engagement))
print("Unique submissions %s" % len(acct_projects))

Unique enrollments 1302
Unique engagements 1237
Unique submissions 743


In [11]:
# Rename the 'acct' column to 'account_key' so the engagement rows use the
# same key name as the enrollment and submission rows.
for engagement in daily_engagement:
    # Guarded so that running this cell a second time is a no-op instead of
    # raising KeyError on rows whose 'acct' key was already moved.
    if 'acct' in engagement:
        engagement[u'account_key'] = engagement.pop('acct')

daily_engagement[0]

{u'account_key': u'0',
 u'lessons_completed': 0,
 u'num_courses_visited': 1,
 u'projects_completed': 0,
 u'total_minutes_visited': 11.6793745,
 u'utc_date': datetime.datetime(2015, 1, 9, 0, 0)}