In [2]:
import unicodecsv
from datetime import datetime as dt

def read_csv(file_name):
    with open(file_name, 'rb') as f:
        reader = unicodecsv.DictReader(f)
        result = []
        for row in reader:
            result.append(row)
        return result

def convert_to_bool(row, key):
    value = row[key]
    row[key] = (value.lower() == 'true')

def convert_to_int(row, key):
    value = row[key]
    if value == '':
        row[key] = None
    else:
        row[key] = int(float(value))

def convert_to_date(row, key):
    value = row[key]
    if value == '':
        row[key] = None
    else:
        row[key] = dt.strptime(value, '%Y-%m-%d')


In [3]:
def read_enrollments():
    list = read_csv('enrollments.csv')
    for row in list:
        convert_to_int(row, "account_key")
        convert_to_date(row, "cancel_date")
        convert_to_int(row, "days_to_cancel")
        convert_to_bool(row, "is_canceled")
        convert_to_bool(row, "is_udacity")
        convert_to_date(row, "join_date")
    return list

enrollments = read_enrollments()


In [4]:
def read_daily_engagement():
    list = read_csv('daily_engagement.csv')
    for row in list:
        row["account_key"] = row["acct"]
        del row["acct"]
        convert_to_int(row, "account_key")
        convert_to_int(row, "lessons_completed")
        convert_to_int(row, "num_courses_visited")
        convert_to_int(row, "projects_completed")
        convert_to_int(row, "total_minutes_visited")
        convert_to_date(row, "utc_date")
    return list        

daily_engagement = read_daily_engagement()

In [5]:
def read_project_submission():
    list = read_csv('project_submissions.csv')
    for row in list:
        convert_to_int(row, "account_key")
        convert_to_date(row, "completion_date")
        convert_to_date(row, "creation_date")
        # other lesson_key, assined_rating, processing_state : String
    return list        

project_submissions = read_project_submission()
project_submissions[0]

{u'account_key': 256,
 u'assigned_rating': u'UNGRADED',
 u'completion_date': datetime.datetime(2015, 1, 16, 0, 0),
 u'creation_date': datetime.datetime(2015, 1, 14, 0, 0),
 u'lesson_key': u'3176718735',
 u'processing_state': u'EVALUATED'}

In [6]:
def get_unique_users_set(user_list):
    result = set()
    for x in user_list:
        result.add(x["account_key"])
    return result

enrollment_user_set = get_unique_users_set(enrollments)
daily_user_set = get_unique_users_set(daily_engagement)
project_user_set = get_unique_users_set(project_submissions)

print len(enrollment_user_set)
print len(daily_user_set)
print len(project_user_set)

1302
1237
743


In [18]:
udacity_accounts = set()
udacity_count = 0
for x in enrollments:
    if x["is_udacity"]:
        udacity_count += 1
        udacity_accounts.add(x["account_key"])

def remove_udacity_accounts(data_list):
    return [x for x in data_list if x["account_key"] not in udacity_accounts]

non_udacity_enrollments = remove_udacity_accounts(enrollments)
non_udacity_daily_engagement = remove_udacity_accounts(daily_engagement)
non_udacity_project_submissions = remove_udacity_accounts(project_submissions)

print len(non_udacity_enrollments)
print len(non_udacity_daily_engagement)
print len(non_udacity_project_submissions)

1622
135656
3634


In [86]:
paid_students = dict()

for x in non_udacity_enrollments:
    start = x["join_date"]
    end = x["cancel_date"]
    if end == None or (end - start).days > 7:
        acct = x["account_key"]
        join_date = x["join_date"]
        if not paid_students.has_key(acct) or join_date > paid_students[acct]["join_date"]:
            paid_students[acct] = x            

print len(paid_students)

995


In [93]:
def is_in_one_week(enrollment_day, engagement_day):
    delta = (engagement_day - enrollment_day).days
    return  delta < 7

paid_in_first_week_engagement = []

paid_count = 0

for eng in non_udacity_daily_engagement:
    acct = eng["account_key"]
    if paid_students.has_key(acct):
        enr = paid_students[acct]
        if is_in_one_week(enr["join_date"], eng["utc_date"]):
            paid_in_first_week_engagement.append(eng)
            
print len(paid_in_first_week_engagement)
            
        

21508
