In [44]:
#######################################################################################################################
## Parse Event Log 
## Written by: Sijun He
## Last Modified by: Sijun He
#######################################################################################################################
## The code extracts interaction events from the log and stores them in a dict for further development
## 1. Dictionary video_names:  
##      keys: the names of each video in the course
##      values: dictionaries (video) of data for each video
## 2. Dictionary video:  
##      keys: the names of users that watched the video
##      values: dictionaries (user) of data for each user
## 3. Dictionary user:  
##      keys: the four types of different events: 'play_video','stop_video','pause_video','seek_video'
##      values: a list of data points; for 'seek_video', each data point is a tuple (old_time, new_time);
##                               for the others, each data point is just video_current_time
#######################################################################################################################
import csv, re
import numpy as np
fileName = 'EarthSciences_ResGeo202_Spring2015_VideoInteraction.csv'
video_names = [] ## every distinct video name, in the order first seen in the log
video_name_id_matching = {} ## column 14 (the video ID) -> video name, recorded the first time a name is seen
user_watched_video = {} ## user -> list of distinct video names that user has events for
event_type = ['play_video','stop_video','pause_video','seek_video']

## Positions of the columns read from each log row.
EVENT_COL = 0
VIDEO_NAME_COL = 1
USER_COL = 13
VIDEO_ID_COL = 14

## Raw string: keeps \s, \d and \. as regex escapes instead of (invalid) string escapes.
unit_name_pattern = re.compile(r'Unit\s\d+\.\d+')

with open('../../data/' + fileName,'r') as csvfile :
    lines = csv.reader(csvfile, delimiter = ',', quotechar = '"')
    for line in lines :
        ## Blank or truncated rows do not contain every column read below;
        ## skip them instead of aborting the whole parse with an IndexError.
        if len(line) <= VIDEO_ID_COL:
            continue
        video_name = line[VIDEO_NAME_COL]
        ## Keep only the four interaction events on videos whose name starts with "Unit <n>.<m>".
        if line[EVENT_COL] not in event_type or not unit_name_pattern.match(video_name):
            continue
        if video_name not in video_names:
            video_names.append(video_name)
            video_name_id_matching[line[VIDEO_ID_COL]] = video_name
        ## Lists (not sets) are kept so each user's videos stay in first-seen order.
        watched = user_watched_video.setdefault(line[USER_COL], [])
        if video_name not in watched:
            watched.append(video_name)

## Sort videos in chronological order by their (unit, part) numbers.
## The two numbers are compared as integers: converting "12.10" to a float
## would collapse it to 12.1, tying it with "12.1" and placing it before "12.2".
def _unit_sort_key(video):
    """Return (unit, part) as ints, taken from the 'Unit <unit>.<part>' label in the name."""
    unit, part = re.search(r'Unit\s(\d+)\.(\d+)', video).groups()
    return (int(unit), int(part))

sorted_video_name = sorted(video_names, key=_unit_sort_key)

## Create a matrix of user - video watched relation
## 1 = user watched this video
## 0 = user didn't watch this video
outputName = 'EarthSciences_ResGeo202_Spring2015_UserVideo_Matrix.csv'
## csv.writer quotes any field containing the delimiter. Several video names
## include commas (e.g. "... Wellbore Stability 1, 2"); written unquoted they
## would split into extra header columns and no longer line up with the rows.
## The with-block guarantees the file is flushed and closed once written.
with open(outputName, 'w') as outputFile:
    ## lineterminator='\n' keeps the same line endings as the plain writes it replaces.
    matrix_writer = csv.writer(outputFile, lineterminator='\n')
    matrix_writer.writerow(['UserName'] + sorted_video_name)
    for user in user_watched_video.keys():
        watched = user_watched_video[user]
        flags = ['1' if video in watched else '0' for video in sorted_video_name]
        matrix_writer.writerow([user] + flags)

In [42]:
## Peek at the first video recorded for one user; the [:-1] slice drops the name's last character.
user_watched_video['70e57033c52922dc01e9363090d3a0704ec6643f'][0][:-1]

'Unit 1.1 - Course Overvie'

In [29]:
## Display the collected video names, in the order they were first seen in the log.
video_names

['Unit 2.2 - Relative Stress Magnitudes',
 'Unit 1.1 - Course Overview',
 'Unit 12.3 - Case Studies of Wellbore Stability 1, 2',
 'Unit 12.4 - Case Studies of Wellbore Stability 3-5',
 'Unit 12.5 - Case Studies of Wellbore Stability 6, 7',
 'Unit 12.6 - Case Studies of Wellbore Stability 8, 9',
 'Unit 9.2 - More on MiniFrac Tests',
 'Unit 11.3 - Least Principle Stress in the Gulf of Mexico',
 'Unit 11.4 - How NOT to Predict Shmin',
 'Unit 2.3 - Absolute Stress Magnitudes',
 'Unit 2.4 - Stress Variations',
 'Unit 15.3 - Introduction to Dynamic Hydrocarbon Migration in The Gulf of Mexico',
 'Unit 4.1 - Introduction to Rock Deformation Constitutive Laws',
 'Unit 3.1 - Basic Concepts',
 'Unit 1.2 - Overview of Units 2, 3',
 'Unit 9.3 - Hydraulic Fracturing and Shmax from Wellbore Breakouts',
 'Unit 2.1 - The Principal Stresses',
 'Unit 6.3 - The Critically-Stressed Crust III',
 'Unit 5.1 - Mohr-Columb Failure Criteria',
 'Unit 1.3 -  Overview of Units 4-8',
 'Unit 6.4 - Limits on Stress Ma

In [31]:
## Preview the chronological ordering of the videos.
## Unit and part numbers are compared as integers, because a float key would
## read "12.10" as 12.1 and place it before "12.2".
sorted(video_names, key=lambda video: tuple(int(number) for number in re.search(r'Unit\s(\d+)\.(\d+)', video).groups()))

['Unit 1.1 - Course Overview',
 'Unit 1.2 - Overview of Units 2, 3',
 'Unit 1.3 -  Overview of Units 4-8',
 'Unit 1.4 - Overview of Units 9-17',
 'Unit 1.5 - Overview of Units 18-20',
 'Unit 2.1 - The Principal Stresses',
 'Unit 2.2 - Relative Stress Magnitudes',
 'Unit 2.3 - Absolute Stress Magnitudes',
 'Unit 2.4 - Stress Variations',
 'Unit 3.1 - Basic Concepts',
 'Unit 3.2 - Pore Pressure Compartments',
 'Unit 3.3 - The Gulf of Mexico',
 'Unit 3.4 - Mechanisms of Overpressure',
 'Unit 3.5 - Pore Pressure Prediction',
 'Unit 3.6 - The Macondo Well',
 'Unit 4.1 - Introduction to Rock Deformation Constitutive Laws',
 'Unit 4.2 - Poroelasticity',
 'Unit 4.3 - Viscoplasticity of Sands',
 'Unit 5.1 - Mohr-Columb Failure Criteria',
 'Unit 5.2 - Other Failure Criteria',
 'Unit 5.3 - End Cap Failure Criteria',
 'Unit 5.4 - DARS',
 'Unit 5.5 - Rock Strength from Geophysical Logs',
 'Unit 5.6 - Rock Tensile Strength',
 'Unit 5.7 - Vertical Hydraulic Fracture Growth',
 'Unit 6.1 - The Critical

In [28]:
## Sanity check: the unit-name pattern accepts a minimal "Unit <n>.<m>" label.
## The raw string keeps \s and \d as regex escapes, and print() with a single
## argument behaves the same on Python 2 and Python 3.
if re.match(r'Unit\s\d+\.\d+','Unit 1.1'):
    print("yes")

yes
