Skip to content

Commit

Permalink
duration by bin
Browse files Browse the repository at this point in the history
  • Loading branch information
tlevine committed Jun 5, 2013
1 parent 2971843 commit 2690c74
Showing 1 changed file with 30 additions and 1 deletion.
31 changes: 30 additions & 1 deletion tidy.r
Expand Up @@ -9,13 +9,42 @@ load <- function() {

# Coerce the raw columns to the types the rest of the script works with.
#
# df: data frame with columns ts, uid, nick and status.
#     ts is passed to as.POSIXct() with the Unix epoch as origin
#     (presumably seconds since 1970-01-01 -- confirm against the loader).
#
# Returns the data frame with
#   - ts replaced by a POSIXct column named date (appended as the last column),
#   - uid and nick converted to factors,
#   - status as a factor restricted to the levels 'avail' and 'notavail'
#     (any other status value becomes NA).
#
# data.frame -> data.frame
set.types <- function(df) {
  # Convert the timestamp once, straight into its new name, then drop the
  # raw column so only `date` remains.
  df$date <- as.POSIXct(df$ts, origin = '1970-01-01')
  df$ts <- NULL

  df$uid <- as.factor(df$uid)
  df$nick <- as.factor(df$nick)
  df$status <- factor(df$status, levels = c('avail', 'notavail'))
  df
}

# Total time spent online ('avail') between start.date and end.date,
# based on the date and status columns.
# If you want to group by uid or date, do that first.
#
# df:         data frame with a date column (POSIXct, or anything
#             as.POSIXct() accepts) and a status column whose values are
#             'avail' / 'notavail'. Rows are assumed to be in chronological
#             order and to fall inside [start.date, end.date]; neither is
#             checked here.
# start.date: start of the window being measured.
# end.date:   end of the window being measured.
#
# Each row is treated as a status change at df$date:
#   - if the first row is 'notavail', the session was already running when
#     the window opened, so the time from start.date to that row counts;
#   - if the last row is 'avail', the session is still running when the
#     window closes, so the time from that row to end.date counts;
#   - a repeated status simply continues the current state.
#
# Returns the total as a plain number of seconds (0 for an empty data
# frame). Seconds are used throughout so the pieces being added can never
# end up in different difftime units.
# Stops with an error if status or date contains NA.
#
# data.frame, POSIXct, POSIXct -> numeric (seconds)
duration <- function(df, start.date, end.date) {
  # Skip empty data frames.
  if (nrow(df) == 0) {
    return(0)
  }

  status <- as.character(df$status)
  if (any(is.na(status)) || any(is.na(df$date))) {
    stop("duration() needs non-missing 'status' and 'date' values",
         call. = FALSE)
  }
  online <- status == 'avail'

  # Interval boundaries in seconds since the epoch:
  # start.date, every status change, end.date.
  bounds <- c(
    as.numeric(as.POSIXct(start.date)),
    as.numeric(as.POSIXct(df$date)),
    as.numeric(as.POSIXct(end.date))
  )

  # State in force during each of the nrow(df) + 1 intervals. Before the
  # first change the state is the opposite of what the first row switches
  # to (this covers sessions spanning the start cut); after row i the state
  # is that row's status (the last one covers sessions spanning the end cut).
  online.during <- c(!online[1], online)

  sum(diff(bounds)[online.during])
}

# IO ()
main <- function () {
df <- load()
Expand Down

0 comments on commit 2690c74

Please sign in to comment.