In [1]:
library(tidyverse)
library(lubridate)
library(stringr)

Loading tidyverse: ggplot2
Loading tidyverse: tibble
Loading tidyverse: tidyr
Loading tidyverse: readr
Loading tidyverse: purrr
Loading tidyverse: dplyr
Conflicts with tidy packages ---------------------------------------------------
filter(): dplyr, stats
lag():    dplyr, stats

Attaching package: ‘lubridate’

The following object is masked from ‘package:base’:

    date



In [2]:
# Read the raw AdWords export. Text columns are kept as character vectors;
# otherwise the dollar-formatted values would be loaded as factors.
adwords <- read.csv(
  file = "adwords_clicks.csv",
  stringsAsFactors = FALSE
)
adwords %>% glimpse()

Observations: 136
Variables: 4
$ Date   <chr> "1-Jul-17", "2-Jul-17", "3-Jul-17", "4-Jul-17", "5-Jul-17", ...
$ Cost   <chr> "$195.79 ", "$197.27 ", "$193.24 ", "$180.57 ", "$222.57 ", ...
$ Clicks <int> 284, 289, 278, 261, 319, 312, 311, 416, 359, 436, 207, 319, ...
$ Avg    <chr> "$0.69 ", "$0.68 ", "$0.70 ", "$0.69 ", "$0.70 ", "$0.56 ", ...


In [3]:
# Convert the currency fields (Cost, Avg) into numeric data types.
#
# parse_number() keeps only the numeric part of each string: it ignores the
# leading "$" and the trailing space, and it understands thousands separators,
# so a value such as "$1,234.56 " parses to 1234.56 instead of turning into NA
# the way a fixed-position substring followed by as.numeric() would.

adwords$Cost <- adwords$Cost %>% parse_number()
adwords$Avg <- adwords$Avg %>% parse_number()

glimpse(adwords)

Observations: 136
Variables: 4
$ Date   <chr> "1-Jul-17", "2-Jul-17", "3-Jul-17", "4-Jul-17", "5-Jul-17", ...
$ Cost   <dbl> 195.79, 197.27, 193.24, 180.57, 222.57, 175.78, 153.65, 222....
$ Clicks <int> 284, 289, 278, 261, 319, 312, 311, 416, 359, 436, 207, 319, ...
$ Avg    <dbl> 0.69, 0.68, 0.70, 0.69, 0.70, 0.56, 0.49, 0.54, 0.58, 0.49, ...


In [4]:
# Parse the day-month-year strings in Date (e.g. "1-Jul-17") straight into
# Date values with dmy() and store them in a new "YMD" field. No regular
# expression reordering is needed because dmy() already expects this order.

adwords$YMD <- dmy(adwords$Date)
head(adwords)
glimpse(adwords)

Date,Cost,Clicks,Avg,YMD
1-Jul-17,195.79,284,0.69,2017-07-01
2-Jul-17,197.27,289,0.68,2017-07-02
3-Jul-17,193.24,278,0.7,2017-07-03
4-Jul-17,180.57,261,0.69,2017-07-04
5-Jul-17,222.57,319,0.7,2017-07-05
6-Jul-17,175.78,312,0.56,2017-07-06


Observations: 136
Variables: 5
$ Date   <chr> "1-Jul-17", "2-Jul-17", "3-Jul-17", "4-Jul-17", "5-Jul-17", ...
$ Cost   <dbl> 195.79, 197.27, 193.24, 180.57, 222.57, 175.78, 153.65, 222....
$ Clicks <int> 284, 289, 278, 261, 319, 312, 311, 416, 359, 436, 207, 319, ...
$ Avg    <dbl> 0.69, 0.68, 0.70, 0.69, 0.70, 0.56, 0.49, 0.54, 0.58, 0.49, ...
$ YMD    <date> 2017-07-01, 2017-07-02, 2017-07-03, 2017-07-04, 2017-07-05,...


In [5]:
# Derive two labelled columns from the parsed date: the full month name and
# the full weekday name (label = TRUE with abbr = FALSE gives unabbreviated
# labels).
adwords[["Month"]] <- month(adwords[["YMD"]], label = TRUE, abbr = FALSE)
adwords[["Weekday"]] <- wday(adwords[["YMD"]], label = TRUE, abbr = FALSE)
head(adwords)
glimpse(adwords)

Date,Cost,Clicks,Avg,YMD,Month,Weekday
1-Jul-17,195.79,284,0.69,2017-07-01,July,Saturday
2-Jul-17,197.27,289,0.68,2017-07-02,July,Sunday
3-Jul-17,193.24,278,0.7,2017-07-03,July,Monday
4-Jul-17,180.57,261,0.69,2017-07-04,July,Tuesday
5-Jul-17,222.57,319,0.7,2017-07-05,July,Wednesday
6-Jul-17,175.78,312,0.56,2017-07-06,July,Thursday


Observations: 136
Variables: 7
$ Date    <chr> "1-Jul-17", "2-Jul-17", "3-Jul-17", "4-Jul-17", "5-Jul-17",...
$ Cost    <dbl> 195.79, 197.27, 193.24, 180.57, 222.57, 175.78, 153.65, 222...
$ Clicks  <int> 284, 289, 278, 261, 319, 312, 311, 416, 359, 436, 207, 319,...
$ Avg     <dbl> 0.69, 0.68, 0.70, 0.69, 0.70, 0.56, 0.49, 0.54, 0.58, 0.49,...
$ YMD     <date> 2017-07-01, 2017-07-02, 2017-07-03, 2017-07-04, 2017-07-05...
$ Month   <ord> July, July, July, July, July, July, July, July, July, July,...
$ Weekday <ord> Saturday, Sunday, Monday, Tuesday, Wednesday, Thursday, Fri...


In [7]:
# Save the enriched data frame (original fields plus the date-derived columns)
adwords %>% write_csv("adwords_w_dates.csv")


In [8]:
# List the files in the current working directory
list.files()