# Transforming and Enriching Data

In [1]:
# Set knitr's default chunk options so that warnings and messages emitted by
# the code cells are not included in the rendered output.
knitr::opts_chunk$set(warning = FALSE, message = FALSE) 

First, install the required R packages (`knitr`, `dplyr`, and `rmarkdown`) if you have not done so already.

## Load necessary packages

In [3]:
library(knitr)
library(dplyr)
library(rmarkdown)

## Load the saved outputs of the data access notebooks

In [4]:
# Restore the R objects stored in each .RData file into the current
# environment. NOTE(review): the files are presumably written by the
# data-access notebooks of the same names and must already exist in the
# working directory -- confirm before running.
load("01_Accessing_and_Reading_Local_Files.RData")
load("02_Accessing_and_Reading_Data_Lake_Files.RData")
load("03_Accessing_and_Reading_Database-Data_Lakehouse_Data.RData")


## Load the saved output of the data joining notebook

In [5]:
# Restore the objects stored in 01_Combining_Data.RData. The cells below use a
# data frame named `df` that is not created in this notebook, so it is
# presumably provided by this file -- TODO confirm.
load("01_Combining_Data.RData")

## Feature Engineering

### Replace codes with labels for demHomeOwner

In [6]:
# Derive a labelled home-ownership column from the raw code column:
# "U" becomes "Unknown" and "H" becomes "HomeOwner".
df <- mutate(
  df,
  demHomeOwner = recode(DemHomeOwnerCode, "U" = "Unknown", "H" = "HomeOwner")
)

# The raw code column is no longer needed once the labelled column exists.
df <- select(df, -DemHomeOwnerCode)

# Preview the first few labels.
head(df[["demHomeOwner"]])

### Compute customer age

In [7]:
# Compute each customer's age in completed years as of today's date.
#
# The age is the difference in calendar years, reduced by one when this year's
# birthday has not been reached yet. Dividing an elapsed day count by 365.25
# is only an approximation and can under-report the age by one year on or just
# after a birthday (e.g. 365 days / 365.25 truncates to 0 instead of 1).
# Rows with a missing birth date get NA; no extra NA handling is required
# because every step propagates NA.
df <- df %>%
  mutate(
    customerAge = {
      today <- Sys.Date()
      birth <- as.Date(birthDate)
      # Zero-padded "mmdd" strings sort in calendar order, so a plain string
      # comparison tells whether the birthday has occurred this year.
      had_birthday <- format(today, "%m%d") >= format(birth, "%m%d")
      year_diff <- as.integer(format(today, "%Y")) -
        as.integer(format(birth, "%Y"))
      year_diff - as.integer(!had_birthday)
    }
  )

head(df$customerAge)  # Display the first few values

### Compute average purchase amount per ad

In [8]:
# Divide AvgPurchaseAmount12 by intAdExposureCount12 to get the purchase
# amount per ad exposure.
#
# A zero exposure count would make the division return Inf (or NaN for 0 / 0),
# so those rows are set to NA instead. Rows where the count itself is NA also
# yield NA, as they did before.
df <- df %>%
  mutate(
    AvgPurchasePerAd = if_else(
      intAdExposureCount12 != 0,
      AvgPurchaseAmount12 / intAdExposureCount12,
      NA_real_
    )
  )

head(df$AvgPurchasePerAd)  # Display the first few values

In [9]:
# Write the enriched `df` to an .RData file in the working directory so it can
# be restored later with load() (presumably by downstream notebooks).
save(df, file = "02_Transforming_and_Enriching_Data.RData")