In [None]:
install.packages("ggcorrplot")
# car supplies vif(), which the multicollinearity cells further down call
install.packages("car")

In [None]:
library(ggplot2)
library(tidyverse)
library(ggcorrplot)
# Attached last; car masks dplyr::recode() and purrr::some(), neither of
# which this notebook uses
library(car)

In [None]:
# Read the tab-separated data file into pData. read.delim() defaults to a
# header row and a tab separator; the first column becomes the row names
# instead of a data column.
file <- "/content/bloodpress.txt"
pData <- read.delim(file, row.names = 1, stringsAsFactors = FALSE)
head(pData, n = 5)

In [None]:
# Sample correlation matrix across all columns of pData
sample_corrMatrix <- cor(pData)
print(sample_corrMatrix)

# Base-graphics scatterplot matrix of every pair of columns
pairs(pData)

# The same correlation matrix rendered by ggcorrplot, one circle per cell
ggcorrplot(sample_corrMatrix, method = "circle")

In [None]:
# Simple linear regression of BP on Stress
model <- lm(BP ~ Stress, data = pData)
summary(model)

# Scatter plot with the fitted least-squares line and the point of sample
# means. The mean marker and its label are added with annotate() so each is
# drawn exactly once; mapping constants through aes() inside geom_point() /
# geom_text() would draw one copy per data row, stacked on top of each other.
ggplot(data = pData, aes(x = Stress, y = BP)) +
  geom_point(size = 1, color = "blue") +
  geom_smooth(method = "lm", formula = y ~ x, color = "red", se = FALSE) +
  annotate("point", x = mean(pData$Stress), y = mean(pData$BP),
           size = 1.5, color = "green") +
  annotate("text", x = mean(pData$Stress), y = mean(pData$BP),
           label = "mean sample", hjust = 0, vjust = -0.5,
           size = 6, color = "green") +
  labs(title = "Sample regression line", x = "Stress", y = "BP") +
  theme(
    # one size for the tick labels on both axes
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14, face = "bold")
  )

In [None]:
# Simple linear regression of BP on BSA
model <- lm(BP ~ BSA, data = pData)
summary(model)

# Scatter plot with the fitted least-squares line and the point of sample
# means. The mean marker and its label are added with annotate() so each is
# drawn exactly once; mapping constants through aes() inside geom_point() /
# geom_text() would draw one copy per data row, stacked on top of each other.
ggplot(data = pData, aes(x = BSA, y = BP)) +
  geom_point(size = 1, color = "blue") +
  geom_smooth(method = "lm", formula = y ~ x, color = "red", se = FALSE) +
  annotate("point", x = mean(pData$BSA), y = mean(pData$BP),
           size = 1.5, color = "green") +
  annotate("text", x = mean(pData$BSA), y = mean(pData$BP),
           label = "mean sample", hjust = 0, vjust = -0.5,
           size = 6, color = "green") +
  labs(title = "Sample regression line", x = "BSA", y = "BP") +
  theme(
    # one size for the tick labels on both axes
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14, face = "bold")
  )

In [None]:
# Multiple linear regression of BP on Stress and BSA together
model <- lm(BP ~ Stress + BSA, data = pData)
summary(model)

In [None]:
# Simple linear regression of BP on Weight
model <- lm(BP ~ Weight, data = pData)
summary(model)

# Scatter plot with the fitted least-squares line and the point of sample
# means. The mean marker and its label are added with annotate() so each is
# drawn exactly once; mapping constants through aes() inside geom_point() /
# geom_text() would draw one copy per data row, stacked on top of each other.
ggplot(data = pData, aes(x = Weight, y = BP)) +
  geom_point(size = 1, color = "blue") +
  geom_smooth(method = "lm", formula = y ~ x, color = "red", se = FALSE) +
  annotate("point", x = mean(pData$Weight), y = mean(pData$BP),
           size = 1.5, color = "green") +
  annotate("text", x = mean(pData$Weight), y = mean(pData$BP),
           label = "mean sample", hjust = 0, vjust = -0.5,
           size = 6, color = "green") +
  labs(title = "Sample regression line", x = "Weight", y = "BP") +
  theme(
    # one size for the tick labels on both axes
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14, face = "bold")
  )

In [None]:
# Multiple linear regression of BP on BSA and Weight
model <- lm(BP ~ BSA + Weight, data = pData)
summary(model)

In [None]:
# Multiple linear regression of BP on Weight and BSA: the same two
# predictors as the BSA + Weight fit, entered in the opposite order
model <- lm(BP ~ Weight + BSA, data = pData)
summary(model)

In [None]:
# Multiple linear regression of BP on every other column of pData
# (the `.` on the right-hand side expands to all columns except BP)
model <- lm(BP ~ ., data = pData)
summary(model)

In [None]:
# Variance inflation factors (VIF) for the model built using all predictors.
# vif() comes from the car package; the call is namespace-qualified so it
# resolves to car's implementation whether or not car has been attached.
car::vif(model)

In [None]:
# Auxiliary regression: Weight explained by the remaining predictors
# (Age, BSA, Dur, Pulse, Stress). The R-squared of a fit like this is what
# the VIF of Weight is built from: VIF = 1 / (1 - R^2).
model <- lm(Weight ~ Age + BSA + Dur + Pulse + Stress, data = pData)
summary(model)

In [None]:
# The VIF for the predictor "Weight" came out larger than 5.
# Going by the sample correlation matrix, BSA and Pulse are the predictors
# highly correlated with Weight, so both are left out here.
# Multiple linear regression of BP on the predictors that remain
model <- lm(BP ~ Age + Weight + Dur + Stress, data = pData)
summary(model)

In [None]:
# VIF for the reduced model; namespace-qualified so the call works whether
# or not car has been attached
car::vif(model) # no multicollinearity detected

In [None]:
# Final model: keep only Age and Weight, dropping the predictors judged
# insignificant
model <- lm(BP ~ Age + Weight, data = pData)
summary(model)

In [None]:
# Add a categorical StressLevel column derived from Stress:
#   0 to 50 (both ends inclusive) -> "Low"
#   above 50                      -> "High"
# Rows matching neither condition (negative or missing Stress) get NA,
# because case_when() returns NA when no condition is TRUE.
pData <- mutate(
  pData,
  StressLevel = case_when(
    between(Stress, 0, 50) ~ "Low",
    Stress > 50 ~ "High"
  )
)
head(pData, 5)