Pharmaceutical drug spending by country. Data from https://datahub.io/core/pharmaceutical-drug-spending and https://data.oecd.org/healthres/pharmaceutical-spending.htm.

Total pharmaceutical spending refers in most countries to “net” spending, i.e. spending adjusted for possible rebates payable by manufacturers, wholesalers or pharmacies. This indicator is measured as a share of total health spending, in USD per capita (using economy-wide PPPs) and as a share of GDP. Pharmaceuticals consumed in hospitals and other health care settings are excluded.

In [None]:
# Install plotly only when it is not already available, so that re-running the
# notebook does not reinstall (and possibly rebuild from source) the package.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}


  There is a binary version available but the source version is later:
       binary source needs_compilation
plotly  4.9.3 4.10.1             FALSE



installing the source package 'plotly'



In [None]:
library(dplyr)
library(plotly)
library(ggplot2)
library(lubridate)

## Data Import

In [None]:
# Location of the pharmaceutical-spending CSV. Defaults to the original local
# copy; set the PHARMA_SPENDING_CSV environment variable to point at the file
# when running on another machine.
data_path <- Sys.getenv(
  "PHARMA_SPENDING_CSV",
  unset = "C:/Users/Sydney/Desktop/pharmaspending.csv"
)

# Fail early with an actionable message rather than read.csv()'s generic
# "cannot open file" error.
if (!file.exists(data_path)) {
  stop("Data file not found: ", data_path, call. = FALSE)
}

df <- read.csv(data_path)

In [None]:
# Preview the first six rows to confirm the file was parsed as expected.
head(df, n = 6L)

## Data Exploration and Cleaning

In [None]:
# List the distinct values that occur in the FLAG_CODES column.
unique(df[["FLAG_CODES"]])

In [None]:
# Count the missing values in each column.
df %>%
  is.na() %>%
  colSums()

In [None]:
# Show the column types and a sample of values for each column.
str(object = df)

In [None]:
# Per-column summary statistics, used as a quick scan for obvious outliers.
summary(object = df)

## Data Summaries

Planned graphs:

- Pharmaceutical spending as a % of health-care (HC) expenditure, by country over time - graph this
- Total spend by country per year over time - graph this


In [None]:
# Find and compare the average vs. median TOTAL_SPEND for each country
# (LOCATION).
#
# Both statistics are computed in one grouped pass instead of two identical
# pipelines that are merged afterwards. na.rm = TRUE keeps a single missing
# observation from turning a country's whole summary into NA.
hce_cntry_spnd_tbl <- df %>%
  group_by(LOCATION) %>%
  summarize(
    AVE_SPEND = mean(TOTAL_SPEND, na.rm = TRUE),
    MED_SPEND = median(TOTAL_SPEND, na.rm = TRUE)
  )

# Per-statistic tables, kept under their original names for any later code
# that refers to them.
hce_cntry_ave_spnd <- hce_cntry_spnd_tbl %>% select(LOCATION, AVE_SPEND)
hce_cntry_med_spnd <- hce_cntry_spnd_tbl %>% select(LOCATION, MED_SPEND)

# merge() previously produced a plain data.frame; keep that class.
hce_cntry_spnd <- as.data.frame(hce_cntry_spnd_tbl)

In [None]:
# Graph each country's average and median total spend, joined by a segment so
# the gap between the two statistics is easy to compare.
#
# The point colours are mapped inside aes() rather than set as constants, so
# ggplot draws a legend saying which colour is the average and which is the
# median. Labels say "Pharmaceutical" because the data set is pharmaceutical
# spending, not total healthcare spending.
hce_cntry_spnd_g <- ggplot(hce_cntry_spnd, aes(x = LOCATION)) +
  # Draw the connector first so the points sit on top of it.
  geom_segment(aes(y = MED_SPEND, yend = AVE_SPEND, xend = LOCATION)) +
  geom_point(aes(y = AVE_SPEND, color = "Average")) +
  geom_point(aes(y = MED_SPEND, color = "Median")) +
  scale_color_manual(
    values = c(Average = "slateblue4", Median = "deeppink1")
  ) +
  scale_y_log10() +
  labs(
    title = "Average v. Median Pharmaceutical Spend by Country",
    subtitle = "1970-2016. Total Spend in Millions.",
    caption = "Dates not all inclusive. Note y axis log10 scale.",
    x = "Country",
    y = "Total Pharmaceutical Spend",
    color = "Statistic"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )

hce_cntry_spnd_g

In [None]:
# Find and compare the average vs. median PC_GDP (spend as a % of GDP) for
# each country (LOCATION).
#
# Both statistics are computed in one grouped pass instead of two identical
# pipelines that are merged afterwards. na.rm = TRUE keeps a single missing
# observation from turning a country's whole summary into NA.
hce_cntry_gdp_tbl <- df %>%
  group_by(LOCATION) %>%
  summarize(
    AVE_GDP = mean(PC_GDP, na.rm = TRUE),
    MED_GDP = median(PC_GDP, na.rm = TRUE)
  )

# Per-statistic tables, kept under their original names for any later code
# that refers to them.
hce_cntry_ave_gdp <- hce_cntry_gdp_tbl %>% select(LOCATION, AVE_GDP)
hce_cntry_med_gdp <- hce_cntry_gdp_tbl %>% select(LOCATION, MED_GDP)

# merge() previously produced a plain data.frame; keep that class.
hce_cntry_gdp <- as.data.frame(hce_cntry_gdp_tbl)

In [None]:
# Graph each country's average and median spend as a % of GDP, joined by a
# segment so the gap between the two statistics is easy to compare.
#
# The point colours are mapped inside aes() rather than set as constants, so
# ggplot draws a legend saying which colour is the average and which is the
# median. The subtitle no longer says "Total Spend in Millions": this axis is
# a percentage of GDP, not a monetary amount. Labels say "Pharmaceutical"
# because the data set is pharmaceutical spending.
hce_cntry_gdp_g <- ggplot(hce_cntry_gdp, aes(x = LOCATION)) +
  # Draw the connector first so the points sit on top of it.
  geom_segment(aes(y = MED_GDP, yend = AVE_GDP, xend = LOCATION)) +
  geom_point(aes(y = AVE_GDP, color = "Average")) +
  geom_point(aes(y = MED_GDP, color = "Median")) +
  scale_color_manual(
    values = c(Average = "slateblue4", Median = "deeppink1")
  ) +
  scale_y_log10() +
  labs(
    title = "Average v. Median Pharmaceutical Expenditure as a % of GDP",
    subtitle = "1970-2016.",
    caption = "Dates not all inclusive. Note y axis log10 scale.",
    x = "Country",
    y = "Pharmaceutical Expenditure as a % of GDP",
    color = "Statistic"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )

hce_cntry_gdp_g

In [None]:
# Re-check the first six rows of the raw data before plotting it directly.
head(df, n = 6L)

In [None]:
# Scatter plot of PC_GDP against TIME with one colour per LOCATION, then
# converted to an interactive plotly widget.
cntry_gdp_plot <- ggplot(
  data = df,
  mapping = aes(x = TIME, y = PC_GDP, color = LOCATION)
) +
  geom_point() +
  labs(
    title = "Percent of GDP per Year by Country",
    x = "Year",
    y = "Percent of GDP"
  ) +
  theme_bw()

ggplotly(cntry_gdp_plot)