In [2]:
# A function to collect fundamental data from Yahoo Finance (Income Statement, Balance Sheet and Cash Flow statements)

#' Collect fundamental data from Yahoo Finance
#'
#' For every ticker in `stock`, requests the Income Statement, Balance Sheet
#' and Cash Flow pages on finance.yahoo.com, converts the first table matched
#' on each page into a numeric data frame, and stores the three data frames as
#' a named list called `<ticker>.f` in the environment `getFin()` was called
#' from.
#'
#' A failure for one ticker is reported with `message()` and does not stop the
#' remaining tickers from being processed.
#'
#' @param stock Character vector of ticker symbols, e.g. `c("GOOG", "MSFT")`.
#'   An empty vector results in no requests.
#' @return `NULL`, invisibly. Called for its side effects: it installs `rvest`
#'   if it is missing, attaches it, and assigns `<ticker>.f` lists (elements
#'   `IncomeStatement`, `BalanceSheet`, `CashFlow`) in the caller's environment.
getFin <- function(stock){
  # Look the package up by name. `%in% installed.packages()` compares against
  # every cell of the package matrix, not just the package-name column.
  if (!requireNamespace("rvest", quietly = TRUE)) {
    install.packages("rvest")
  }
  library(rvest)

  # Resolve the caller's environment once, up front, so the assignment target
  # does not depend on where inside nested calls it gets evaluated.
  target_env <- parent.frame()

  base_url <- "https://finance.yahoo.com/quote/"
  table_xpath <- '//*[@id="Col1-1-Financials-Proxy"]/section/div[3]/table'

  # Page suffix and rows to discard for each statement. Row 1 is consumed as
  # the column names; the other indices are presumably section-heading rows
  # of the scraped table -- TODO confirm against the current page layout.
  statements <- list(
    IncomeStatement = list(
      page = "/financials?p=",
      drop_rows = c(1, 5, 12, 20, 25)
    ),
    BalanceSheet = list(
      page = "/balance-sheet?p=",
      drop_rows = c(1, 2, 17, 28)
    ),
    CashFlow = list(
      page = "/cash-flow?p=",
      drop_rows = c(1, 3, 11, 16)
    )
  )

  # Download one statement page for `ticker` and return it as a numeric data
  # frame with the first table column as row names.
  fetch_statement <- function(ticker, page, drop_rows) {
    link <- paste0(base_url, ticker, page, ticker)
    tables <- html_table(
      html_nodes(html_session(link), xpath = table_xpath),
      fill = TRUE
    )
    if (length(tables) == 0L) {
      stop("no financial table found at ", link, call. = FALSE)
    }

    # Normalise to a base data.frame so the indexing below behaves the same
    # whether html_table() hands back a data.frame or a tibble.
    raw <- as.data.frame(tables[[1]], stringsAsFactors = FALSE)
    names(raw) <- as.character(unlist(raw[1, , drop = FALSE], use.names = FALSE))

    kept <- raw[-drop_rows, , drop = FALSE]
    row_labels <- as.character(kept[[1]])
    values <- kept[, -1, drop = FALSE]

    # Convert column by column. apply() would coerce to a matrix and collapse
    # to a plain vector when only a single row is left.
    values[] <- lapply(values, function(column) {
      as.numeric(gsub(",", "", column))
    })
    rownames(values) <- row_labels
    values
  }

  # seq_along() yields no iterations for an empty `stock`; 1:length(stock)
  # would iterate over c(1, 0).
  for (i in seq_along(stock)) {
    tryCatch(
      {
        fundamentals <- lapply(statements, function(spec) {
          fetch_statement(stock[i], spec[["page"]], spec[["drop_rows"]])
        })
        assign(paste0(stock[i], ".f"), value = fundamentals, envir = target_env)
      },
      error = function(cond) {
        # Report only the condition's text; pasting the condition object
        # itself also appends its deparsed call with no separators.
        message(stock[i], " gave error: ", conditionMessage(cond))
      }
    )
  }

  invisible(NULL)
}

In [8]:
# Tickers whose fundamentals should be downloaded
symbols <- c("GOOG", "MSFT", "HOG")

# Run the collector with warnings silenced; it creates one `<ticker>.f`
# object per symbol in this environment
suppressWarnings(getFin(symbols))

# Preview the statements collected for GOOG
head(GOOG.f)

Unnamed: 0,12/31/2018,12/31/2017,12/31/2016,12/31/2015
Total Revenue,136819000.0,110855000.0,90272000.0,74989000.0
Cost of Revenue,59549000.0,45583000.0,35138000.0,28164000.0
Gross Profit,77270000.0,65272000.0,55134000.0,46825000.0
Research Development,21419000.0,16625000.0,13948000.0,12282000.0
Selling General and Administrative,24459000.0,19765000.0,17470000.0,15183000.0
Non Recurring,,,,
Others,,,,
Total Operating Expenses,105427000.0,81973000.0,66556000.0,55629000.0
Operating Income or Loss,31392000.0,28882000.0,23716000.0,19360000.0
Total Other Income/Expenses Net,3521000.0,-1689000.0,434000.0,291000.0

Unnamed: 0,12/31/2018,12/31/2017,12/31/2016,12/31/2015
Cash And Cash Equivalents,16701000.0,10715000.0,12918000.0,15409000.0
Short Term Investments,92439000.0,91156000.0,73415000.0,56517000.0
Net Receivables,21193000.0,18705000.0,15632000.0,13459000.0
Inventory,1107000.0,749000.0,268000.0,491000.0
Other Current Assets,4236000.0,2983000.0,3175000.0,1590000.0
Total Current Assets,135676000.0,124308000.0,105408000.0,90114000.0
Long Term Investments,13859000.0,7813000.0,5878000.0,5183000.0
"Property, plant and equipment",59719000.0,42383000.0,34234000.0,29016000.0
Goodwill,17888000.0,16747000.0,16468000.0,15869000.0
Intangible Assets,2220000.0,2692000.0,3307000.0,3847000.0

Unnamed: 0,12/31/2018,12/31/2017,12/31/2016,12/31/2015
Net Income,30736000.0,12662000.0,19478000.0,16348000.0
Depreciation,9029000.0,6899000.0,6100000.0,5024000.0
Adjustments To Net Income,3298000.0,8284000.0,7158000.0,5609000.0
Changes In Accounts Receivables,-2169000.0,-3768000.0,-2578000.0,-2094000.0
Changes In Liabilities,1438000.0,1121000.0,333000.0,246000.0
Changes In Inventories,,,,
Changes In Other Operating Activities,7890000.0,3682000.0,2420000.0,1618000.0
Total Cash Flow From Operating Activities,47971000.0,37091000.0,36036000.0,26572000.0
Capital Expenditure,-25139000.0,-13184000.0,-10212000.0,-9950000.0
Investments,-1972000.0,-19448000.0,-18229000.0,-13635000.0
