# Exploring industry returns in the Fama-French data-set

The [49 Industry Portfolios](https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/Data_Library/det_49_ind_port.html) data-set contains daily return series for 49 industry portfolios. Here, we look at some statistical properties of those returns.

The documentation for the Fama-French data-set can be found [here (Python)](https://plutopy.readthedocs.io/en/latest/FamaFrench.html) and [here (R)](https://shyams80.github.io/plutoR/docs/reference/FamaFrench-class.html).

In [1]:
library(tidyverse)
library(ggthemes)
library(odbc)
library(plutoR)
library(quantmod)
library(lubridate)
library(reshape2)
library(PerformanceAnalytics)
library(ggrepel)
library(tbl2xts)

# Session-wide options: a large scipen penalty keeps numbers out of
# scientific notation; strings stay character instead of becoming factors.
options(scipen = 999, stringsAsFactors = FALSE)

# Project-local configuration and shared plotting helpers
source("config.R")
source("goofy/plot.common.R")

# Handle used by the later cells to query the Fama-French data-set
famaFrench <- FamaFrench()

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.2.1 ──
[32m✔[39m [34mggplot2[39m 3.2.1     [32m✔[39m [34mpurrr  [39m 0.3.2
[32m✔[39m [34mtibble [39m 2.1.3     [32m✔[39m [34mdplyr  [39m 0.8.3
[32m✔[39m [34mtidyr  [39m 0.8.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mforcats[39m 0.4.0
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
Loading required package: xts
Loading required package: zoo

Attaching package: ‘zoo’

The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric

Registered S3 method overwritten by 'xts':
  method     from
  as.zoo.xts zoo 

Attaching package: ‘xts’

The following objects are masked from ‘package:dplyr’:

    first, last

Loading 

In [2]:
startDt <- as.Date("1990-01-01")

# Fetch the daily 49-industry returns for a single return type and reshape
# them into a wide xts object with one column per industry id.
#   retType: value matched against the RET_TYPE column
#            ('AVWRD' and 'AEWRD' are the two types used below)
#   fromDt:  earliest TIME_STAMP to keep (inclusive); defaults to startDt
# Returns whatever tbl_xts() produces for the reshaped table.
loadIndustryReturns <- function(retType, fromDt = startDt) {
    famaFrench$Industry49Daily() %>%
        # !! inlines the local values into the filter expression so they are
        # not mistaken for column names by the data source
        filter(RET_TYPE == !!retType & TIME_STAMP >= !!fromDt) %>%
        # NOTE(review): RET looks like it is quoted in percent - confirm
        mutate(R = RET/100) %>%
        select(DATE = TIME_STAMP, KEY_ID, R) %>%
        collect() %>%
        # the KEY_ID column has industry id's
        # we want them as column names
        mutate(group=1) %>%
        spread(KEY_ID, R) %>%
        select(-group) %>%
        tbl_xts()
}

#value (market-cap) weighted
valueWtd <- loadIndustryReturns('AVWRD')

#equal weighted
equalWtd <- loadIndustryReturns('AEWRD')

In [3]:
# Rolling window length in observations
# (presumably ~220 trading days a year x 5 years - confirm)
lookbackDays <- 220 * 5

# Cumulative return over each rolling window. by.column = FALSE passes the
# whole multi-column window to Return.cumulative in one call instead of
# applying it to each column separately.
rrValueWtd <- rollapply(valueWtd, lookbackDays, Return.cumulative, by.column = FALSE)
rrEqualWtd <- rollapply(equalWtd, lookbackDays, Return.cumulative, by.column = FALSE)

# Drop every row that contains an NA
rrValueWtd <- na.omit(rrValueWtd)
rrEqualWtd <- na.omit(rrEqualWtd)

# Re-apply the industry ids as column names on the rolling results
names(rrValueWtd) <- names(valueWtd)
names(rrEqualWtd) <- names(equalWtd)

In [4]:
# Apply a summary function to every column of a matrix-like object and
# return the results as a two-column data frame.
#   dframe: object accepted by apply() (matrix, data frame, xts, ...)
#   appFn:  function applied to each column; expected to return a single
#           value per column
# Returns a data frame whose first column holds the per-column results and
# is named after the expression passed as appFn (e.g. "min"), and whose
# second column 'I' holds the source column names. The source column names
# are also kept as row names.
applyFnDf <- function(dframe, appFn){
    # Build exactly one label from what the caller wrote. A bare name such
    # as `min` gives "min"; anything else (pkg::fn, an anonymous function)
    # is deparsed and collapsed so it can never expand to several strings.
    fnExpr <- substitute(appFn)
    fnLabel <- if (is.symbol(fnExpr)) {
        as.character(fnExpr)
    } else {
        paste(deparse(fnExpr), collapse = " ")
    }

    colStats <- apply(dframe, 2, appFn)
    result <- data.frame(colStats)
    # Direct assignment keeps 'I' a character column regardless of the
    # session's stringsAsFactors setting
    result[["I"]] <- row.names(result)
    colnames(result) <- c(fnLabel, 'I')
    result
}

In [5]:
# Per-industry minimum, maximum and standard deviation of the rolling
# cumulative returns, for both weighting schemes
minValWtd <- applyFnDf(rrValueWtd, min)
minEqlWtd <- applyFnDf(rrEqualWtd, min)

maxValWtd <- applyFnDf(rrValueWtd, max)
maxEqlWtd <- applyFnDf(rrEqualWtd, max)

sdValWtd <- applyFnDf(rrValueWtd, sd)
sdEqlWtd <- applyFnDf(rrEqualWtd, sd)

# Combine the three statistics into one table keyed on the industry id 'I'
statsValDf <- minValWtd %>%
    inner_join(maxValWtd, by = "I") %>%
    inner_join(sdValWtd, by = "I")

statsValDf %>% print()

# Built the same way as statsValDf. Merging on row names (by = 0) twice does
# not work: the first merge replaces the industry row names with 1..n, so
# the second merge finds no matching keys.
statsEqlDf <- minEqlWtd %>%
    inner_join(maxEqlWtd, by = "I") %>%
    inner_join(sdEqlWtd, by = "I")

#print("MKT CAP WT")
#print(head(statsValDf))

#print("EQ WT")
#print(head(statsEqlDf))

Joining, by = "I"
Joining, by = "I"


          min     I      max        sd
1  -0.2532453  AERO 2.723510 0.6732770
2  -0.4074237 AGRIC 2.918361 0.5828927
3  -0.7431826 AUTOS 4.305222 0.6150468
4  -0.7484332 BANKS 3.102721 0.8154830
5  -0.2183618  BEER 2.484587 0.5572048
6  -0.5767641 BLDMT 3.567515 0.5667385
7  -0.7936408 BOOKS 2.820014 0.6729852
8  -0.4499764 BOXES 1.980728 0.4530478
9  -0.4315746 BUSSV 1.778622 0.5353209
10 -0.3609480 CHEMS 2.706431 0.4673437
11 -0.7657270 CHIPS 5.189433 1.1536726
12 -0.3782547 CLTHS 3.524852 0.5133042
13 -0.5927093 CNSTR 3.573401 0.7403047
14 -0.9631609  COAL 7.302051 1.6672218
15 -0.2876000 DRUGS 3.749956 0.8628797
16 -0.3043374 ELCEQ 3.978620 0.8830644
17 -0.6453013 FABPR 3.848893 0.5982059
18 -0.6100470   FIN 4.382467 0.9356484
19 -0.1204453  FOOD 1.898008 0.3957456
20 -0.6996230   FUN 5.432048 0.7353736
21 -0.7739683  GOLD 3.117030 0.6819889
22 -0.3086047  GUNS 2.773612 0.7423928
23 -0.7421280 HARDW 6.542397 1.3518820
24 -0.3694879  HLTH 1.822324 0.4585224
25 -0.2153368 HSHLD 1.945

“column name ‘Row.names’ is duplicated in the result”

This notebook was created using [pluto](http://pluto.studio). Learn more [here](https://github.com/shyams80/pluto)