# Exploring industry returns in the Fama-French data-set

The [49 Industry Portfolios](https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/Data_Library/det_49_ind_port.html) data-set has a series of daily returns of 49 different industry types. Here, we look at some of their statistical properties.

The documentation for the Fama-French data-set can be found [here](https://plutopy.readthedocs.io/en/latest/FamaFrench.html) and [here](https://shyams80.github.io/plutoR/docs/reference/FamaFrench-class.html)

In [1]:
library(tidyverse)
library(ggthemes)
library(odbc)
library(plutoR)
library(quantmod)
library(lubridate)
library(reshape2)
library(PerformanceAnalytics)
library(ggrepel)
library(tbl2xts)

options("scipen"=999)
options(stringsAsFactors = FALSE)

source("config.R")
source("goofy/plot.common.R")

#initialize
famaFrench <- FamaFrench()

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.2.1 ──
[32m✔[39m [34mggplot2[39m 3.2.1     [32m✔[39m [34mpurrr  [39m 0.3.2
[32m✔[39m [34mtibble [39m 2.1.3     [32m✔[39m [34mdplyr  [39m 0.8.3
[32m✔[39m [34mtidyr  [39m 0.8.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mforcats[39m 0.4.0
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
Loading required package: xts
Loading required package: zoo

Attaching package: ‘zoo’

The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric

Registered S3 method overwritten by 'xts':
  method     from
  as.zoo.xts zoo 

Attaching package: ‘xts’

The following objects are masked from ‘package:dplyr’:

    first, last

Loading 

In [2]:
startDt <- as.Date("1990-01-01")

#value (market-cap) weighted
valueWtd <- famaFrench$Industry49Daily() %>%
    filter(RET_TYPE == 'AVWRD' & TIME_STAMP >= startDt) %>%
    mutate(R = RET/100) %>%
    select(DATE = TIME_STAMP, KEY_ID, R) %>%
    collect() %>% 
    # the KEY_ID column has industry id's
    # we want them as column names
    mutate(group=1) %>%
    spread(KEY_ID, R) %>%
    select(-group) %>%
    tbl_xts()

#equal weighted
equalWtd <- famaFrench$Industry49Daily() %>%
    filter(RET_TYPE == 'AEWRD' & TIME_STAMP >= startDt) %>%
    mutate(R = RET/100) %>%
    select(DATE = TIME_STAMP, KEY_ID, R) %>%
    collect() %>% 
    # the KEY_ID column has industry id's
    # we want them as column names
    mutate(group=1) %>%
    spread(KEY_ID, R) %>%
    select(-group) %>%
    tbl_xts()

In [3]:
lookbackDays <- 220 * 5

rrValueWtd <- rollapply(valueWtd, lookbackDays, Return.cumulative, by.column = F)
rrEqualWtd <- rollapply(equalWtd, lookbackDays, Return.cumulative, by.column = F)

rrValueWtd <- na.omit(rrValueWtd)
rrEqualWtd <- na.omit(rrEqualWtd)

names(rrValueWtd) <- names(valueWtd)
names(rrEqualWtd) <- names(equalWtd)

In [4]:
minValWtd <- apply(rrValueWtd, 2, min)
minValWtd <- data.frame(minValWtd)

minEqlWtd <- apply(rrEqualWtd, 2, min)
minEqlWtd <- data.frame(minEqlWtd)

maxValWtd <- apply(rrValueWtd, 2, max)
maxValWtd <- data.frame(maxValWtd)

maxEqlWtd <- apply(rrEqualWtd, 2, max)
maxEqlWtd <- data.frame(maxEqlWtd)

sdValWtd <- apply(rrValueWtd, 2, sd)
sdValWtd <- data.frame(sdValWtd)

sdEqlWtd <- apply(rrEqualWtd, 2, sd)
sdEqlWtd <- data.frame(sdEqlWtd)

temp <- merge(minValWtd, maxValWtd, by=0, all=T)
print(temp)
statsValDf <- merge.data.frame(temp, sdValWtd, by=0, all=T)
statsEqlDf <- merge.data.frame(merge.data.frame(minEqlWtd, maxEqlWtd, by=0, all=T), sdEqlWtd, by=0, all=T)

#print("MKT CAP WT")
#print(head(statsValDf))

#print("EQ WT")
#print(head(statsEqlDf))

   Row.names  minValWtd maxValWtd
1       AERO -0.2532453  2.723510
2      AGRIC -0.4074237  2.918361
3      AUTOS -0.7431826  4.305222
4      BANKS -0.7484332  3.102721
5       BEER -0.2183618  2.484587
6      BLDMT -0.5767641  3.567515
7      BOOKS -0.7936408  2.820014
8      BOXES -0.4499764  1.980728
9      BUSSV -0.4315746  1.778622
10     CHEMS -0.3609480  2.706431
11     CHIPS -0.7657270  5.189433
12     CLTHS -0.3782547  3.524852
13     CNSTR -0.5927093  3.573401
14      COAL -0.9631609  7.302051
15     DRUGS -0.2876000  3.749956
16     ELCEQ -0.3043374  3.978620
17     FABPR -0.6453013  3.848893
18       FIN -0.6100470  4.382467
19      FOOD -0.1204453  1.898008
20       FUN -0.6996230  5.432048
21      GOLD -0.7739683  3.117030
22      GUNS -0.3086047  2.773612
23     HARDW -0.7421280  6.542397
24      HLTH -0.3694879  1.822324
25     HSHLD -0.2153368  1.945329
26     INSUR -0.5480310  2.577640
27     LABEQ -0.5442701  2.384042
28      MACH -0.3163298  2.758806
29     MEALS -

“column name ‘Row.names’ is duplicated in the result”

This notebook was created using [pluto](http://pluto.studio). Learn more [here](https://github.com/shyams80/pluto)