# COMPUSTAT

- Load COMPUSTAT data
- Tickle the data
- Create variables
    + Book Equity
    + Operating Profit
    + Operating Profit less Research Costs
    + Accruals
    + Operating Profits less Research Costs less Accruals

In [None]:
library(data.table)    # read csv much faster than standard function
library(dplyr)         # infinitely nicer grouping operations
library(ggplot2)       # sexy plots

## Load Data

In [None]:
# Path to the COMPUSTAT CSV extract.
comp.path <- "C:/Data/CRSP/20171123_COMP_196001_201612.csv"

# Read the extract with data.table::fread. showProgress = FALSE keeps fread
# from printing its progress / timing report into the notebook output.
comp <- fread(comp.path, showProgress = FALSE)

# Rename two columns in place: LPERMNO -> PERMNO and datadate -> date.
setnames(comp, old = c("LPERMNO", "datadate"), new = c("PERMNO", "date"))

How many firms do we have?

In [None]:
# Size of the inline figures rendered by the notebook (repr.plot.* options).
options(repr.plot.width = 8, repr.plot.height = 3)

# Count the records in each fiscal year ...
dt <- comp %>%
    group_by(fyear) %>%
    summarise(N = n())

# ... and draw the count as a line over time.
ggplot(data = dt, mapping = aes(x = fyear, y = N)) +
    geom_line()

We have many accounting variables from COMPUSTAT.
Oddly, `apc`, `arc` and `artfs` appear to be missing
in every row.
We will drop them, along with the descriptor columns that
COMPUSTAT adds to every record (`indfmt`, `consol`, `popsrc`,
`datafmt`, `curcd` and `costat`).

In [None]:
# Drop the three columns described in the text as missing in every row,
# together with the COMPUSTAT descriptor columns that are not used below.
comp <- subset(
    comp,
    select = -c(
        apc, arc, artfs,                  # no data in any row (see text)
        indfmt, consol, popsrc, datafmt,  # descriptor columns
        curcd, costat
    )
)

In [None]:
# str(comp)

## Short Years

There should be no duplicate (`PERMNO`, `fyear`) pairs.
Duplicates can nevertheless occur when a firm changes its fiscal year-end.
In that case we keep only the most recent values, i.e. the last row of each
pair after sorting by `PERMNO`, `fyear` and `fyr`.

In [None]:
# Sort so that, within each (PERMNO, fyear) pair, rows are ordered by fyr.
comp <- arrange(comp, PERMNO, fyear, fyr)

# duplicated() returns FALSE for the first instance of a duplicated key, so a
# single pass misses one row of every duplicated pair. Scanning once from the
# top and once from the bottom (fromLast = TRUE) flags every instance.
pairs.ix <- local({
    id.cols <- comp[, c("PERMNO", "fyear")]
    duplicated(id.cols) | duplicated(id.cols, fromLast = TRUE)
})

# Show the first five columns of all rows involved in a duplicated pair.
comp[pairs.ix, 1:5]

In [None]:
# Scanning from the bottom only: flag every row that is followed by another
# row with the same (PERMNO, fyear) key, i.e. everything except the last row
# of each duplicated pair.
pairs.ix <- duplicated(comp[, c("PERMNO", "fyear")], fromLast = TRUE)

# Show the first five columns of the flagged rows.
comp[pairs.ix, 1:5]

In [None]:
# Keep a row only if it is (a) not flagged in pairs.ix, so that the last row
# of every (PERMNO, fyear) pair survives, and (b) has a non-missing fyr
# (presumably the fiscal year-end month -- confirm against the data guide).
comp <- comp[!pairs.ix & !is.na(comp$fyr), ]

## Book Equity

$$BE = seq + txditc - ps$$
where $ps$ is the first available of (in order of preference) $pstkrv$, $pstkl$ and $pstk$;
missing $txditc$ and $ps$ are treated as zero.
If $BE$ is still missing (that is, $seq$ is not available),
$$BE = ceq + upstk$$
and if we still do not have a value for $BE$,
$$BE = at - lt$$

In [None]:
# --- Preferred stock: first non-missing of pstkrv, pstkl, pstk, else 0 ------
comp$ps <- comp$pstkrv                                            # Redemption
comp$ps <- replace(comp$ps, is.na(comp$ps),
                   comp$pstkl[is.na(comp$ps)])                    # Liquidation
comp$ps <- replace(comp$ps, is.na(comp$ps),
                   comp$pstk[is.na(comp$ps)])                     # Book
comp$ps <- replace(comp$ps, is.na(comp$ps), 0)

# Missing txditc is treated as zero.
comp$txditc <- replace(comp$txditc, is.na(comp$txditc), 0)

# --- Primary definition: BE = seq + txditc - ps ------------------------------
# Asness and Frazzini use only seq
comp$BE <- with(comp, seq + txditc - ps)

# --- First fallback where BE is still missing: ceq + upstk -------------------
# Missing upstk is treated as zero.
comp$upstk <- replace(comp$upstk, is.na(comp$upstk), 0)

# Common Equity PLUS Par Value of Preferred Stock?
ix <- is.na(comp$BE)
comp$BE[ix] <- (comp$ceq + comp$upstk)[ix]

# --- Second fallback where BE is still missing: at - lt ----------------------
ix <- is.na(comp$BE)
comp$BE[ix] <- (comp$at - comp$lt)[ix]

# --- Change in BE relative to the firm's previous row ------------------------
# lag() is applied within PERMNO, so the first row of every firm gets NA.
comp <- comp %>%
    group_by(PERMNO) %>%
    mutate(D1.BE = BE - lag(BE)) %>%
    as.data.frame()

In [None]:
# Fraction of records with a missing BE in each fiscal year.
dt <- comp %>%
    group_by(fyear) %>%
    summarise(mssg = sum(is.na(BE)) / n())

# Plot the fraction as a percentage, restricted to 1963-2015 with a tick
# every second year.
p <- ggplot(dt, aes(x = fyear, y = mssg * 100)) +
    geom_line() +
    scale_x_continuous(limits = c(1963, 2015),
                       breaks = seq(1963, 2015, by = 2))

# Rotate the year labels so they do not overlap.
p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

## Operating Profit

$$OP = (revt - cogs - xsga - xint)/BE$$

In [None]:
# Operating profit in levels: op = revt - cogs - xsga - xint.
#
# expenses.OK is TRUE when at least one of the three expense items is
# reported. It must be computed BEFORE the missing items are zero-filled
# below; afterwards no expense item is NA and the flag would always be TRUE.
expenses.OK <- !is.na(comp$cogs) | !is.na(comp$xsga) | !is.na(comp$xint)

# Treat a missing expense item as zero so that it does not make op missing.
comp$cogs[is.na(comp$cogs)] <- 0
comp$xsga[is.na(comp$xsga)] <- 0
comp$xint[is.na(comp$xint)] <- 0

# op.OK: revenue is reported and at least one expense item was reported.
comp$op.OK <- !is.na(comp$revt) & expenses.OK

# After the zero-fill, op is NA only where revt is NA. Rows in which all
# expense items were missing still get a value; they are marked by
# op.OK == FALSE rather than by NA.
comp$op <- comp$revt - comp$cogs - comp$xsga - comp$xint

In [None]:
# Fraction of records with a missing op in each fiscal year.
dt <- comp %>%
    group_by(fyear) %>%
    summarise(mssg = sum(is.na(op)) / n())

# Plot as a percentage: years 1963-2015 (tick every second year) on the
# x-axis, 0-15 percent (tick every percent) on the y-axis.
p <- ggplot(dt, aes(x = fyear, y = mssg * 100)) +
    geom_line() +
    scale_x_continuous(limits = c(1963, 2015),
                       breaks = seq(1963, 2015, by = 2)) +
    scale_y_continuous(limits = c(0, 15), breaks = seq(0, 15, by = 1))

# Rotate the year labels so they do not overlap.
p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

In [None]:
# OP.OK: the operating-profit inputs are usable (op.OK) and book equity is
# strictly positive. The comparison is NA where BE is missing ...
comp$OP.OK <- with(comp, op.OK & BE > 0)

# ... so turn those NA flags into FALSE.
comp$OP.OK <- replace(comp$OP.OK, is.na(comp$OP.OK), FALSE)

# Operating profitability: operating profit scaled by book equity.
# Computed for every row; OP.OK is not applied as a mask here.
comp$OP <- with(comp, (revt - cogs - xsga - xint) / BE)

In [None]:
# Fraction of records with a missing OP in each fiscal year.
dt <- comp %>%
    group_by(fyear) %>%
    summarise(mssg = sum(is.na(OP)) / n())

# Plot as a percentage: years 1963-2015 (tick every second year) on the
# x-axis, 0-15 percent (tick every percent) on the y-axis.
p <- ggplot(dt, aes(x = fyear, y = mssg * 100)) +
    geom_line() +
    scale_x_continuous(limits = c(1963, 2015),
                       breaks = seq(1963, 2015, by = 2)) +
    scale_y_continuous(limits = c(0, 15), breaks = seq(0, 15, by = 1))

# Rotate the year labels so they do not overlap.
p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

## Operating Profit less Research Costs

$$OP_r = (revt - cogs - xsga - xint + xrd)/BE$$

In [None]:
# Missing xrd is treated as zero.
comp$xrd <- replace(comp$xrd, is.na(comp$xrd), 0)

# Operating profit with xrd added back, scaled by book equity.
comp$OPr <- with(comp, (op + xrd) / BE)

## Cash Profit

$$CP = (revt - cogs - xsga - xint + xrd + acc)/BE$$
where
$$acc = -\Delta rect -\Delta invt -\Delta xpp +\Delta drc +\Delta ap +\Delta xacc$$

In [None]:
# Treat a missing balance-sheet item as zero before taking differences.
comp <- local({
    acc.cols <- c("xpp", "invt", "drc", "xacc", "ap", "rect")
    comp[acc.cols] <- lapply(comp[acc.cols],
                             function(v) replace(v, is.na(v), 0))
    comp
})

# Change of each item relative to the firm's previous row. lag() is applied
# within PERMNO, so the first row of every firm gets NA.
comp <- comp %>%
    group_by(PERMNO) %>%
    mutate(
        D1.rect = rect - lag(rect),
        D1.xpp  = xpp - lag(xpp),
        D1.ap   = ap - lag(ap),
        D1.invt = invt - lag(invt),
        D1.drc  = drc - lag(drc),
        D1.xacc = xacc - lag(xacc)
    ) %>%
    as.data.frame()

# Accrual adjustment, with the signs of the acc formula in the text.
comp$op.acc <- with(
    comp,
    -D1.rect - D1.invt - D1.xpp + D1.drc + D1.ap + D1.xacc
)

# Where the adjustment cannot be computed (e.g. no previous row), use zero.
comp$op.acc <- replace(comp$op.acc, is.na(comp$op.acc), 0)

# Cash profit: operating profit plus xrd plus the accrual adjustment,
# scaled by book equity.
comp$CP <- with(comp, (op + xrd + op.acc) / BE)

## Gross Profit

In [None]:
# Gross profit scaled by total assets, using the reported gp where possible.
comp$GP <- with(comp, gp / at)

# Where that ratio is missing, fall back to revt - cogs over total assets.
ix <- is.na(comp$GP)
comp$GP[ix] <- ((comp$revt - comp$cogs) / comp$at)[ix]

# A zero denominator yields +/-Inf; record those as missing instead.
comp$GP <- replace(comp$GP, is.infinite(comp$GP), NA)

In [None]:
# Fraction of records with a missing GP in each fiscal year.
dt <- comp %>%
    group_by(fyear) %>%
    summarise(mssg = sum(is.na(GP)) / n())

# Plot as a percentage: years 1963-2015 (tick every second year) on the
# x-axis, 0-15 percent (tick every percent) on the y-axis.
p <- ggplot(dt, aes(x = fyear, y = mssg * 100)) +
    geom_line() +
    scale_x_continuous(limits = c(1963, 2015),
                       breaks = seq(1963, 2015, by = 2)) +
    scale_y_continuous(limits = c(0, 15), breaks = seq(0, 15, by = 1))

# Rotate the year labels so they do not overlap.
p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

## Investment

In [None]:
# Investment: growth in total assets relative to the firm's previous row.
# lag() is applied within PERMNO, so the first row of every firm gets NA.
comp <- comp %>%
    group_by(PERMNO) %>%
    mutate(INV = (at - lag(at)) / lag(at)) %>%
    as.data.frame()

# A zero lagged value yields +/-Inf; record those as missing instead.
comp$INV <- replace(comp$INV, is.infinite(comp$INV), NA)

In [None]:
# Fraction of records with a missing INV in each fiscal year.
dt <- comp %>%
    group_by(fyear) %>%
    summarise(mssg = sum(is.na(INV)) / n())

# Plot as a percentage: years 1963-2015 (tick every second year) on the
# x-axis, 0-35 percent (tick every five percent) on the y-axis.
p <- ggplot(dt, aes(x = fyear, y = mssg * 100)) +
    geom_line() +
    scale_x_continuous(limits = c(1963, 2015),
                       breaks = seq(1963, 2015, by = 2)) +
    scale_y_continuous(limits = c(0, 35), breaks = seq(0, 35, by = 5))

# Rotate the year labels so they do not overlap.
p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5))