In [None]:
# Read the raw CSV export into a data frame named `df`.
df <- read.csv(file = "SAU EEZ 851 v50-1.csv")

In [None]:
# Show the first six rows to check which columns were read in.
head(df, n = 6L)

In [None]:
library(dplyr)
# Collapse the records to one row per (year, gear_type), holding the total
# tonnes and the total landed value. Missing landed values are skipped;
# tonnes are summed as-is, so an NA tonnage propagates to its group's total.
# `.groups = "drop"` returns an ungrouped table (the default would leave it
# grouped by `year`) and silences summarize()'s regrouping message.
df2 <- df %>%
  group_by(year, gear_type) %>%
  summarize(
    tonnes = sum(tonnes),
    landed_value = sum(landed_value, na.rm = TRUE),
    .groups = "drop"
  )

# Save the aggregated table without a row-name column.
write.csv(df2, "SAU EEZ 851 v50-1-gearyear.csv", row.names = FALSE)

In [None]:
# Read the per-year, per-gear summary file; from here on `df` refers to this
# aggregated table.
df <- read.csv(file = "SAU EEZ 851 v50-1-gearyear.csv")

In [None]:
library(ggplot2)

In [None]:
# Tonnes per year as columns. geom_col() stacks rows that share a year, so
# each column's height is that year's total across all gear types.
ggplot(data = df, mapping = aes(x = year, y = tonnes)) +
  geom_col()

In [None]:
# Same yearly columns, with each stacked segment coloured by gear type.
ggplot(
  data = df,
  mapping = aes(x = year, y = tonnes, fill = gear_type)
) +
  geom_col()

Question 1: What is the tonnage caught with a particular gear in a given year?

In [None]:
# Tonnes for every row recorded in the year 2000 (one value per gear type).
with(df, tonnes[year == 2000])

In [None]:
# Total tonnes across all rows recorded in the year 2000.
with(df, sum(tonnes[year == 2000]))

In [None]:
# The row(s) for bottom trawl in the year 2000.
subset(df, subset = gear_type == "bottom trawl" & year == 2000)

Question 2: What is the average annual tonnage for a given gear type?

In [None]:
# Histogram of the per-year bottom-trawl tonnages (default binning).
ggplot(
  data = subset(df, gear_type == "bottom trawl"),
  mapping = aes(x = tonnes)
) +
  geom_histogram()

In [None]:
# Mean tonnes over the bottom-trawl rows.
with(df, mean(tonnes[gear_type == "bottom trawl"]))

In [None]:
# Default quantiles (0%, 25%, 50%, 75%, 100%) of bottom-trawl tonnes.
with(df, quantile(tonnes[gear_type == "bottom trawl"]))

Statistical model:
$$tonnes \sim \mathcal{N}(MEAN, STDDEV)$$

In [None]:
# Sample standard deviation of tonnes over the bottom-trawl rows.
with(df, sd(tonnes[gear_type == "bottom trawl"]))

In [None]:
# Draw 1000 values from the normal model.
# NOTE(review): the mean and sd are presumably the values printed by the
# mean()/sd() cells for bottom trawl -- confirm they still match the data.
faketonnes <- rnorm(
  n = 1000,
  mean = 415197.568343814,
  sd = 318485.223031723
)

In [None]:
# Histogram of the simulated tonnages (default binning).
ggplot(
  data = data.frame(tonnes = faketonnes),
  mapping = aes(x = tonnes)
) +
  geom_histogram()

In [None]:
# Observed bottom-trawl tonnages, labelled "real" for the comparison plot.
pdf1 <- data.frame(
  type = "real",
  tonnes = with(df, tonnes[gear_type == "bottom trawl"])
)

In [None]:
# Simulated tonnages, labelled "fake" for the comparison plot.
pdf2 <- data.frame(
  type = "fake",
  tonnes = faketonnes
)

In [None]:
# Stack the observed and simulated rows into one long table.
# Note: `pdf` is also the name of the grDevices::pdf() device function; calls
# to pdf(...) still resolve to the function, but the name is easy to misread.
pdf <- rbind(pdf1, pdf2)

In [None]:
# Overlay the density curves of the observed and simulated tonnages.
ggplot(data = pdf, mapping = aes(x = tonnes, colour = type)) +
  geom_density()

Statistical model:
$$\log(tonnes) \sim \mathcal{N}(MEAN, STDDEV)$$

In [None]:
# Mean of the natural log of bottom-trawl tonnes.
with(df, mean(log(tonnes[gear_type == "bottom trawl"])))

In [None]:
# Sample standard deviation of the natural log of bottom-trawl tonnes.
with(df, sd(log(tonnes[gear_type == "bottom trawl"])))

In [None]:
# Draw 1000 values from the log-normal model.
# NOTE(review): meanlog and sdlog are presumably the values printed by the
# mean(log(...))/sd(log(...)) cells -- confirm they still match the data.
faketonnes <- rlnorm(
  n = 1000,
  meanlog = 12.661429700571,
  sdlog = 0.761876733242941
)

In [None]:
# Rebuild the "fake" table from the current contents of `faketonnes`.
pdf2 <- data.frame(
  type = "fake",
  tonnes = faketonnes
)

In [None]:
# Re-stack the observed rows with the refreshed simulated rows.
pdf <- rbind(pdf1, pdf2)

In [None]:
# Overlay the density curves of the observed and simulated tonnages again,
# using whatever `pdf` currently holds.
ggplot(data = pdf, mapping = aes(x = tonnes, colour = type)) +
  geom_density()

Question 3: Is bottom trawling declining?

In [None]:
# Bottom-trawl tonnes per year as columns.
ggplot(
  data = subset(df, gear_type == "bottom trawl"),
  mapping = aes(x = year, y = tonnes)
) +
  geom_col()

Statistical model:
$$tonnes \sim \mathcal{N}(INTERCEPT + SLOPE \cdot year, STDDEV)$$

In [None]:
# Least-squares fit of tonnes against year for the bottom-trawl rows; printing
# the fit shows the intercept and slope.
lm(
  formula = tonnes ~ year,
  data = subset(df, gear_type == "bottom trawl")
)

In [None]:
# Full summary (coefficient table, residual standard error, R-squared) of the
# same linear fit.
summary(
  lm(
    formula = tonnes ~ year,
    data = subset(df, gear_type == "bottom trawl")
  )
)

In [None]:
# Bottom-trawl tonnes per year with the fitted linear trend drawn on top.
# The intercept and slope are taken from the model itself rather than typed
# in as rounded constants, so the line always matches the current data.
bottom_trawl <- subset(df, gear_type == "bottom trawl")
trawl_trend <- coef(lm(tonnes ~ year, data = bottom_trawl))

ggplot(bottom_trawl, aes(x = year, y = tonnes)) +
  geom_col() +
  geom_abline(
    intercept = trawl_trend[["(Intercept)"]],
    slope = trawl_trend[["year"]]
  )

Statistical model:
$$\log(tonnes) \sim \mathcal{N}(INTERCEPT + SLOPE \cdot year, STDDEV)$$

In [None]:
# Summary of the log-linear fit: natural log of tonnes against year for the
# bottom-trawl rows.
summary(
  lm(
    formula = log(tonnes) ~ year,
    data = subset(df, gear_type == "bottom trawl")
  )
)

In [None]:
# Add the log-linear model's prediction, back-transformed to tonnes, as a
# `predicted` column. The model is fitted on the bottom-trawl rows only, but
# the prediction is filled in for every row of `df` (it depends only on year).
# Coefficients come from the fit instead of hand-copied rounded constants, so
# the column stays in step with the data.
log_trend_fit <- lm(
  log(tonnes) ~ year,
  data = subset(df, gear_type == "bottom trawl")
)
df$predicted <- unname(exp(predict(log_trend_fit, newdata = df)))

In [None]:
# Bottom-trawl tonnes per year, with the `predicted` column drawn as a line.
ggplot(
  data = subset(df, gear_type == "bottom trawl"),
  mapping = aes(x = year, y = tonnes)
) +
  geom_col() +
  geom_line(mapping = aes(y = predicted))