In [1]:
# NOTE(review): warn=-1 silences every warning for the rest of the session,
# not just for this cell; kept because later cells may rely on the quiet output.
options(warn=-1)

suppressMessages(library(dplyr))
# loadData() and cleanData() are not defined in this cell; presumably they are
# provided by loadData.R -- verify against that file.
source("loadData.R")

# load data
"Raw data:"
raw <- loadData()
format(head(raw,3), digits=3)

# clean data
"Cleaned data:"
clean <- cleanData(raw)
format(head(clean,3), digits=3)

# center and scale select features
"Centered and scaled data:"
# Not used by the scaling below; kept in case another cell reads it.
wells <- unique(clean$Well.Name)

# scale() returns a one-column matrix carrying centering/scaling attributes;
# as.numeric() strips it back to a plain vector so it can be stored as a column.
standardize <- function(x) as.numeric(scale(x))

# Each listed log is standardised within its own well (grouping by Well.Name),
# so the centre and spread are computed per well rather than over all wells.
cs <- clean %>%
    group_by(Well.Name) %>%
    mutate(GR=standardize(GR),
           ILD_log10=standardize(ILD_log10),
           DeltaPHI=standardize(DeltaPHI),
           PHIND=standardize(PHIND),
           PE=standardize(PE)) %>%
    # Drop the grouping once the per-well scaling is done; otherwise every later
    # dplyr verb applied to cs would silently keep operating per well.
    ungroup()

format(head(cs,3), digits=3)

Facies,Formation,Well.Name,Depth,GR,ILD_log10,DeltaPHI,PHIND,PE,NM_M,RELPOS
3,A1 SH,SHRIMPLIN,2793,77.5,0.664,9.9,11.9,4.6,1,1.0
3,A1 SH,SHRIMPLIN,2794,78.3,0.661,14.2,12.6,4.1,1,0.979
3,A1 SH,SHRIMPLIN,2794,79.0,0.658,14.8,13.1,3.6,1,0.957


Facies,Formation,Well.Name,Depth,GR,ILD_log10,DeltaPHI,PHIND,PE,isMarine,RELPOS
FSiS,A1 SH,SHRIMPLIN,2793,77.5,0.664,9.9,11.9,4.6,False,1.0
FSiS,A1 SH,SHRIMPLIN,2794,78.3,0.661,14.2,12.6,4.1,False,0.979
FSiS,A1 SH,SHRIMPLIN,2794,79.0,0.658,14.8,13.1,3.6,False,0.957


Facies,Formation,Well.Name,Depth,GR,ILD_log10,DeltaPHI,PHIND,PE,isMarine,RELPOS
FSiS,A1 SH,SHRIMPLIN,2793,0.216,0.01855,0.512,-0.0487,0.421,False,1.0
FSiS,A1 SH,SHRIMPLIN,2794,0.237,0.00567,1.517,0.0736,-0.133,False,0.979
FSiS,A1 SH,SHRIMPLIN,2794,0.258,-0.00721,1.657,0.1648,-0.687,False,0.957


In [29]:
# Facies counts for the rows flagged isMarine, then for the remaining rows.
with(cs, table(Facies[isMarine]))
with(cs, table(Facies[!isMarine]))


  SS CSiS FSiS SiSh   MS   WS    D   PS   BS 
   0    6   16  266  285  579  140  674  185 


  SS CSiS FSiS SiSh   MS   WS    D   PS   BS 
 268  934  764    5   11    3    1   12    0 

In [54]:
library(randomForest)
library(caret)
library(dplyr)
# myF1MetricCaret is not defined in this cell; presumably it comes from
# accuracyMetrics.R -- verify against that file.
source("accuracyMetrics.R")

# Keep only fully observed rows and leave out the "Recruit F9" well.
df <- cs[complete.cases(cs) & cs$Well.Name != "Recruit F9",]
formations <- unique(df$Formation)

# One fitted caret model per formation, keyed by formation name.
fits <- list()

# NOTE(review): the recorded run of this cell stopped on the first formation
# with 'task 1 failed - "subscript out of bounds"'. The cause is not visible in
# this cell (candidates: myF1MetricCaret, or a training fold that lacks a facies
# level present in the held-out well) -- TODO confirm before trusting the fits.
for (f in formations) {
    df_i <- df[df$Formation == f,]
    # Remove facies levels that never occur in this formation.
    df_i$Facies <- droplevels(df_i$Facies)

    wells <- unique(df_i$Well.Name)
    well_names <- as.character(wells)

    print(f)
    print(table(df_i$Well.Name))
    print(subset(df_i, select=-c(Well.Name)))

    # Leave-one-well-out needs at least two wells: with a single well the only
    # training index set would be empty and train() cannot fit anything.
    if (length(well_names) < 2) {
        message("Skipping formation '", f, "': only ", length(well_names),
                " well(s) available, cannot hold one out.")
        print("-----------------------------------------------------------------------------")
        next
    }

    # One resample per well: train on every row that is NOT from that well.
    # Rows left out of an index entry are what caret holds out for that resample.
    resamp_list <- lapply(well_names, function(w) which(df_i$Well.Name != w))
    names(resamp_list) <- well_names

    # Modelling frame: Well.Name is only the resampling key, and Formation is
    # constant inside this subset, so neither is kept as a predictor. A plain
    # data.frame is passed so caret does not depend on the dplyr class of cs.
    train_df <- subset(as.data.frame(df_i), select=-c(Well.Name, Formation))

    fitControl <- trainControl(method="cv", index=resamp_list, summaryFunction=myF1MetricCaret)
    fit <- train(Facies ~ ., data=train_df, method="rf",
                 trControl=fitControl, metric="F1")
    fits[[as.character(f)]] <- fit

    print(fit)
    print(fit[["resample"]])
    print("-----------------------------------------------------------------------------")
}

[1] "A1 SH"

    ALEXANDER D CHURCHMAN BIBLE  CROSS H CATTLE        KIMZEY A        LUKE G U 
              0              19               0               0              53 
          NEWBY           NOLAN      Recruit F9         SHANKLE       SHRIMPLIN 
             43              44               0              63              43 
Source: local data frame [265 x 10]

   Facies Formation  Depth        GR    ILD_log10  DeltaPHI       PHIND
   (fctr)    (fctr)  (dbl)     (dbl)        (dbl)     (dbl)       (dbl)
1    FSiS     A1 SH 2793.0 0.2155819  0.018551798 0.5120999 -0.04865478
2    FSiS     A1 SH 2793.5 0.2372980  0.005670378 1.5169013  0.07359111
3    FSiS     A1 SH 2794.0 0.2584779 -0.007211043 1.6571062  0.16480534
4    FSiS     A1 SH 2794.5 0.4474882 -0.020092464 1.4467989  0.17702993
5    FSiS     A1 SH 2795.0 0.1386372 -0.054442918 1.3533290  0.21182299
6    FSiS     A1 SH 2795.5 0.1222831 -0.101674794 1.4701664  0.22780899
7    FSiS     A1 SH 2796.0 0.1155806 -0.127437635 

ERROR: Error in {: task 1 failed - "subscript out of bounds"
