In [557]:
# Libraries
library(tidyverse) # for data manipulation

In [558]:
# Functions
#' Convert every character column of a data frame to a factor.
#'
#' @param df A data frame.
#' @return `df` with each character column replaced by `as.factor()` of that
#'   column; all other columns are returned unchanged.
factorize <- function(df) {
  # inherits() gives exactly one TRUE/FALSE per column. Comparing the output of
  # sapply(df, class) with "character" fails as soon as any column carries more
  # than one class (e.g. POSIXct or an ordered factor), because sapply() then
  # returns a list that cannot be compared with a string.
  is_chr <- vapply(df, function(col) inherits(col, "character"), logical(1))
  for (i in which(is_chr)) {
    df[[i]] <- as.factor(df[[i]])
  }
  df
}

#' Convert every factor column of a data frame back to character.
#'
#' @param df A data frame.
#' @return `df` with each factor column (ordered factors included) replaced by
#'   `as.character()` of that column; all other columns are returned unchanged.
unfactorize <- function(df) {
  # is.factor() gives exactly one TRUE/FALSE per column. Comparing the output
  # of sapply(df, class) with "factor" fails on ordered factors, whose class is
  # c("ordered", "factor"), and on any other multi-class column.
  is_fct <- vapply(df, is.factor, logical(1))
  for (i in which(is_fct)) {
    df[[i]] <- as.character(df[[i]])
  }
  df
}

In [559]:
# Data
df <- read.csv("20240528_PhD_IED.csv") %>% # Read the full csv
  factorize() # and keep it in `df` with character columns converted to factors
colnames(df) # Column names, as a first overview of the dataframe
dim(df) # Number of rows and columns

## Sampling extra letters (do only once)

In [560]:
#women <- df %>% 
#filter(authorGender=="F") %>% 
#select(docauthorid) %>% 
#unique()

#men <- df %>% 
#filter(authorGender=="M") %>% 
#select(docauthorid) %>% 
#unique()

#unknowns <- df %>% 
#filter(is.na(authorGender)) %>% 
#select(docauthorid) %>% 
#unique()

#set.seed(1)
#W01 <- sample_n(women, 73, replace = FALSE)
#W01 <- W01 %>% 
#pull(docauthorid)

#set.seed(1)
#M01 <- sample_n(men, 9, replace = FALSE)
#M01 <- M01 %>% 
#pull(docauthorid)

#set.seed(1)
#U01 <- sample_n(unknowns, 26, replace = FALSE)
#U01 <- U01 %>% 
#pull(docauthorid)

#women <- df[df$docauthorid %in% W01, ]
#men <- df[df$docauthorid %in% M01, ]
#unknowns <- df[df$docauthorid %in% U01, ]
#subset <- rbind(women, men, unknowns)
#summary(subset)

#write.csv(subset, "20240528_PhD_IEDSubset.csv", row.names=FALSE)

## Checking and correcting the sample 

In [650]:
# On subsequent passes, start from the sample saved to disk
subset <- read.csv("20240528_PhD_IEDSubset.csv")

# How many rows the sample holds
nrow(subset)

# Distribution of men and women by country: one row per distinct
# author id / location / gender combination, shown as counts and as shares.
temp <- unique(select(subset, docauthorid, authorLocation, authorGender))
genderTable <- table(temp$authorLocation, temp$authorGender)
genderTable
round(prop.table(genderTable), digits = 2)

# Authors whose gender is still missing
sum(is.na(temp$authorGender))

        
          F  M
  Canada 16  2
  USA    58  7

        
            F    M
  Canada 0.19 0.02
  USA    0.70 0.08

In [651]:
# subset <- unfactorize(subset)

# Correcting names and genders
# Each assignment overwrites one field for every row that matches the given
# docauthorid (or, where a docid is used, for that single letter only).
# The reason for a correction is recorded inline where one was noted.

subset$authorGender[subset$docauthorid=="IED0125"] <- "M"
subset$U[subset$docauthorid=="IED0125"] <- TRUE # His letters are addressed to his mother Lady Caledon

# Matched on docid, so only this one letter is renamed
subset$docauthorname[subset$docid=="47550"] <- "William Cunningham" # No other Elizabeth Weir letters or letters attributed to William or Wm so leaving docauthorid as-is

subset$docauthorname[subset$docauthorid=="IED0635"] <- "Margret Johnston"

# First name only at this stage; a later cell replaces it with "Henry Jim Keenan"
subset$docauthorname[subset$docauthorid=="IED0089.89"] <- "Henry"

subset$docauthorname[subset$docauthorid=="IED0893"] <- "Thomas Bouchier" 
subset$authorGender[subset$docauthorid=="IED0893"] <- "M"

# Initials only at this stage; a later cell replaces them with "George McCann"
subset$docauthorname[subset$docauthorid=="IED0233"] <- "G. J."

subset$authorGender[subset$docauthorid=="IED0089.73"] <- "F" # Refers to her husband

subset$authorGender[subset$docauthorid=="IED0123"] <- "M"
subset$docauthorname[subset$docauthorid=="IED0123"] <- "Charles Martin"

subset$docauthorname[subset$docauthorid=="IED0212"] <- "Ellen S. Dunlop"

# Reconciling docauthorids within the subset
# Order matters in each group below: where an id is renamed first, the lines
# that follow match on the NEW id, so they also reach rows that already had it.

# Matched on docid: only this letter is moved to its own author id
subset$docauthorid[subset$docid=="37901"] <- "IED0650.1" # The Mary who wrote 37901 in 1883 is different than the Mary who wrote 35864 in 1815
subset$docauthorname[subset$docid=="37901"] <- "Mary 1"

# Merge IED0646 into IED0645, then name every row under the merged id
subset$docauthorid[subset$docauthorid=="IED0646"] <- "IED0645" 
subset$docauthorname[subset$docauthorid=="IED0645"] <- "Marion D. Wilson"

# Merge IED0315 into IED0314, then name every row under the merged id
subset$docauthorid[subset$docauthorid=="IED0315"] <- "IED0314"
subset$docauthorname[subset$docauthorid=="IED0314"] <- "Isabella Marshall Allen"

subset$docauthorname[subset$docauthorid=="IED0636"] <- "Margaret Emily Kells"

subset$docauthorname[subset$docauthorid=="IED0719"] <- "Fannie Haslett"

# Gender is set while the rows still carry the old id; the id is changed last.
# IED0317 is itself remapped to IED0107 further down in this cell.
subset$authorGender[subset$docauthorid=="IED0089.28"] <- "F"
subset$docauthorid[subset$docauthorid=="IED0089.28"] <- "IED0317" # Antelope Hotel "Bella" 45775

subset$docauthorname[subset$docauthorid=="IED0768"] <- "Robert Anderson" # 37535 Addressee
subset$authorGender[subset$docauthorid=="IED0768"] <- "M"

subset$docauthorname[subset$docauthorid=="IED0075"] <- "Annie Wyly" 

subset$docauthorname[subset$docauthorid=="IED0552"] <- "John S Crawford"

# Merge IED0119 into IED0130, then set name and gender for the merged id
subset$docauthorid[subset$docauthorid=="IED0119"] <- "IED0130"
subset$docauthorname[subset$docauthorid=="IED0130"] <- "Catherine Fitzgerald" 
subset$authorGender[subset$docauthorid=="IED0130"] <- "F"

# Authors identified as men from how they refer to themselves
vals <- c("IED0219", # Refers to self as nephew
          "IED0243" # Refers to self as brother
         )
subset$authorGender[subset$docauthorid %in% vals] <- "M"

# Fixing other bits and pieces

# Set both the F and L flags for this author
subset$F[subset$docauthorid=="IED0089.79"] <- TRUE # Speaks of farming own land and wage labour building wall / planting
subset$L[subset$docauthorid=="IED0089.79"] <- TRUE

# Blank out the recorded month for this one letter
subset$docmonth[subset$docid=="47721"] <- NA

# Omitting unsuitable letters
# docids to drop from the sample, each with the reason it was judged unsuitable.

vals <- c("43949", # Too short
          "25745", # Too short
          "48326", # Short, notice (this docid is listed a second time further down)
          "29307", # Short, estate
          "22948", # Short, death notice
          "46028", # Short, estate
          "32310", # Short, business
          "40071", # Estate
          "31251", # Estate
          "49415", # Estate
          "41236", # Co-authored
          "26249", # Duplicate of 27000
          "35387", # Letter in NAILDOH
          "27052", # Letter in NAILDOH
          "38698", # Letter in NAILDOH
          "50236", # Letter in NAILDOH
          "32934", # Estate
          "53442", # Multiple letters appear to be from Ireland
          "49109", # Family history
          "43219", # Duplicate of 47402
          "44023", # Estate
          "39225", # Co-authored
          "36427", # Family history
          "20600", # French
          "20695", # Business
          "20794", # Political-Business
          "20857", # Business
          "21024", # Short
          "21184", # Business
          "21255", # Business
          "21256", # Business
          "21307", # Business (Personal Estate)
          "21319", # Life insurance
          "21502", # Estate
          "21610", # Business
          "21637", # Death notice
          "21649", # French
          "21704", # Business
          "21762", # Business
          "21769", # French
          "21842", # Estate
          "21934", # Business
          "21947", # Introduction
          "22001", # Introduction
          "22008", # French
          "22096", # French
          "22346", # Business
          "22381", # Invitation
          "22438", # Business
          "22520", # Short (postcard)
          "22725", # Short (postcard)
          "22946", # Introduction
          "23345", # Business
          "23402", # Business
          "23560", # French
          "23684", # Business
          "23699", # Business
          "23795", # Short
          "23848", # Introduction
          "24073", # Business
          "24215", # Official business
          "24706", # Financial matters
          "24716", # Transactional
          "24743", # Estate
          "35574", # Family history, death notice
          "33755", # Duplicate
          "48326", # Brief invitation (repeat of the entry near the top; harmless for %in%)
          "30078", # Business
          "40991", # This year appears incorrect relative to other Stavely letters.
          "26432" # Business / legal
         )

# Keep only the rows whose docid is not in the omit list
subset <- subset[!subset$docid %in% vals, ]

# Reconciling docauthorids with the sampling frame
# In each group the id is remapped first and the name/gender/flag lines then
# match on the NEW id, so statement order must be preserved.
# The same sequence is run again after extra letters are appended to subset.

vals <- c("23590", "50454", "31379", "27261", "26789", "26511", "21334") # Assigning Mary Savage to her proper docauthorid
subset$docauthorid[subset$docid %in% vals] <- "IED0621"
subset$docauthorname[subset$docauthorid=="IED0621"] <- "Mary Savage"
subset$relMin[subset$docauthorid=="IED0621"] <- FALSE # Mentions going to Protestant churches
subset$L[subset$docauthorid=="IED0621"] <- FALSE # Mentions traveling 2nd cabin, which is equivalent to 1st class hotel

subset$docauthorid[subset$docauthorid=="IED0600"] <- "IED0611"
subset$docauthorname[subset$docauthorid=="IED0611"] <- "M. E. Ling" 
subset$authorGender[subset$docauthorid=="IED0611"] <- "F"

subset$docauthorid[subset$docauthorid=="IED0632"] <- "IED0692"
subset$docauthorname[subset$docauthorid=="IED0692"] <- "Mitilda Ferguson" 

# Gender is set on the old id before the id itself is remapped
subset$authorGender[subset$docauthorid=="IED0899"] <- "M"
subset$docauthorid[subset$docauthorid=="IED0899"] <- "IED0932"
subset$docauthorname[subset$docauthorid=="IED0932"] <- "Thomas W. Coskery"

subset$docauthorid[subset$docauthorid=="IED0569"] <- "IED0572"
subset$docauthorname[subset$docauthorid=="IED0572"] <- "Joseph Carswell"

subset$docauthorid[subset$docauthorid=="IED0654"] <- "IED0657"
subset$docauthorname[subset$docauthorid=="IED0657"] <- "May Ann Blair"

subset$docauthorid[subset$docauthorid=="IED0183"] <- "IED0701"
subset$docauthorname[subset$docauthorid=="IED0701"] <- "Edith Gass"

# Gender is set on the old id before the id itself is remapped
subset$authorGender[subset$docauthorid=="IED0345"] <- "M"
subset$docauthorid[subset$docauthorid=="IED0345"] <- "IED0403"
subset$docauthorname[subset$docauthorid=="IED0403"] <- "James Gamble"

subset$docauthorid[subset$docauthorid=="IED0274"] <- "IED0267"
subset$docauthorname[subset$docauthorid=="IED0267"] <- "Hannah B. Longstreet" 
subset$authorGender[subset$docauthorid=="IED0267"] <- "F"

# IED0317 includes the rows moved there from IED0089.28 earlier in this cell
subset$docauthorid[subset$docauthorid=="IED0317"] <- "IED0107"
subset$docauthorname[subset$docauthorid=="IED0107"] <- "Isabella Weir Moore" # Married name per 20910

# Two ids are folded into IED0958.
# NOTE(review): no authorGender is set for IED0958 here, although the repeat of
# this sequence after the append sets it to "M" - confirm whether that is intended.
subset$docauthorid[subset$docauthorid=="IED1043"] <- "IED0958"
subset$docauthorid[subset$docauthorid=="IED1044"] <- "IED0958" # 49253 IED website Willie J. Weir
subset$docauthorname[subset$docauthorid=="IED0958"] <- "William J. Weir" # 42325 "thrashing" ref

subset$docauthorid[subset$docauthorid=="IED1007"] <- "IED0957"
subset$docauthorname[subset$docauthorid=="IED0957"] <- "William J. Stavely" # Dear mother in Belfast
subset$M[subset$docauthorid=="IED0957"] <- TRUE # Speaks of business as occupation
subset$authorGender[subset$docauthorid=="IED0957"] <- "M"

# Two ids are folded into IED0838.
# NOTE(review): the "Married name per 20910" reason is identical to the one on
# the Isabella Weir Moore line above - confirm it also applies to Ruth J. Ramsey.
subset$docauthorid[subset$docauthorid=="IED0837"] <- "IED0838"
subset$docauthorid[subset$docauthorid=="IED0836"] <- "IED0838"
subset$docauthorname[subset$docauthorid=="IED0838"] <- "Ruth J. Ramsey" # Married name per 20910

subset$docauthorid[subset$docauthorid=="IED0698"] <- "IED0710"
subset$docauthorname[subset$docauthorid=="IED0710"] <- "Martha Blair Cranston" # Incorrect name in IED

# Summary statistics

# Number of observations
nrow(subset)

# Distribution of men and women by country: one row per distinct
# author id / location / gender combination, shown as counts and as shares.
temp <- unique(select(subset, docauthorid, authorLocation, authorGender))
genderTable <- table(temp$authorLocation, temp$authorGender)
genderTable
round(prop.table(genderTable), digits = 2)

# Number of authors whose gender is NA
sum(is.na(temp$authorGender))

        
          F  M
  Canada 13  2
  USA    52 14

        
            F    M
  Canada 0.16 0.02
  USA    0.64 0.17

## Adding letters by authors already in sample (do only once!)

In [652]:
#vals <- c("IED0621", 
          #"IED0611", 
          #"IED0692",
          #"IED0932", 
          #"IED0572",
          #"IED0657",
          #"IED0701",
          #"IED0403",
          #"IED0267",
          #"IED0107", 
          #"IED0958", #Weir
          #"IED1044", #Weir
          #"IED0957",
          #"IED0838", #Ramsey
          #"IED0836", #Ramsey
          #"IED0710")

#all <- df$docid[df$docauthorid %in% vals]
#omit <- subset$docid[subset$docauthorid %in% vals]
#toAdd <- setdiff(all, omit)
          
#write.csv(toAdd, "20240531_PhD_IEDSubset01.csv", row.names=FALSE)

In [653]:
# Read the saved docids (column `x` of the csv) and pull the matching rows from df
toAdd <- df[df$docid %in% read.csv("20240531_PhD_IEDSubset01.csv")$x, ]
nrow(toAdd) # How many letters were found

In [654]:
# Letters to leave out of the additions, with the reason for each
vals <- c(
  "21463", # Business
  "22946", # Introduction
  "27008", # Business
  "30982", # Business
  "31706", # Too brief
  "33862", # Death notice
  "37756", # Business
  "38560", # Business
  "49860", # Business
  "52005"  # Introduction
)

# Keep only the rows whose docid has no match in the omit list
toAdd <- toAdd[is.na(match(toAdd$docid, vals)), ]

In [655]:
# Add rows to subset and correct names and genders

# toAdd was taken from df, so its rows do not yet carry any of the corrections
subset <- rbind(subset, toAdd)

# Re-apply the sampling-frame corrections so they also reach the appended rows.
# In each group the id is remapped first and the following lines match on the
# NEW id, so statement order must be preserved.

vals <- c("23590", "50454", "31379", "27261", "26789", "26511", "21334") # Assigning Mary Savage to her proper docauthorid
subset$docauthorid[subset$docid %in% vals] <- "IED0621"
subset$docauthorname[subset$docauthorid=="IED0621"] <- "Mary Savage"
subset$relMin[subset$docauthorid=="IED0621"] <- FALSE # Mentions going to Protestant churches
subset$L[subset$docauthorid=="IED0621"] <- FALSE # Mentions traveling 2nd cabin, which is equivalent to 1st class hotel

subset$docauthorid[subset$docauthorid=="IED0600"] <- "IED0611"
subset$docauthorname[subset$docauthorid=="IED0611"] <- "M. E. Ling" 
subset$authorGender[subset$docauthorid=="IED0611"] <- "F"

subset$docauthorid[subset$docauthorid=="IED0632"] <- "IED0692"
subset$docauthorname[subset$docauthorid=="IED0692"] <- "Mitilda Ferguson" 

# Gender is set on the old id before the id itself is remapped
subset$authorGender[subset$docauthorid=="IED0899"] <- "M"
subset$docauthorid[subset$docauthorid=="IED0899"] <- "IED0932"
subset$docauthorname[subset$docauthorid=="IED0932"] <- "Thomas W. Coskery"

subset$docauthorid[subset$docauthorid=="IED0569"] <- "IED0572"
subset$docauthorname[subset$docauthorid=="IED0572"] <- "Joseph Carswell"

subset$docauthorid[subset$docauthorid=="IED0654"] <- "IED0657"
subset$docauthorname[subset$docauthorid=="IED0657"] <- "May Ann Blair"

subset$docauthorid[subset$docauthorid=="IED0183"] <- "IED0701"
subset$docauthorname[subset$docauthorid=="IED0701"] <- "Edith Gass"

# Gender is set on the old id before the id itself is remapped
subset$authorGender[subset$docauthorid=="IED0345"] <- "M"
subset$docauthorid[subset$docauthorid=="IED0345"] <- "IED0403"
subset$docauthorname[subset$docauthorid=="IED0403"] <- "James Gamble"

subset$docauthorid[subset$docauthorid=="IED0274"] <- "IED0267"
subset$docauthorname[subset$docauthorid=="IED0267"] <- "Hannah B. Longstreet" 
subset$authorGender[subset$docauthorid=="IED0267"] <- "F"

subset$docauthorid[subset$docauthorid=="IED0317"] <- "IED0107"
subset$docauthorname[subset$docauthorid=="IED0107"] <- "Isabella Weir Moore" # Married name per 20910

# Unlike the first run of this sequence, authorGender is also set for IED0958 here
subset$docauthorid[subset$docauthorid=="IED1043"] <- "IED0958"
subset$docauthorid[subset$docauthorid=="IED1044"] <- "IED0958" # 49253 IED website Willie J. Weir
subset$docauthorname[subset$docauthorid=="IED0958"] <- "William J. Weir" # 42325 "thrashing" ref
subset$authorGender[subset$docauthorid=="IED0958"] <- "M"

subset$docauthorid[subset$docauthorid=="IED1007"] <- "IED0957"
subset$docauthorname[subset$docauthorid=="IED0957"] <- "William J. Stavely" # Dear mother in Belfast
subset$M[subset$docauthorid=="IED0957"] <- TRUE # Speaks of business as occupation
subset$authorGender[subset$docauthorid=="IED0957"] <- "M"

# NOTE(review): the "Married name per 20910" reason is identical to the one on
# the Isabella Weir Moore line above - confirm it also applies to Ruth J. Ramsey.
subset$docauthorid[subset$docauthorid=="IED0837"] <- "IED0838"
subset$docauthorid[subset$docauthorid=="IED0836"] <- "IED0838"
subset$docauthorname[subset$docauthorid=="IED0838"] <- "Ruth J. Ramsey" # Married name per 20910

subset$docauthorid[subset$docauthorid=="IED0698"] <- "IED0710"
subset$docauthorname[subset$docauthorid=="IED0710"] <- "Martha Blair Cranston" # Incorrect name in IED

# Author ids touched by the sampling-frame reconciliation
vals <- c(
  "IED0621", "IED0611", "IED0692", "IED0932", "IED0572", "IED0657",
  "IED0701", "IED0403", "IED0267", "IED0107",
  "IED0958", "IED1044", # Weir
  "IED0957",
  "IED0838", "IED0836", # Ramsey
  "IED0710"
)

# Distinct name / id / gender combinations for those authors, to eyeball the result
unique(
  select(
    filter(subset, docauthorid %in% vals),
    docauthorname, docauthorid, authorGender
  )
)

Unnamed: 0_level_0,docauthorname,docauthorid,authorGender
Unnamed: 0_level_1,<chr>,<chr>,<chr>
2,Isabella Weir Moore,IED0107,F
3,Mary Savage,IED0621,F
7,May Ann Blair,IED0657,F
29,Martha Blair Cranston,IED0710,F
44,Ruth J. Ramsey,IED0838,F
60,Hannah B. Longstreet,IED0267,F
69,Edith Gass,IED0701,F
126,Mitilda Ferguson,IED0692,F
154,William J. Stavely,IED0957,M
162,Joseph Carswell,IED0572,M


In [656]:
# Summary statistics

# Number of observations
nrow(subset)

# Distribution of men and women by country: one row per distinct
# author id / location / gender combination, shown as counts and as shares.
temp <- unique(select(subset, docauthorid, authorLocation, authorGender))
genderTable <- table(temp$authorLocation, temp$authorGender)
genderTable
round(prop.table(genderTable), digits = 2)

# Number of authors whose gender is NA
sum(is.na(temp$authorGender))

        
          F  M
  Canada 13  2
  USA    52 15

        
            F    M
  Canada 0.16 0.02
  USA    0.63 0.18

In [657]:
#vals <- c("IED0727",
          #"IED0374", #IED0369 
          #"IED0567", #IED0369
          #"IED0566" #IED0369
         #)

#all <- df$docid[df$docauthorid %in% vals]
#omit <- subset$docid[subset$docauthorid %in% vals]
#toAdd <- setdiff(all, omit)

#write.csv(toAdd, "20240603_PhD_IEDSubset01.csv", row.names=FALSE)

In [658]:
# Read the saved docids (column `x` of the csv) and pull the matching rows from df
toAdd <- df[df$docid %in% read.csv("20240603_PhD_IEDSubset01.csv")$x, ]
nrow(toAdd) # How many letters were found

In [659]:
# Add rows to subset and correct names and genders

# toAdd was taken from df, so its rows do not yet carry any corrections
subset <- rbind(subset, toAdd)

# Move IED0089.70 to IED0727, then set name and gender for every IED0727 row
subset$docauthorid[subset$docauthorid=="IED0089.70"] <- "IED0727"
subset$docauthorname[subset$docauthorid=="IED0727"] <- "Nato B. Smyth" 
subset$authorGender[subset$docauthorid=="IED0727"] <- "F"

# Replaces the "G. J." name assigned to IED0233 in an earlier cell
subset$docauthorname[subset$docauthorid=="IED0233"] <- "George McCann" 
subset$authorGender[subset$docauthorid=="IED0233"] <- "M" # https://sites.rootsweb.com/~rosdavies/SURNAMES/Mc/McCann.htm
subset$relMin[subset$docauthorid=="IED0233"] <- FALSE

subset$authorGender[subset$docauthorid=="IED0254"] <- "M"

# Replaces the "Henry" name assigned to IED0089.89 in an earlier cell
subset$docauthorname[subset$docauthorid=="IED0089.89"] <- "Henry Jim Keenan" 
subset$authorGender[subset$docauthorid=="IED0089.89"] <- "M"

# Fold three ids into IED0374, then set name and gender for the merged id
subset$docauthorid[subset$docauthorid=="IED0369"] <- "IED0374"
subset$docauthorid[subset$docauthorid=="IED0567"] <- "IED0374"
subset$docauthorid[subset$docauthorid=="IED0566"] <- "IED0374"
subset$docauthorname[subset$docauthorid=="IED0374"] <- "Jonathan W. Smith" 
subset$authorGender[subset$docauthorid=="IED0374"] <- "M"

# Letters to drop after the additions, with the reason for each
vals <- c("46943", # Date appears to be missing and probably incorrectly surmised
          "50508", # Letter to a stranger
          "33534", # Letter to government
          "46812", # Business letter
          "48266" # Brief, business letter
          )

# Keep only the rows whose docid is not in the omit list
subset <- subset[!subset$docid %in% vals,]

In [660]:
# Two further letters, taken from the full data and filed under author id IED1059
vals <- c("40692", "29132")

addRows <- df[is.element(df$docid, vals), ]
addRows$docauthorid <- "IED1059"
subset <- rbind(subset, addRows)

# Name and gender are set for every row that now carries IED1059
isPorter <- subset$docauthorid == "IED1059"
subset$docauthorname[isPorter] <- "William Porter"
subset$authorGender[isPorter] <- "M"

In [667]:
# File letter 44386 under Isabella Weir Moore (IED0107)
isLetter <- subset$docid == "44386"
subset$docauthorname[isLetter] <- "Isabella Weir Moore"
subset$docauthorid[isLetter] <- "IED0107"

In [668]:
#Summary statistics
nrow(subset) # Number of observations

# Distribution of men and women by country: one row per distinct
# author id / location / gender combination, shown as counts and as shares.
temp <- unique(select(subset, docauthorid, authorLocation, authorGender))
genderTable <- table(temp$authorLocation, temp$authorGender)
genderTable
round(prop.table(genderTable), digits = 2)

# Number of authors whose gender is NA
sum(is.na(temp$authorGender))

        
          F  M
  Canada 13  3
  USA    52 18

        
            F    M
  Canada 0.15 0.03
  USA    0.60 0.21

In [682]:
# Pooled author counts. In each sum the last two addends equal the Canada and
# USA counts printed by the preceding summary cell; the first addend is
# presumably the NAILDOH count (TODO confirm against the NAILDOH notebook).
print("Number of women across NAILDOH and IED")
sum(14, 13, 52)

print("Number of men across NAILDOH and IED")
sum(78, 3, 18)

[1] "Number of women across NAILDOH and IED"


[1] "Number of men across NAILDOH and IED"


In [680]:
#subset %>% 
#select(docauthorid, docauthorname, authorGender) %>% 
#unique() %>% 
#print()

In [661]:
#vals <- c("IED0727",
          #"IED0374", #IED0369 
          #"IED0567", #IED0369
          #"IED0566", #IED0369
         #)

#subset %>% 
#filter(docauthorid %in% vals) %>% 
#select(docauthorname, docauthorid, authorGender) %>% 
#unique()

In [681]:
#df[df$docauthorid=="IED0089.79",]
#df[grepl("William Porter", df$docauthorname),]
#df[df$docid=="33534",]
#df %>% filter(docauthorname=="George")

#subset[subset$docauthorid=="IED0089.79",]
#subset[grepl("IED0089.73", subset$docauthorid),]
#subset[subset$docid=="22996",]
#subset[grepl("Weir", subset$docauthorname),]

#toAdd[toAdd$docauthorid=="IED0243",]
#toAdd[grepl("IED0037", toAdd$docauthorid),]
#toAdd[toAdd$docid=="33534",]
#toAdd[grepl("Gamble", toAdd$docauthorname),]

#vals <- c("23590", "50454", "31379", "27261", "26789", "26511", "21334")
#subset[subset$docid %in% vals,]

Business letters that offer no personal information are omitted, while letters that refer to interpersonal debt, loans, deals and transactions AND that include personal information (e.g., living conditions, family circumstances, etc.) are kept.

In [675]:
# Per-column summary, with character columns shown as factors so their levels are counted
subset %>% factorize() %>% summary()

     docid       nationalOrigin authorGender   relMin           U          
 Min.   :20910   Irish:253      F:176        Mode :logical   Mode:logical  
 1st Qu.:27978                  M: 77        FALSE:10        TRUE:3        
 Median :35996                               NA's :243       NA's:250      
 Mean   :36638                                                             
 3rd Qu.:45423                                                             
 Max.   :53397                                                             
                                                                           
    M              S              F               L              docmonth     
 Mode:logical   Mode:logical   Mode:logical   Mode :logical   Min.   : 1.000  
 TRUE:12        NA's:253       NA's:253       FALSE:9         1st Qu.: 3.000  
 NA's:241                                     NA's :244       Median : 6.000  
                                                              Mean   : 6.381

stopwords: #page, [sic], [stained], [torn], [..............Torn], [faded], [Page 



In [683]:
# Make a list of the doc ids that have a text file in the folder.
# The relative path below is resolved against the current working directory;
# the former setwd(getwd()) call was a no-op and has been removed.
WD <- getwd()
files <- list.files("SubsetIED/Subset")
# Strip only a literal, trailing ".txt". In the previous pattern '.txt' the dot
# was a regex wildcard and the match was unanchored, so the first run of
# "<any character>txt" anywhere in a file name would have been removed instead.
vals <- sort(sub("\\.txt$", "", files))

# Checking to make sure csv matches folder list.

# Make a list of the doc ids in the csv
csv <- sort(subset$docid)

# Both comparisons should come back empty when folder and csv agree
print("files in CSV but not in folder")
setdiff(csv, vals)

print("files in folder but not in CSV")
setdiff(vals, csv)

[1] "files in CSV but not in folder"


[1] "files in folder but not in CSV"


In [684]:
# Save the corrected sample, without row names, under the new date stamp
write.csv(subset, file = "20240603_PhD_IEDSubset.csv", row.names = FALSE)