In [1]:
library(tidyverse)
library(jsonlite)
library(tidystringdist)
library(kableExtra)

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.2     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.0
[32m✔[39m [34mggplot2  [39m 3.4.3     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.2     [32m✔[39m [34mtidyr    [39m 1.3.0
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors

Attaching package: ‘jsonlite’


The following object is masked from ‘package:purrr’:

    flatten



Attaching package: ‘kableExtra’


The following object is masked fr

In [2]:
# Root directory of the mutation-testing results. Set the MUTATION_TESTING_DATA_DIR
# environment variable to use another checkout; when it is unset the original
# location is used. The path-parsing regex below expects the root directory to be
# named `mutation-testing-data`.
BASE_DIR <- Sys.getenv(
  "MUTATION_TESTING_DATA_DIR",
  unset = '/Users/jon/Documents/NEU/Projects/llm-mutation-testing/mutation-testing-data'
)

# Find every mutants.json below BASE_DIR. `pattern` is a regular expression, so it
# is anchored and the dot escaped to match that exact file name only.
mutants_files <- list.files(
  path = BASE_DIR,
  recursive = TRUE,
  pattern = "^mutants\\.json$",
  full.names = TRUE
)

# Fail early with a clear message instead of an obscure missing-column error later.
if (length(mutants_files) == 0) {
  stop("No mutants.json files found under ", BASE_DIR)
}

# Load each file, tag its rows with the file path, then split the path into the
# experiment coordinates (model, template, temperature, run, project).
# NOTE: the template group `[^-]+` stops at the first hyphen and the temperature
# group takes the rest of that path component, so a directory such as
# `template-a-b-0.0` yields template "a" and temperature "b-0.0". Column names
# selected later in this notebook rely on that split, so it is left unchanged.
mutants <- map_df(mutants_files, ~ fromJSON(.x) %>%
                    mutate(file = .x)) %>%
  tidyr::extract(
    file,
    c('model', 'template', 'temperature', 'run', 'project'),
    "mutation-testing-data/([^/]+)/template-([^-]+)-([^/]+)/run([^/]+)/projects/([^/]+)/"
  )

# Append an `lv` column: the "lv" string distance between the original code and
# its replacement.
mutants <- tidy_stringdist(mutants, "originalCode", "replacement", method = "lv")

In [22]:
# Mean `lv` distance between original code and replacement for every
# project / model / template / temperature combination, rounded to 2 decimals.
mutantEditDistance <- mutants %>%
  group_by(project, model, template, temperature) %>%
  # .groups = "drop" returns an ungrouped tibble and avoids the regrouping message
  summarise(mean_edit_distance = mean(lv), .groups = "drop") %>%
  mutate(mean_edit_distance = round(mean_edit_distance, 2))

# Render a LaTeX table of mean edit distances with one row per project.
#   distances: rows of `mutantEditDistance` (already filtered by the caller)
#   ...:       the `model_template_temperature` columns to keep, in display order
# Returns the knitr::kable object, which the notebook prints as LaTeX source.
edit_distance_latex_table <- function(distances, ...) {
  distances %>%
    pivot_wider(
      names_from = c("model", "template", "temperature"),
      values_from = mean_edit_distance
    ) %>%
    select(project, ...) %>%
    kable(
      format.args = list(big.mark = ","),
      escape = FALSE,
      format = "latex",
      booktabs = TRUE,
      linesep = ""
    )
}

# Model comparison: "full" template at temperature 0.0.
mutantEditDistance %>%
  filter(template == "full" & temperature == "0.0") %>%
  edit_distance_latex_table(
    `codellama-34b-instruct_full_0.0`,
    `codellama-13b-instruct_full_0.0`,
    `mixtral-8x7b-instruct_full_0.0`
  )

# Template comparison for codellama-34b-instruct.
mutantEditDistance %>%
  filter(model == 'codellama-34b-instruct') %>%
  edit_distance_latex_table(
    `codellama-34b-instruct_full_0.0`,
    `codellama-34b-instruct_onemutation_0.0`,
    `codellama-34b-instruct_noexplanation_0.0`,
    `codellama-34b-instruct_noinstructions_0.0`,
    `codellama-34b-instruct_full_genericsystemprompt-0.0`,
    `codellama-34b-instruct_basic_0.0`
  )

[1m[22m`summarise()` has grouped output by 'project', 'model', 'template'. You can
override using the `.groups` argument.



\begin{tabular}{lrrr}
\toprule
project & codellama-34b-instruct_full_0.0 & codellama-13b-instruct_full_0.0 & mixtral-8x7b-instruct_full_0.0\\
\midrule
Complex.js & 4.27 & 4.63 & 8.95\\
countries-and-timezones & 11.13 & 9.78 & 13.65\\
crawler-url-parser & 9.50 & 8.82 & 12.65\\
delta & 9.55 & 8.88 & 14.15\\
image-downloader & 12.67 & 10.74 & 14.21\\
node-dirty & 7.53 & 7.42 & 12.08\\
node-geo-point & 8.86 & 8.15 & 15.27\\
node-jsonfile & 9.73 & 10.07 & 10.81\\
plural & 8.14 & 6.29 & 10.37\\
pull-stream & 6.72 & 8.71 & 9.67\\
q & 8.61 & 9.71 & 13.23\\
spacl-core & 9.30 & 10.84 & 12.61\\
zip-a-folder & 9.85 & 12.38 & 11.61\\
\bottomrule
\end{tabular}


\begin{tabular}{lrrrrrr}
\toprule
project & codellama-34b-instruct_full_0.0 & codellama-34b-instruct_onemutation_0.0 & codellama-34b-instruct_noexplanation_0.0 & codellama-34b-instruct_noinstructions_0.0 & codellama-34b-instruct_full_genericsystemprompt-0.0 & codellama-34b-instruct_basic_0.0\\
\midrule
Complex.js & 4.27 & 3.37 & 5.09 & 4.27 & 4.17 & 11.98\\
countries-and-timezones & 11.13 & 7.75 & 11.17 & 10.87 & 10.85 & 11.29\\
crawler-url-parser & 9.50 & 6.41 & 9.46 & 9.49 & 9.30 & 20.04\\
delta & 9.55 & 7.38 & 9.91 & 9.43 & 9.14 & 19.63\\
image-downloader & 12.67 & 8.82 & 12.89 & 11.01 & 11.48 & 21.92\\
node-dirty & 7.53 & 6.90 & 7.58 & 7.41 & 7.51 & 17.52\\
node-geo-point & 8.86 & 6.10 & 8.79 & 7.75 & 8.66 & 15.66\\
node-jsonfile & 9.73 & 6.98 & 9.76 & 7.77 & 8.91 & 11.64\\
plural & 8.14 & 5.21 & 8.41 & 7.58 & 7.80 & 23.64\\
pull-stream & 6.72 & 4.57 & 7.53 & 7.48 & 7.30 & 11.92\\
q & 8.61 & 7.61 & 9.21 & 8.60 & 8.58 & 16.18\\
spacl-core & 9.30 & 5.86 & 10.44 & 9.43 & 9.44 & 14.27

In [4]:
# For every (project, template, temperature, model) group keep the two rows with
# the largest `lv`, breaking ties by row order, then sort the rows.
biggestDifferences <- mutants %>%
  group_by(project, template, temperature, model) %>%
  slice_max(order_by = lv, n = 2, with_ties = FALSE) %>%
  arrange(project, model, template, temperature)

# Same selection, but for the two rows with the smallest `lv` per group.
leastDifferences <- mutants %>%
  group_by(project, template, temperature, model) %>%
  slice_min(order_by = lv, n = 2, with_ties = FALSE) %>%
  arrange(project, model, template, temperature)

# Write both selections to CSV files in the working directory.
write.csv(biggestDifferences, file = "biggestDifferences.csv", row.names = FALSE)
write.csv(leastDifferences, file = "leastDifferences.csv", row.names = FALSE)



In [5]:
# Baseline configuration: codellama-34b-instruct, "full" template, temperature 0.0.
mutants_baseline <- filter(
  mutants,
  template == 'full',
  temperature == '0.0',
  model == 'codellama-34b-instruct'
)

In [6]:
# Compare every non-baseline configuration against the baseline: pair mutants
# that replace the same source span of the same project, take the absolute
# difference of their `lv` distances, and average it per project and configuration.
mutants %>%
  # everything except the baseline configuration itself
  filter(!(template == 'full' & temperature == '0.0' & model == 'codellama-34b-instruct')) %>%
  # a span can have several mutants on either side, so all pairings are kept;
  # unsuffixed columns come from the non-baseline side
  inner_join(
    mutants_baseline,
    by = c('project', 'startLine', 'endLine', 'startColumn', 'endColumn', 'originalCode'),
    suffix = c("", ".baseline"),
    relationship = "many-to-many"
  ) %>%
  mutate(lvDiff = abs(lv - lv.baseline)) %>%
  group_by(project, model, template, temperature) %>%
  # .groups = "drop" returns an ungrouped tibble and avoids the regrouping message
  summarise(mean_lvDiff = mean(lvDiff), .groups = "drop") %>%
  # one row per project, one column per model_template_temperature
  pivot_wider(names_from = c("model", "template", "temperature"), values_from = mean_lvDiff)

[1m[22m`summarise()` has grouped output by 'project', 'model', 'template'. You can
override using the `.groups` argument.


project,codellama-13b-instruct_full_0.0,codellama-34b-instruct_basic_0.0,codellama-34b-instruct_full_0.25,codellama-34b-instruct_full_0.5,codellama-34b-instruct_full_1.0,codellama-34b-instruct_full_genericsystemprompt-0.0,codellama-34b-instruct_noexplanation_0.0,codellama-34b-instruct_noinstructions_0.0,codellama-34b-instruct_onemutation_0.0,mixtral-8x7b-instruct_full_0.0
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Complex.js,3.029531,8.892899,1.978023,2.448525,4.379572,1.63605,2.346533,1.968825,1.858666,6.205143
countries-and-timezones,7.731699,6.927419,5.964977,6.580906,8.674877,5.661265,6.633782,6.75104,6.127778,9.216538
crawler-url-parser,6.407374,17.646591,4.714363,6.072846,8.710418,4.246158,4.992904,4.371172,3.949206,10.25297
delta,6.173866,12.739788,4.024385,5.215778,7.88353,3.55453,4.605344,4.742474,3.989054,9.26824
image-downloader,6.559651,17.385714,5.674227,6.432223,8.670696,5.374587,5.788618,5.784014,4.718535,9.222222
node-dirty,4.308391,9.668621,3.963428,4.584016,6.511032,3.290323,3.682717,4.050228,3.326154,7.980945
node-geo-point,6.461708,14.053691,3.12713,4.589208,7.294772,2.596789,3.227074,3.991463,3.73218,11.665262
node-jsonfile,5.977654,8.066667,4.964872,5.884913,8.132706,5.026067,4.859899,5.493874,4.400524,7.52436
plural,7.116323,20.358684,5.156992,6.015751,8.590891,4.239615,5.627558,5.120312,4.388862,9.392463
pull-stream,5.208274,7.742737,3.474531,3.897119,5.137072,3.357048,3.757894,4.301716,3.365128,6.06868
