# Using an LLM to Help With Code Understanding

In [1]:
# Install the modelling dependencies only when they are missing, so that
# re-running the notebook does not rebuild them from source every time.
# nloptr and MuMIn come from pinned archived CRAN tarballs; an already
# installed copy (of any version) is left untouched.
if (!requireNamespace("nloptr", quietly = TRUE)) {
  nloptr_url <- "https://cran.r-project.org/src/contrib/Archive/nloptr/nloptr_1.2.1.tar.gz"
  install.packages(nloptr_url, repos = NULL, type = "source")
}
if (!requireNamespace("MuMIn", quietly = TRUE)) {
  mumin_url <- "https://cran.r-project.org/src/contrib/Archive/MuMIn/MuMIn_1.46.0.tar.gz"
  install.packages(mumin_url, repos = NULL, type = "source")
}

# rsq is taken from the configured default repository.
if (!requireNamespace("rsq", quietly = TRUE)) {
  install.packages("rsq")
}


also installing the dependencies ‘boot’, ‘minqa’, ‘RcppEigen’, ‘lme4’, ‘Deriv’, ‘deming’


Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done



In [2]:

# Install pbkrtest (pinned archived CRAN tarball, built from source) and car
# only when they are not already available, so re-running this cell is cheap.
if (!requireNamespace("pbkrtest", quietly = TRUE)) {
  pbkrtest_url <- "https://cran.r-project.org/src/contrib/Archive/pbkrtest/pbkrtest_0.4-5.tar.gz"
  install.packages(pbkrtest_url, repos = NULL, type = "source")
}

# Pinned alternative for car, kept for reference:
# car_url <- "https://cran.r-project.org/src/contrib/Archive/car/car_3.1-1.tar.gz"
# install.packages(car_url, repos=NULL, type="source")
if (!requireNamespace("car", quietly = TRUE)) {
  install.packages("car")
}


also installing the dependencies ‘Matrix’, ‘SparseM’, ‘MatrixModels’, ‘carData’, ‘abind’, ‘quantreg’


Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done



In [3]:
library(MuMIn)
library(car)
library(rsq)

Loading required package: carData



In [4]:
# Load the study data; the first line of the CSV supplies the column names.
df <- read.csv("study_data.csv", header = TRUE)

## Time

In [5]:
# Trim extreme values of success_time_no_guess: keep only rows lying within
# the 1st-99th percentile range (bounds inclusive).
# Note: despite its name, `eliminated` holds the rows that are KEPT.
Q <- quantile(df$success_time_no_guess, probs = c(0.01, 0.99), na.rm = FALSE)
eliminated <- subset(
  df,
  Q[1] <= df$success_time_no_guess & df$success_time_no_guess <= Q[2]
)

In [6]:
# Linear model of success_time_no_guess on tool, experience, recruiting.years
# and AI_experience, fitted to the trimmed rows currently in `eliminated`.
no_guess_time_model <- lm(
  success_time_no_guess ~ tool + experience + recruiting.years + AI_experience,
  data = eliminated
)
summary(no_guess_time_model)


Call:
lm(formula = success_time_no_guess ~ tool + experience + recruiting.years + 
    AI_experience, data = eliminated)

Residuals:
    Min      1Q  Median      3Q     Max 
-365.00 -169.86  -20.92   96.79  612.00 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)  
(Intercept)       312.649    185.333   1.687    0.097 .
tool               -9.098     57.257  -0.159    0.874  
experience         23.138     25.401   0.911    0.366  
recruiting.years  -23.671     43.534  -0.544    0.589  
AI_experience       7.703     27.038   0.285    0.777  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 227 on 58 degrees of freedom
Multiple R-squared:  0.02192,	Adjusted R-squared:  -0.04553 
F-statistic: 0.325 on 4 and 58 DF,  p-value: 0.8601


In [7]:
# Variance inflation factors for the time model's predictors.
car::vif(no_guess_time_model)

## Understanding

In [8]:
# Trim extreme values of understanding: keep only rows lying within the
# 1st-99th percentile range (bounds inclusive).
# Note: despite its name, `eliminated` holds the rows that are KEPT, and this
# assignment replaces any previously trimmed data frame of the same name.
Q <- quantile(df$understanding, probs = c(0.01, 0.99), na.rm = FALSE)
eliminated <- subset(
  df,
  Q[1] <= df$understanding & df$understanding <= Q[2]
)

In [9]:
# Quasi-Poisson GLM of understanding on tool, experience, recruiting.years
# and AI_experience, fitted to the trimmed rows currently in `eliminated`.
understanding_model <- glm(
  understanding ~ tool + experience + recruiting.years + AI_experience,
  family = quasipoisson,
  data = eliminated
)
summary(understanding_model)


Call:
glm(formula = understanding ~ tool + experience + recruiting.years + 
    AI_experience, family = quasipoisson, data = eliminated)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.9804  -1.0091  -0.4981   0.5261   2.0323  

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)      -1.80950    0.88632  -2.042 0.045674 *  
tool              0.29092    0.28117   1.035 0.305041    
experience        0.41416    0.11598   3.571 0.000715 ***
recruiting.years  0.20093    0.21558   0.932 0.355107    
AI_experience    -0.08944    0.13894  -0.644 0.522236    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for quasipoisson family taken to be 1.019686)

    Null deviance: 81.806  on 63  degrees of freedom
Residual deviance: 66.229  on 59  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 5


In [10]:
# Diagnostics for the understanding model: variance inflation factors,
# then R-squared and adjusted R-squared.
car::vif(understanding_model)
rsq::rsq(understanding_model)
rsq::rsq(understanding_model, adj = TRUE)

## Progress

In [11]:
# Trim extreme values of progress_no_guess: keep only rows lying within the
# 1st-99th percentile range (bounds inclusive).
# Note: despite its name, `eliminated` holds the rows that are KEPT, and this
# assignment replaces any previously trimmed data frame of the same name.
Q <- quantile(df$progress_no_guess, probs = c(0.01, 0.99), na.rm = FALSE)
eliminated <- subset(
  df,
  Q[1] <= df$progress_no_guess & df$progress_no_guess <= Q[2]
)

In [12]:
# Quasi-Poisson GLM of progress_no_guess on tool, experience,
# recruiting.years and AI_experience, fitted to the trimmed rows currently
# in `eliminated`.
progress_model <- glm(
  progress_no_guess ~ tool + experience + recruiting.years + AI_experience,
  family = quasipoisson,
  data = eliminated
)
summary(progress_model)


Call:
glm(formula = progress_no_guess ~ tool + experience + recruiting.years + 
    AI_experience, family = quasipoisson, data = eliminated)

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-1.99544  -0.58170  -0.09451   0.47915   1.57589  

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)   
(Intercept)       0.411276   0.489850   0.840  0.40452   
tool              0.474858   0.155829   3.047  0.00345 **
experience        0.128921   0.065531   1.967  0.05385 . 
recruiting.years -0.102899   0.116218  -0.885  0.37954   
AI_experience    -0.008889   0.073082  -0.122  0.90360   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for quasipoisson family taken to be 0.65364)

    Null deviance: 56.650  on 63  degrees of freedom
Residual deviance: 47.874  on 59  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 5


In [13]:
# Diagnostics for the current `progress_model`: variance inflation factors,
# then R-squared and adjusted R-squared.
car::vif(progress_model)
rsq::rsq(progress_model)
rsq::rsq(progress_model, adj = TRUE)

## Progress - Professionals

In [14]:
# Same progress model, restricted to rows of `eliminated` where
# is_professional == 1.
# NOTE: this reuses the name `progress_model`, overwriting any earlier fit,
# and reads whatever `eliminated` was assigned most recently.
progress_model <- glm(
  progress_no_guess ~ tool + experience + recruiting.years + AI_experience,
  family = quasipoisson,
  data = eliminated[eliminated$is_professional == 1, ]
)
summary(progress_model)


Call:
glm(formula = progress_no_guess ~ tool + experience + recruiting.years + 
    AI_experience, family = quasipoisson, data = eliminated[eliminated$is_professional == 
    1, ])

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-1.51998  -0.53314  -0.00373   0.34619   1.47372  

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)  
(Intercept)      -0.380009   0.675660  -0.562   0.5785  
tool              0.574312   0.216699   2.650   0.0133 *
experience        0.157089   0.092289   1.702   0.1002  
recruiting.years  0.005766   0.167227   0.034   0.9728  
AI_experience     0.068821   0.105228   0.654   0.5186  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for quasipoisson family taken to be 0.5829177)

    Null deviance: 26.628  on 31  degrees of freedom
Residual deviance: 18.063  on 27  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 5


In [15]:
# Diagnostics for whichever fit `progress_model` currently holds: variance
# inflation factors, then R-squared and adjusted R-squared.
car::vif(progress_model)
rsq::rsq(progress_model)
rsq::rsq(progress_model, adj = TRUE)

## Progress - Students

In [16]:
# Same progress model, restricted to rows of `eliminated` where
# is_professional == 0.
# NOTE: this reuses the name `progress_model`, overwriting any earlier fit,
# and reads whatever `eliminated` was assigned most recently.
progress_model <- glm(
  progress_no_guess ~ tool + experience + recruiting.years + AI_experience,
  family = quasipoisson,
  data = eliminated[eliminated$is_professional == 0, ]
)
summary(progress_model)


Call:
glm(formula = progress_no_guess ~ tool + experience + recruiting.years + 
    AI_experience, family = quasipoisson, data = eliminated[eliminated$is_professional == 
    0, ])

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-2.16667  -0.44062   0.03898   0.44761   1.39012  

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)  
(Intercept)       1.82280    0.83114   2.193   0.0371 *
tool              0.28921    0.24924   1.160   0.2560  
experience        0.04163    0.11189   0.372   0.7128  
recruiting.years -0.36806    0.20574  -1.789   0.0848 .
AI_experience    -0.09812    0.10170  -0.965   0.3432  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for quasipoisson family taken to be 0.7262393)

    Null deviance: 29.881  on 31  degrees of freedom
Residual deviance: 26.263  on 27  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 5


In [17]:
# Diagnostics for whichever fit `progress_model` currently holds: variance
# inflation factors, then R-squared and adjusted R-squared.
car::vif(progress_model)
rsq::rsq(progress_model)
rsq::rsq(progress_model, adj = TRUE)

## Usage - Prompt

In [18]:
# Quasi-Poisson GLM of query_total on AI_experience, info_style and
# learning_style, using only the rows of the untrimmed `df` with tool == 1.
query_model <- glm(
  query_total ~ AI_experience + info_style + learning_style,
  family = quasipoisson,
  data = df[df$tool == 1, ]
)
summary(query_model)


Call:
glm(formula = query_total ~ AI_experience + info_style + learning_style, 
    family = quasipoisson, data = df[df$tool == 1, ])

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-2.85109  -0.69797  -0.04752   0.50455   1.74363  

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)     1.38541    0.31008   4.468 0.000119 ***
AI_experience   0.18740    0.07011   2.673 0.012401 *  
info_style     -0.04181    0.14840  -0.282 0.780203    
learning_style  0.18625    0.14191   1.312 0.200027    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for quasipoisson family taken to be 1.207218)

    Null deviance: 47.961  on 31  degrees of freedom
Residual deviance: 36.182  on 28  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 4


In [19]:
# R-squared and adjusted R-squared for whichever fit `query_model` holds.
rsq::rsq(query_model)
rsq::rsq(query_model, adj = TRUE)

## Usage - Followup

In [20]:
# Quasi-Poisson GLM of Query_followup on AI_experience, info_style and
# learning_style, using only the rows of the untrimmed `df` with tool == 1.
# NOTE: this reuses the name `query_model`, overwriting any earlier fit.
query_model <- glm(
  Query_followup ~ AI_experience + info_style + learning_style,
  family = quasipoisson,
  data = df[df$tool == 1, ]
)
summary(query_model)


Call:
glm(formula = Query_followup ~ AI_experience + info_style + learning_style, 
    family = quasipoisson, data = df[df$tool == 1, ])

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-2.31075  -1.41911   0.04797   0.89780   1.79470  

Coefficients:
               Estimate Std. Error t value Pr(>|t|)  
(Intercept)     -0.8219     0.6935  -1.185   0.2459  
AI_experience    0.3835     0.1455   2.635   0.0136 *
info_style       0.4374     0.3036   1.441   0.1608  
learning_style   0.5996     0.2871   2.088   0.0460 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for quasipoisson family taken to be 1.584919)

    Null deviance: 68.690  on 31  degrees of freedom
Residual deviance: 53.359  on 28  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 5


In [21]:
# R-squared and adjusted R-squared for whichever fit `query_model` holds.
rsq::rsq(query_model)
rsq::rsq(query_model, adj = TRUE)

## Usage - All

In [22]:
# Quasi-Poisson GLM of usage_total on AI_experience, info_style and
# learning_style, using only the rows of the untrimmed `df` with tool == 1.
# NOTE: this reuses the name `query_model`, overwriting any earlier fit.
query_model <- glm(
  usage_total ~ AI_experience + info_style + learning_style,
  family = quasipoisson,
  data = df[df$tool == 1, ]
)
summary(query_model)


Call:
glm(formula = usage_total ~ AI_experience + info_style + learning_style, 
    family = quasipoisson, data = df[df$tool == 1, ])

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-3.0859  -1.0284   0.2056   0.8392   2.4115  

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)     2.43261    0.27263   8.923 1.12e-09 ***
AI_experience   0.10570    0.06359   1.662    0.108    
info_style     -0.04060    0.13213  -0.307    0.761    
learning_style -0.12236    0.12938  -0.946    0.352    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for quasipoisson family taken to be 1.854085)

    Null deviance: 64.228  on 31  degrees of freedom
Residual deviance: 54.727  on 28  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 4


In [23]:
# R-squared and adjusted R-squared for whichever fit `query_model` holds.
rsq::rsq(query_model)
rsq::rsq(query_model, adj = TRUE)