Skip to content

as.data.frame.textstat_proxy() drops document2 docnames when input is subset #1939

@kbenoit

Description

@kbenoit
library("quanteda")
## Package version: 2.0.2
## Parallel computing: 2 of 8 threads used.
## See https://quanteda.io for tutorials and examples.
## 
## Attaching package: 'quanteda'
## The following object is masked from 'package:utils':
## 
##     View

# how it should look: document2 contains docnames, not <NA>
textstat_simil(data_dfm_lbgexample) %>%
  as.data.frame()
##    document1 document2 correlation
## 1         R1        R2   0.1767950
## 2         R1        R3  -0.2927247
## 3         R2        R3   0.1767950
## 4         R1        R4  -0.3199658
## 5         R2        R4  -0.2927247
## 6         R3        R4   0.1767950
## 7         R1        R5  -0.3201080
## 8         R2        R5  -0.3199658
## 9         R3        R5  -0.2927247
## 10        R4        R5   0.1767950
## 11        R1        V1  -0.1243030
## 12        R2        V1   0.8084589
## 13        R3        V1   0.6078216
## 14        R4        V1  -0.2103734
## 15        R5        V1  -0.3183551

textstat_simil(data_dfm_lbgexample[-1, ], data_dfm_lbgexample[1, ]) %>%
  as.data.frame()
##   document1 document2 correlation
## 1        R2      <NA>   0.1767950
## 2        R3      <NA>  -0.2927247
## 3        R4      <NA>  -0.3199658
## 4        R5      <NA>  -0.3201080
## 5        V1      <NA>  -0.1243030
textstat_simil(data_dfm_lbgexample[1, ], data_dfm_lbgexample[-1, ]) %>%
  as.data.frame()
##   document1 document2 correlation
## 1        R1      <NA>   0.1767950
## 2        R1      <NA>  -0.2927247
## 3        R1      <NA>  -0.3199658
## 4        R1      <NA>  -0.3201080
## 5        R1      <NA>  -0.1243030

textstat_simil(data_dfm_lbgexample[-c(1:2), ], data_dfm_lbgexample[1:2, ]) %>%
  as.data.frame()
##   document1 document2 correlation
## 1        R3      <NA>  -0.2927247
## 2        R4      <NA>  -0.3199658
## 3        R5      <NA>  -0.3201080
## 4        V1      <NA>  -0.1243030
## 5        R3      <NA>   0.1767950
## 6        R4      <NA>  -0.2927247
## 7        R5      <NA>  -0.3199658
## 8        V1      <NA>   0.8084589
textstat_simil(data_dfm_lbgexample[1:2, ], data_dfm_lbgexample[-c(1:2), ]) %>%
  as.data.frame()
##   document1 document2 correlation
## 1        R1      <NA>  -0.2927247
## 2        R2      <NA>   0.1767950
## 3        R1      <NA>  -0.3199658
## 4        R2      <NA>  -0.2927247
## 5        R1      <NA>  -0.3201080
## 6        R2      <NA>  -0.3199658
## 7        R1      <NA>  -0.1243030
## 8        R2      <NA>   0.8084589

Metadata

Metadata

Assignees

Type

No type
No fields configured for issues without a type.

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions