# Tomotoshi Nishino: DMRG preprints

http://quattro.phys.sci.kobe-u.ac.jp/dmrg.html

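Collect the arXiv ID of every preprint listed in Tomotoshi Nishino's extensive classification of DMRG-related preprints, together with the rating he assigns to each entry (encoded on his pages as the font colour of the entry).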

In [1]:
library(stringr)

“package ‘stringr’ was built under R version 3.4.4”

In [2]:
# Two-digit suffixes for every year from 1998 to 2018, followed by an empty
# suffix (""), which is kept as the last element of the vector.
aa <- sprintf("%02d", 0:9)
years <- c("98", "99", aa, as.character(10:18), "")

In [12]:
# Scrape Nishino's per-year preprint pages and build `preprints`, a data frame
# with one row per listed preprint: its arXiv id and the rating implied by the
# font colour of the entry.

# Parse the lines of one page into a two-column data frame.
#
# web_page: character vector holding the lines of the HTML page.
#
# Returns a data frame with columns `arXiv_id` and `Nishino_Rating` (both
# text), one row per line of `web_page` that contains "/abs/". A page without
# any such line yields a zero-row data frame.
parse_nishino_page <- function(web_page) {
    has_abs <- str_detect(web_page, "/abs/")
    abs_idx <- which(has_abs)

    # New-style ids look like "0707.1454", old-style ones like
    # "cond-mat/0011348"; lines where neither pattern matches give NA.
    clean_ids <- str_extract(web_page[abs_idx], "\\d+\\.\\d+|\\w+\\-\\w+/\\d+")

    # The colour is read only from the line immediately following the link,
    # and only when that line is not itself another "/abs/" line. Entries
    # without such a line keep the default colour (regular preprint).
    colors_ids <- rep("#330099", length(abs_idx))
    next_idx <- abs_idx + 1L
    next_line <- web_page[next_idx]  # NA when the link sits on the last line
    has_color <- which(
        !is.na(next_line) &
            str_detect(next_line, "FONT.COLOR") &
            !has_abs[next_idx]
    )
    colors_ids[has_color] <- tolower(
        str_extract(next_line[has_color], "#?\\w{2}\\d{4}")
    )

    # Drop the optional leading "#" by keeping the last six characters.
    colors_ids <- str_sub(colors_ids, -6, -1)

    # Convert from colour codes to ratings; any other colour is left as its
    # six-character code.
    # 0: Regular preprint
    colors_ids[colors_ids %in% "330099"] <- "0"
    # 1: "major technical or conceptual advances, cool applications and good reviews"
    colors_ids[colors_ids %in% "ff0000"] <- "1"
    # -1: "related field"
    colors_ids[colors_ids %in% "006600"] <- "-1"

    data.frame(arXiv_id = clean_ids, Nishino_Rating = colors_ids)
}

# Download and parse one page per entry of `years`. A failed download still
# stops the run, so the result is never silently incomplete.
pages <- lapply(years, function(year) {
    # Get the corresponding URL from Nishino's webpage
    url <- paste0("http://quattro.phys.sci.kobe-u.ac.jp/dmrg/condmat", year, ".html")
    parse_nishino_page(readLines(url))
})

# Bind everything once instead of growing the data frame inside a loop. The
# leading empty frame guarantees a well-formed (zero-row) result even when no
# page contributes any row.
preprints <- do.call(
    rbind,
    c(
        list(data.frame(arXiv_id = character(), Nishino_Rating = character())),
        pages
    )
)

In [13]:
# Give the two columns their final names, turn the rating into a factor, then
# show the table's dimensions and a per-column summary.
colnames(preprints) <- c("arXiv_id", "Nishino_Rating")
preprints[["Nishino_Rating"]] <- as.factor(preprints[["Nishino_Rating"]])
dim(preprints)
summary(preprints)

             arXiv_id    Nishino_Rating
 0707.1454       :   4   -1:1821       
 quant-ph/0610099:   3   0 :3945       
 cond-mat/0011348:   2   1 : 258       
 cond-mat/0104533:   2                 
 cond-mat/0301067:   2                 
 cond-mat/0303557:   2                 
 (Other)         :6009                 

In [14]:
# Keep only the first row for each arXiv id; later repeats are dropped
preprints <- preprints[!duplicated(preprints[["arXiv_id"]]), , drop = FALSE]

In [15]:
# Summarise the rating column of the de-duplicated table
summary(preprints[["Nishino_Rating"]])

In [16]:
# Save the table as CSV in the working directory, without a row-name column
write.csv(preprints, "NishinoPreprints.csv", row.names = FALSE)