In [1]:
# load packages 
library(pacman)
p_load(
    tidyverse, data.table, dtplyr, reshape2, 
    archive, kableExtra, SPARQL, janitor, 
    httr, jsonlite)
# session options: do not print dplyr's summarise() grouping messages
options(dplyr.summarise.inform = FALSE)
# 16-step gray palette, running from the lightest to the darkest shade
gray_scale <- c(
    "#F3F4F8", "#D2D4DA", "#B3B5BD", "#9496A1",
    "#7d7f89", "#777986", "#656673", "#5B5D6B",
    "#4d505e", "#404352", "#2b2d3b", "#282A3A",
    "#1b1c2a", "#191a2b", "#141626", "#101223"
)

In [2]:
# read the HAN names table (columns HAN_ID, Clean_name, Person_ctry_code)
# NOTE(review): path is relative to the notebook's working directory -- confirm
han_names <- fread('work/notebooks/patent/data/202208_HAN_NAMES.txt')

In [7]:
# number of rows and columns in the HAN names table
dim(han_names)

In [7]:
# preview the first rows of the HAN names table
head(han_names)

HAN_ID,Clean_name,Person_ctry_code
<int>,<chr>,<chr>
1,& HAMBOURG NIENDORF,DE
2,& KK,JP
3,“ASTRONIT” CLOSE CORP,RU
4,“DEUTSCHE SEE” GMBH,DE
5,“EFIRNOIE” OPEN JOINT STOCK CO,RU
6,“EUROSTANDART” LTD LIABILITY CO,RU


In [3]:
# read the firm table; columns 2-3 hold name_native and name_internat
# NOTE(review): presumably Orbis records for German firms, judging by the file name -- confirm
de_firms <- fread('work/notebooks/patent/data/orbis_de_matched_l.csv')

In [55]:
# render the first five HAN name rows as a centred pipe (markdown) table
kable(han_names[1:5, ], 'pipe', align = 'ccc')



| HAN_ID |           Clean_name           | Person_ctry_code |
|:------:|:------------------------------:|:----------------:|
|   1    |      & HAMBOURG NIENDORF       |        DE        |
|   2    |              & KK              |        JP        |
|   3    |     “ASTRONIT” CLOSE CORP      |        RU        |
|   4    |      “DEUTSCHE SEE” GMBH       |        DE        |
|   5    | “EFIRNOIE” OPEN JOINT STOCK CO |        RU        |

In [53]:
# first five firms, restricted to the two name columns (columns 2 and 3)
de_firms[1:5, 2:3]

name_native,name_internat
<chr>,<chr>
Airbus Defence and Space GmbH,Airbus Defence and Space GmbH
EurA AG,EurA AG
TuTech Innovation GmbH,TuTech Innovation GmbH
FFT Produktionssysteme GmbH & Co. KG.,FFT Produktionssysteme GmbH & Co. KG.
Diehl Aviation Laupheim GmbH,Diehl Aviation Laupheim GmbH


In [83]:
# query: HAN ids of German applicants whose cleaned name matches the pattern
# (Clean_name is upper case, so the search string is upper-cased as well;
#  %like% treats it as a regular expression)
airbus <- toupper('Airbus Defence')
airbus_han_ids <- han_names[Person_ctry_code == 'DE'][Clean_name %like% airbus, HAN_ID]
airbus_han_ids

In [4]:
# read the HAN patents table (columns HAN_ID, HARM_ID, Appln_id, Publn_auth, Patent_number)
han_patents <- fread('work/notebooks/patent/data/202208_HAN_PATENTS.txt')

In [86]:
# number of patents per publication authority for the selected HAN ids
foo <- han_patents[HAN_ID %in% airbus_han_ids][, .N, by = Publn_auth]

# separate statement: append a totals row, then flip the table so that each
# authority (and the total) becomes a column
foo %>%
    adorn_totals() %>%
    transform() %>%
    transpose() %>%
    row_to_names(row_number = 1)

Unnamed: 0_level_0,EP,US,WO,Total
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>
2,716,415,88,1219


In [84]:
# five randomly drawn patents belonging to the selected HAN ids
han_patents[HAN_ID %in% airbus_han_ids][sample(.N, 5)]

HAN_ID,HARM_ID,Appln_id,Publn_auth,Patent_number
<int>,<int>,<int>,<chr>,<chr>
60513,60513,442096647,EP,EP3112597
60513,60513,417401751,EP,EP2913271
60513,60513,404985890,EP,EP2825450
60513,60513,527283926,US,US2020070202
60513,60513,544408405,WO,WO2021123759


In [96]:
# keep only the EP publications of the selected HAN ids
airbus_ep_patents <- han_patents[HAN_ID %in% airbus_han_ids][Publn_auth == 'EP']

# preview the result
head(airbus_ep_patents)

HAN_ID,HARM_ID,Appln_id,Publn_auth,Patent_number
<int>,<int>,<int>,<chr>,<chr>
60513,60513,213,EP,EP2030891
60513,60513,65448,EP,EP2025928
60513,60513,156990,EP,EP1920908
60513,60513,161551,EP,EP1972896
60513,60513,173385,EP,EP2134522
60513,60513,173386,EP,EP2136979


In [27]:
# fetch the JSON publication record of EP1972896 from the EPO linked-data service
request <- GET('https://data.epo.org/linked-data/data/publication/EP/1972896.json')

In [28]:
# decode the body as UTF-8 text and parse it, flattening nested records
response <- httr::content(request, as = "text", encoding = "UTF-8")
json <- jsonlite::fromJSON(response, flatten = TRUE)

# list the fields available under `result`
names(json$result)

In [12]:
# preview the parsed publication items (one row per publication kind in the output below)
head(json$result$items)

Unnamed: 0_level_0,_about,label,publicationAuthority,publicationDate,application._about,application.applicationNumber,publicationKind._about,publicationKind.label
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,http://data.epo.org/linked-data/data/publication/EP/1972896/A2/-,EP 1972896 A2,http://data.epo.org/linked-data/id/st3/EP,"Wed, 24 Sep 2008",http://data.epo.org/linked-data/id/application/EP/08004318,8004318,http://data.epo.org/linked-data/def/patent/publicationKind_A2,A2
2,http://data.epo.org/linked-data/data/publication/EP/1972896/A3/-,EP 1972896 A3,http://data.epo.org/linked-data/id/st3/EP,"Wed, 07 Nov 2012",http://data.epo.org/linked-data/id/application/EP/08004318,8004318,http://data.epo.org/linked-data/def/patent/publicationKind_A3,A3
3,http://data.epo.org/linked-data/data/publication/EP/1972896/B1/-,EP 1972896 B1,http://data.epo.org/linked-data/id/st3/EP,"Wed, 06 May 2015",http://data.epo.org/linked-data/id/application/EP/08004318,8004318,http://data.epo.org/linked-data/def/patent/publicationKind_B1,B1


In [15]:
# pick the four fields of interest, renaming them while selecting, and render
# the result as a pipe (markdown) table
kable(
    select(
        json$result$items,
        Link = `_about`,
        Publication_date = publicationDate,
        Application_number = application.applicationNumber,
        Kind_code = publicationKind.label
    ),
    'pipe',
    align = 'lccc'
)



|Link                                                             | Publication_date | Application_number | Kind_code |
|:----------------------------------------------------------------|:----------------:|:------------------:|:---------:|
|http://data.epo.org/linked-data/data/publication/EP/1972896/A2/- | Wed, 24 Sep 2008 |      08004318      |    A2     |
|http://data.epo.org/linked-data/data/publication/EP/1972896/A3/- | Wed, 07 Nov 2012 |      08004318      |    A3     |
|http://data.epo.org/linked-data/data/publication/EP/1972896/B1/- | Wed, 06 May 2015 |      08004318      |    B1     |

In [99]:
#' Fetch the publication records listed at an EPO linked-data JSON URL.
#'
#' @param url URL of a publication resource in JSON form (e.g. the value
#'   returned by `construct_url()`).
#' @return A data frame with the columns `link`, `pub_date`, `appln_number`
#'   and `kind_code`, one row per listed publication. A zero-row data frame
#'   with the same columns is returned when the request fails, the status is
#'   not 200, the body cannot be parsed as JSON, or no usable items are listed.
get_publications <- function(url) {
    # empty result carrying the output schema; returned on every failure path
    df0 <- data.frame(
            link=character(),
            pub_date=character(),
            appln_number=character(),
            kind_code=character(),
            stringsAsFactors = FALSE
        )

    # browser-like request headers
    # NOTE(review): the Cookie is a hard-coded captured value and will
    # presumably go stale; confirm whether these headers are still required
    request <- tryCatch(
        GET(
            url,
            add_headers(
                Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
                `Accept-Encoding` = "gzip, deflate, br",
                `Accept-Language` = "en-US,en;q=0.9",
                Connection = "keep-alive",
                Cookie = "__ssds=2; __ssuzjsr2=a9be0cd8e; __uzmbj2=1674140969; __uzmaj2=bc092231-9990-40c4-9dc9-a2522cc3d5f3; __uzmdj2=1674381382; __uzmcj2=541805878521",
                Host = "data.epo.org",
                `Sec-Fetch-Dest` =  "document",
                `Sec-Fetch-Mode` = "navigate",
                `Sec-Fetch-Site` = "cross-site",
                `Sec-Fetch-User` = "?1",
                `Upgrade-Insecure-Requests` = "1",
                `User-Agent` = "Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.188 Safari/537.36 CrKey/1.54.250320"
            )
        ),
        error = function(e) {
            # transport failures (DNS, timeout, ...) are treated like a non-200 reply
            warning("Request failed for ", url, ": ", conditionMessage(e),
                    call. = FALSE)
            NULL
        }
    )

    if (is.null(request) || status_code(request) != 200) {
        return(df0)
    }

    response <- content(request, as = "text", encoding = "UTF-8")

    # a 200 reply is not guaranteed to carry JSON, so a parse failure must not
    # abort the caller
    json <- tryCatch(
        fromJSON(response, flatten = TRUE),
        error = function(e) {
            warning("Response from ", url, " is not valid JSON: ",
                    conditionMessage(e), call. = FALSE)
            NULL
        }
    )

    items <- json$result$items
    if (!is.data.frame(items) || nrow(items) == 0) {
        return(df0)
    }

    # the flattened column names depend on the payload; bail out instead of
    # letting select() fail when one of them is absent
    required <- c("_about", "publicationDate",
                  "application.applicationNumber", "publicationKind.label")
    if (!all(required %in% names(items))) {
        warning("Unexpected item fields in response from ", url, call. = FALSE)
        return(df0)
    }

    items %>%
        select(
            link = `_about`,
            pub_date = publicationDate,
            appln_number = application.applicationNumber,
            kind_code = publicationKind.label)
}

In [47]:
#' Keep only the publications of one kind code.
#'
#' @param url URL handed on unchanged to `get_publications()`.
#' @param kind Kind code to keep; defaults to 'B1'.
#' @return The rows of the `get_publications()` result whose `kind_code`
#'   equals `kind`.
get_granted <- function(url, kind='B1') {
    publications <- get_publications(url)
    filter(publications, kind_code == kind)
}

In [66]:
#' Build the EPO linked-data JSON URL for an EP publication number.
#'
#' @param patent_number Character vector of publication numbers, with or
#'   without a leading "EP" and surrounding whitespace (e.g. ' EP2136979 ').
#' @return Character vector of URLs of the form
#'   'https://data.epo.org/linked-data/data/publication/EP/<number>.json'.
construct_url <- function(patent_number) {
    pub_link <- 'https://data.epo.org/linked-data/data/publication/EP/'
    # trim first so the country prefix can be anchored to the start
    pn <- trimws(patent_number, whitespace = "[\\h\\v]")
    # drop only the leading country code (and any blanks after it), not every
    # "EP" that happens to occur in the string
    pn <- sub("^EP[[:space:]]*", "", pn, ignore.case = TRUE)

    paste0(pub_link, pn, '.json')
}

In [94]:
# build the lookup URL; the padded input exercises the trimming in construct_url()
url <- construct_url(" EP2136979 ")

In [100]:
# publications of the default kind ('B1') for the URL built above
get_granted(url)

[1] 200


link,pub_date,appln_number,kind_code
<chr>,<chr>,<chr>,<chr>


In [95]:
# repeat of the same lookup; the output below is again a zero-row table
get_granted(url)

link,pub_date,appln_number,kind_code
<chr>,<chr>,<chr>,<chr>


In [88]:
# same URL requested without the custom headers, to inspect the raw reply
foo <- httr::GET(url)
response <- httr::content(foo, as = "text", encoding = "UTF-8")
json <- jsonlite::fromJSON(response, flatten = TRUE)

# list the fields available under `result`
names(json$result)