In [1]:
# load packages 
library(pacman)
p_load(
    tidyverse, data.table, dtplyr, reshape2, 
    archive, kableExtra, SPARQL, janitor, 
    png, webp, Cairo, rsvg,
    httr, jsonlite)
# set option
options(dplyr.summarise.inform = FALSE)
gray_scale <- c('#F3F4F8','#D2D4DA',  '#bcc0ca', 
                '#D3D3D3', '#2B2B2B', 
                '#B3B5BD', '#838383', 
                '#9496A1', '#7d7f89', '#777986', 
                '#656673', '#5B5D6B', '#4d505e',
                '#404352', '#2b2d3b', '#282A3A',
                '#1b1c2a', '#191a2b',
                '#141626', '#101223')

In [2]:
# set up working directory
setwd('work/notebooks/patent')

In [62]:
# Code-Block 5. Extract patents for one-company full code
# read dataset 1, 2, and 3 
de_firms <- fread('./data/orbis_de_matched_l.csv')
han_names <- fread('./data/202208_HAN_NAMES.txt')
han_patents <- fread('./data/202208_HAN_PATENTS.txt')

# filter out germany firms from han_names 
# by setting Person_ctry_code == 'DE'
# match names "AIRBUS DEFENCE" and get their HAN_ID
airbus <- toupper('Airbus Defence')
han_names %>%
    .[Person_ctry_code == 'DE'] %>%
    .[Clean_name %like% airbus] %>%
    .[,HAN_ID] -> airbus_han_ids

# calcualte the summary statistics for AIRBUS DEFENCE
han_patents %>%
    .[HAN_ID %in% airbus_han_ids] %>%
    .[, .N, by=Publn_auth] -> foo
    transform(adorn_totals(foo)) %>%
    transpose() %>%
    row_to_names(row_number=1)

# focusing on patents from EPO
# filter with condition Publn_auth == 'EP'
han_patents %>%
    .[HAN_ID %in% airbus_han_ids] -> airbus_han_patents

# save the dataset
write.csv(airbus_han_patents, './data/airbus_han_patents.csv', row.names=FALSE)

Unnamed: 0_level_0,EP,US,WO,Total
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>
2,716,415,88,1219


In [50]:
airbus_ep_application <- fread('./data/airbus_ep_applications.csv')

In [51]:
dim(airbus_ep_application)

In [5]:
head(airbus_ep_application)

patentNumber,applicationDate,granted,grantDate,familyID,cpcTags,publicationItems
<chr>,<chr>,<int>,<chr>,<int>,<chr>,<chr>
EP2030891,"Wed, 04 Jun 2008",0,,39745168,B64C 1/068,"['EP 2030891 A2', 'EP 2030891 A3']"
EP2025928,"Thu, 24 Jul 2008",0,,40039666,"['F03D 1/0675', 'F03D 3/062']","['EP 2025928 A2', 'EP 2025928 A3']"
EP1920908,"Fri, 02 Nov 2007",1,"Wed, 08 Apr 2015",39118022,"['B29C 70/386', 'B29C 70/541', 'B29C 70/543', 'B29C 70/545', 'B29C 70/56']","['EP 1920908 A1', 'EP 1920908 B1']"
EP1972896,"Sat, 08 Mar 2008",1,"Wed, 06 May 2015",39544979,G01C 23/00,"['EP 1972896 A2', 'EP 1972896 A3', 'EP 1972896 B1']"
EP2134522,"Thu, 13 Mar 2008",1,"Wed, 01 Mar 2017",39595708,"['B29B 11/16', 'B29C 70/382', 'B29C 70/386', 'B29C 70/545', 'B65H 49/18', 'B65H 51/005', 'B65H 57/16', 'D02J 1/18', 'D04H 1/4242', 'D04H 1/60', 'D04H 1/655', 'D04H 13/00', 'D04H 3/002', 'D04H 3/04', 'D04H 3/12']","['EP 2134522 A1', 'EP 2134522 B1', 'EP 2134522 B8']"
EP2136979,"Thu, 13 Mar 2008",1,"Wed, 11 May 2016",39495747,"['B29B 11/16', 'B29C 31/085', 'B29C 70/386', 'B29C 70/541', 'B29C 70/543', 'B29C 70/545']","['EP 2136979 A1', 'EP 2136979 B1']"


In [53]:
airbus_ep_publications <- fread('./data/airbus_ep_publications.csv')

In [54]:
dim(airbus_ep_publications)

In [55]:
head(airbus_ep_publications)

patentNumber,publicationDate,priorityNumber,language,ipc
<chr>,<chr>,<chr>,<chr>,<chr>
EP1920908,"Wed, 08 Apr 2015",DE/102006052592,de,"['B29C 70/38', 'B29C 70/54', 'B29C 70/56']"
EP1972896,"Wed, 06 May 2015",DE/102007014015,de,"['B64D 45/08', 'G01C 23/00']"
EP2134522,"Wed, 01 Mar 2017",DE/102007012608,de,"['B29B 11/16', 'B29C 70/20', 'B29C 70/38', 'B29C 70/54', 'B65H 49/18', 'B65H 51/005', 'B65H 57/16', 'D02J 1/18', 'D04H 1/4242', 'D04H 1/60', 'D04H 1/655', 'D04H 13/00', 'D04H 3/002', 'D04H 3/04', 'D04H 3/12']"
EP2136979,"Wed, 11 May 2016",DE/102007012609,de,"['B29B 11/16', 'B29C 70/38', 'B29C 70/54']"
EP1976108,"Wed, 21 Oct 2020",DE/102007015302,de,"['H02M 3/337', 'H02M 7/5387']"
EP2039604,"Wed, 01 Mar 2017",DE/102007045205,de,"['B64D 1/04', 'F41A 9/09', 'G01B 7/00', 'G01B 7/14', 'G01S 5/16']"


In [13]:
airbus_ep_application %>%
    .[, date := tstrsplit(applicationDate, split=',', keep=c(2))] %>%
    .[, year := tstrsplit(date, split=' ', keep=c(4))] %>%
    head()
    

patentNumber,applicationDate,granted,grantDate,familyID,cpcTags,publicationItems,date,year
<chr>,<chr>,<int>,<chr>,<int>,<chr>,<chr>,<chr>,<chr>
EP2030891,"Wed, 04 Jun 2008",0,,39745168,B64C 1/068,"['EP 2030891 A2', 'EP 2030891 A3']",04 Jun 2008,2008
EP2025928,"Thu, 24 Jul 2008",0,,40039666,"['F03D 1/0675', 'F03D 3/062']","['EP 2025928 A2', 'EP 2025928 A3']",24 Jul 2008,2008
EP1920908,"Fri, 02 Nov 2007",1,"Wed, 08 Apr 2015",39118022,"['B29C 70/386', 'B29C 70/541', 'B29C 70/543', 'B29C 70/545', 'B29C 70/56']","['EP 1920908 A1', 'EP 1920908 B1']",02 Nov 2007,2007
EP1972896,"Sat, 08 Mar 2008",1,"Wed, 06 May 2015",39544979,G01C 23/00,"['EP 1972896 A2', 'EP 1972896 A3', 'EP 1972896 B1']",08 Mar 2008,2008
EP2134522,"Thu, 13 Mar 2008",1,"Wed, 01 Mar 2017",39595708,"['B29B 11/16', 'B29C 70/382', 'B29C 70/386', 'B29C 70/545', 'B65H 49/18', 'B65H 51/005', 'B65H 57/16', 'D02J 1/18', 'D04H 1/4242', 'D04H 1/60', 'D04H 1/655', 'D04H 13/00', 'D04H 3/002', 'D04H 3/04', 'D04H 3/12']","['EP 2134522 A1', 'EP 2134522 B1', 'EP 2134522 B8']",13 Mar 2008,2008
EP2136979,"Thu, 13 Mar 2008",1,"Wed, 11 May 2016",39495747,"['B29B 11/16', 'B29C 31/085', 'B29C 70/386', 'B29C 70/541', 'B29C 70/543', 'B29C 70/545']","['EP 2136979 A1', 'EP 2136979 B1']",13 Mar 2008,2008


In [56]:
airbus_prior <- fread('./data/airbus_ep_pub_prior.csv')

In [57]:
dim(airbus_prior)

In [86]:
foo <- fread("./data/airbus_app_prior.csv")
dim(foo)

In [None]:
get_date <- function(date_string) {
    
}

In [99]:
substrRight <- function(x, n=4){
  substr(x, nchar(x)-n+1, nchar(x))
}

In [100]:
strfoo <- 'Fri, 02 Nov 2007'
substrRight(strfoo)

In [105]:
foo %>%
    .[, priorityYear := lapply(priorityDate, substrRight)] %>%
    .[, applicationYear := lapply(applicationDate, substrRight)] %>%
    .[, grantYear := lapply(grantDate, substrRight)] %>%
    tail()

patentNumber,applicationDate,granted,grantDate,familyID,cpcTags,publicationItems,priorityDate,applicationYear,priorityYear,grantYear
<chr>,<chr>,<int>,<chr>,<int>,<chr>,<chr>,<chr>,<list>,<list>,<list>
EP3883144,"Wed, 18 Mar 2020",1,"Wed, 07 Dec 2022",69845911,"['H04B 7/18502', 'H04W 56/0015']","['EP 3883144 A1', 'EP 3883144 B1']","Wed, 18 Mar 2020",2020,2020,2022
EP3882376,"Thu, 19 Mar 2020",1,"Wed, 05 Oct 2022",69846318,"['C25B 1/04', 'C25B 15/02', 'C25B 15/08', 'C25B 9/05']","['EP 3882376 A1', 'EP 3882376 B1']","Thu, 19 Mar 2020",2020,2020,2022
EP3886189,"Wed, 25 Mar 2020",1,"Wed, 06 Jul 2022",70289237,"['H02N 2/062', 'H02N 2/142', 'H10N 30/802']","['EP 3886189 A1', 'EP 3886189 B1']","Wed, 25 Mar 2020",2020,2020,2022
EP3731055,"Mon, 20 Apr 2020",1,"Wed, 08 Sep 2021",70333838,G05D 1/0676,"['EP 3731055 A1', 'EP 3731055 B1']","Tue, 23 Apr 2019",2020,2019,2021
EP3905535,"Tue, 28 Apr 2020",1,"Wed, 27 Apr 2022",70475966,"['G01S 19/21', 'H04B 1/7097']","['EP 3905535 A1', 'EP 3905535 B1']","Tue, 28 Apr 2020",2020,2020,2022
EP3913398,"Wed, 20 May 2020",1,"Wed, 16 Nov 2022",70802639,G01S 13/9011,"['EP 3913398 A1', 'EP 3913398 B1']","Wed, 20 May 2020",2020,2020,2022


## EPO Citations

In [15]:
epo_citations <- fread('./data/202208_EPO_CITATIONS.txt')

In [16]:
dim(epo_citations)

In [17]:
head(epo_citations)

Citing_pub_nbr,Citing_pub_date,Citing_app_nbr,Citing_appln_id,Cited_pub_nbr,Cited_pub_date,Cited_App_auth,Cited_App_nbr,Cited_Appln_id,Cit_Total,Citn_origin,Citn_category,Citn_lag_year,Citn_lag_month,PCT_Route
<chr>,<IDate>,<chr>,<int>,<chr>,<IDate>,<chr>,<chr>,<int>,<int>,<chr>,<chr>,<int>,<int>,<int>
EP0000001,1978-12-20,EP19780200013,16428854,DE2161506,1973-06-14,DE,DE2161506,9912741,7,SEA,A,5,66,0
EP0000001,1978-12-20,EP19780200013,16428854,FR2025459,1970-09-11,FR,FR6942154,19548401,7,SEA,,8,99,0
EP0000001,1978-12-20,EP19780200013,16428854,US3532159,1970-10-06,US,US3532159D,49957875,7,SEA,,8,98,0
EP0000001,1978-12-20,EP19780200013,16428854,US3568762,1971-03-09,US,US3568762D,50023219,7,SEA,,7,93,0
EP0000001,1978-12-20,EP19780200013,16428854,US3913665,1975-10-21,US,US40265573,50609500,7,SEA,,3,38,0
EP0000001,1978-12-20,EP19780200013,16428854,US4018269,1977-04-19,US,US46803374,51119449,7,SEA,,1,20,0


In [18]:
epo_citations %>%
    .[Citing_pub_nbr == 'EP2030891']

Citing_pub_nbr,Citing_pub_date,Citing_app_nbr,Citing_appln_id,Cited_pub_nbr,Cited_pub_date,Cited_App_auth,Cited_App_nbr,Cited_Appln_id,Cit_Total,Citn_origin,Citn_category,Citn_lag_year,Citn_lag_month,PCT_Route
<chr>,<IDate>,<chr>,<int>,<chr>,<IDate>,<chr>,<chr>,<int>,<int>,<chr>,<chr>,<int>,<int>,<int>
EP2030891,2009-03-04,EP20080010159,213,DE3141869,1983-05-11,DE,DE3141869,10572877,4,APP,,26,310,0
EP2030891,2009-03-04,EP20080010159,213,DE3141869,1983-05-11,DE,DE3141869,10572877,4,SEA,,26,310,0
EP2030891,2009-03-04,EP20080010159,213,DE3501887,1986-07-24,DE,DE3501887,10818969,4,SEA,XA,23,272,0
EP2030891,2009-03-04,EP20080010159,213,DE102005002370,2006-07-27,DE,DE102005002370,14971442,4,SEA,Y,3,32,0
EP2030891,2009-03-04,EP20080010159,213,DE102005045181,2007-04-05,DE,DE102005045181,15003772,4,SEA,Y,2,23,0


In [19]:
epo_cit_counts <- fread('./data/202208_EPO_CIT_COUNTS.txt')

In [20]:
dim(epo_cit_counts)

In [21]:
head(epo_cit_counts)

EP_Pub_nbr,EP_Pub_date,EP_Appln_id,WO_Pub_nbr,WO_Appln_id,EP_Grant,Withdrawal,Refusal,EP_Pat_Cits,EP_NPL_Cits,⋯,Recd_asWO_byWO,Direct_cits_Recd,Recd_asEQV_byEP,Recd_asEQV_byWO,Total_cits_Recd,Recd_in3_asEP,Recd_in3_asWO,Direct_cits_Recd_in3,Recd_in3_asEQV,Total_cits_Recd_in3
<chr>,<int>,<int>,<chr>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
EP0000001,19781220,16428854,,,19810107.0,0,0,7,0,⋯,0,2,0,2,4,1,0,1,0,1
EP0000002,19781220,16427091,,,19810826.0,0,0,2,1,⋯,0,4,0,0,4,1,0,1,0,1
EP0000003,19781220,16429215,,,,1,0,1,0,⋯,0,0,0,0,0,0,0,0,0,0
EP0000004,19781220,16427120,,,19800903.0,0,0,4,0,⋯,0,0,0,0,0,0,0,0,0,0
EP0000005,19781220,16427159,,,19801029.0,0,0,3,0,⋯,0,4,1,0,5,1,0,1,1,2
EP0000006,19781220,16428868,,,19810204.0,0,0,3,2,⋯,0,1,0,0,1,0,0,0,0,0


In [24]:
epo_cit_counts %>%
    .[EP_Pub_nbr == 'EP1920908']

EP_Pub_nbr,EP_Pub_date,EP_Appln_id,WO_Pub_nbr,WO_Appln_id,EP_Grant,Withdrawal,Refusal,EP_Pat_Cits,EP_NPL_Cits,⋯,Recd_asWO_byWO,Direct_cits_Recd,Recd_asEQV_byEP,Recd_asEQV_byWO,Total_cits_Recd,Recd_in3_asEP,Recd_in3_asWO,Direct_cits_Recd_in3,Recd_in3_asEQV,Total_cits_Recd_in3
<chr>,<int>,<int>,<chr>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
EP1920908,20080514,156990,,,20150408,0,0,4,0,⋯,0,4,0,1,5,1,0,1,0,1


In [26]:
airbus_ep_application$patentNumber -> foo


In [32]:
airbus_ep_publications$patentNumber -> foo

In [33]:
dim(airbus_ep_publications)

In [49]:
sample(foo, 5) -> foo_sample
print(foo_sample)
epo_cit_counts %>%
    .[EP_Pub_nbr %in% foo_sample] %>%
    head(10)

[1] "EP3173338" "EP3883144" "EP2571764" "EP3045560" "EP2757350"


EP_Pub_nbr,EP_Pub_date,EP_Appln_id,WO_Pub_nbr,WO_Appln_id,EP_Grant,Withdrawal,Refusal,EP_Pat_Cits,EP_NPL_Cits,⋯,Recd_asWO_byWO,Direct_cits_Recd,Recd_asEQV_byEP,Recd_asEQV_byWO,Total_cits_Recd,Recd_in3_asEP,Recd_in3_asWO,Direct_cits_Recd_in3,Recd_in3_asEQV,Total_cits_Recd_in3
<chr>,<int>,<int>,<chr>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
EP2757350,20140723,380983809,,,20150916.0,0,0,3,0,⋯,0,0,0,1,1,0,0,0,0,0
EP3045560,20160720,448472760,,,,0,0,3,1,⋯,0,0,0,0,0,0,0,0,0,0
EP3173338,20170531,471830403,,,,0,0,10,0,⋯,0,2,0,0,2,1,0,1,0,1


In [45]:
colnames(epo_cit_counts)

In [46]:
colnames(epo_citations)