In [13]:
import neo4j
import pandas as pd
import random
import numpy as np
import datetime
from credentials import uri, user, pwd
from patent_neo4j.connection import Neo4jConnection
from functools import reduce

In [14]:
df = pd.read_csv("./Data/assignee_multiple_patents.csv")

In [15]:
Q1 = df['patent_counts'].quantile(0.25)
Q3 = df['patent_counts'].quantile(0.75)

In [16]:
random.seed(10)
df = df[(df["patent_counts"] <= Q3) & (df["patent_counts"] >= Q1)].sample(2500)

In [17]:
df.head()

Unnamed: 0,assignee_id,patent_counts
14379,4b6dce64-f040-48c6-8e1e-40f439ea1f9f,19
48316,fcfea1f7-158b-423b-9f77-50f6c64c199b,18
4237,16208fc2-e859-48d0-9751-4a78c5d62ba8,36
29159,99b1d99c-bc9e-475c-ba9f-b0d7bcdf6813,13
11627,3cc84519-4093-456f-bfbb-533772210103,14


In [18]:
assignee_list = list(df["assignee_id"])

In [106]:
conn = Neo4jConnection(uri, user, pwd)

In [144]:
ai = conn.query_assignee_patents(assignee_list=assignee_list)

In [145]:
ai.head()

Unnamed: 0,assignee_id,patent_id,patent_date,nber,num_citation
0,4b6dce64-f040-48c6-8e1e-40f439ea1f9f,8584588,2013-11-19,19,1
1,4b6dce64-f040-48c6-8e1e-40f439ea1f9f,8387503,2013-03-05,59,3
2,4b6dce64-f040-48c6-8e1e-40f439ea1f9f,8151712,2012-04-10,19,1
3,4b6dce64-f040-48c6-8e1e-40f439ea1f9f,7878121,2011-02-01,19,1
4,4b6dce64-f040-48c6-8e1e-40f439ea1f9f,7450472,2008-11-11,21,8


In [146]:
ai = ai.dropna()
ai["nber_category"] = ai["nber"].apply(lambda x: x[0])

In [147]:
ai['patent_date'] = ai['patent_date'].astype(str).apply(datetime.datetime.strptime, args=("%Y-%m-%d",))

In [148]:
ai = ai.sort_values(by=["assignee_id", "patent_date"], ascending=[True, True]).reset_index()

In [149]:
assignee_set = set(ai["assignee_id"])

In [150]:
ai["switch"] = 0

In [151]:
ai.head()

Unnamed: 0,index,assignee_id,patent_id,patent_date,nber,num_citation,nber_category,switch
0,22181,0002c937-3be3-498d-a07e-3b237bb9aed4,3930382,1976-01-06,53,7,5,0
1,22180,0002c937-3be3-498d-a07e-3b237bb9aed4,3938632,1976-02-17,53,12,5,0
2,22178,0002c937-3be3-498d-a07e-3b237bb9aed4,3978950,1976-09-07,53,6,5,0
3,22179,0002c937-3be3-498d-a07e-3b237bb9aed4,3978949,1976-09-07,53,7,5,0
4,22177,0002c937-3be3-498d-a07e-3b237bb9aed4,4311220,1982-01-19,53,3,5,0


In [152]:
for a in assignee_set:
    indices = ai[ai["assignee_id"] == a].index
    initial_roots = {ai.iloc[indices[0],6]}
    num_switch = 0
    for i in indices[1:]:
        next_patent_root = ai.iloc[i,6]

        if next_patent_root not in initial_roots:
            num_switch = num_switch + 1
            
        ai.iloc[i,7] = num_switch
        initial_roots = initial_roots.union(next_patent_root) 

In [163]:
ai[ai["assignee_id"] == list(assignee_set)[9]]

Unnamed: 0,index,assignee_id,patent_id,patent_date,nber,num_citation,nber_category,switch
27148,20546,e11effdb-876a-4f72-a844-0be9fadb738d,5660758,1997-08-26,69,20,6,0
27149,20545,e11effdb-876a-4f72-a844-0be9fadb738d,5667870,1997-09-16,69,35,6,0
27150,20544,e11effdb-876a-4f72-a844-0be9fadb738d,5774335,1998-06-30,45,21,4,1
27151,20543,e11effdb-876a-4f72-a844-0be9fadb738d,5784257,1998-07-21,45,39,4,1
27152,20542,e11effdb-876a-4f72-a844-0be9fadb738d,5825622,1998-10-20,45,26,4,1
27153,20541,e11effdb-876a-4f72-a844-0be9fadb738d,5945736,1999-08-31,46,34,4,1
27154,20540,e11effdb-876a-4f72-a844-0be9fadb738d,6014315,2000-01-11,45,28,4,1
27155,20539,e11effdb-876a-4f72-a844-0be9fadb738d,6021045,2000-02-01,45,54,4,1
27156,20538,e11effdb-876a-4f72-a844-0be9fadb738d,6048919,2000-04-11,15,84,1,2
27157,20537,e11effdb-876a-4f72-a844-0be9fadb738d,6075699,2000-06-13,45,27,4,2


In [164]:
ai.to_csv("switchers.csv", index=False)

In [82]:
ai[ai["assignee_id"] == '0002c937-3be3-498d-a07e-3b237bb9aed4'].iloc[i,6] = 1

In [42]:
assignee_info = ai.groupby('assignee_id').agg(list).reset_index()

In [29]:
assignee_info["roots"] = assignee_info["nber_category"].apply(set)
assignee_info["fine_roots"] = assignee_info["nber"].apply(set)

In [35]:
assignee_info.tail(10)

Unnamed: 0,assignee_id,patent_id,patent_date,nber,num_citation,nber_category,roots,fine_roots,num_roots,num_fine_roots,num_patents,total_citation,avg_citation
2243,ff70d7f9-1de1-4066-808a-c209dfde1426,"[4296536, 4623150, 4681768, 4974755, 5350233, ...","[1981-10-27 00:00:00, 1986-11-18 00:00:00, 198...","[59, 62, 61, 19, 19, 51, 68, 62, 62, 62, 59]","[6, 88, 20, 12, 5, 3, 6, 7, 2, 3, 6]","[5, 6, 6, 1, 1, 5, 6, 6, 6, 6, 5]","{1, 5, 6}","{61, 59, 51, 62, 19, 68}",3,6,11,158,14.363636
2244,ff8f6b90-f8b0-42dc-b065-2270ca32291d,"[7232254, 7597471, 7946755, 8210739, 8240910, ...","[2007-06-19 00:00:00, 2009-10-06 00:00:00, 201...","[69, 69, 69, 69, 69, 69]","[1, 1, 3, 4, 4, 1]","[6, 6, 6, 6, 6, 6]",{6},{69},1,1,6,14,2.333333
2245,ff96ba2f-4eff-4a53-a342-617ae5d35fec,"[4735896, 4833071, 4879211, 4879212, 5106726, ...","[1988-04-05 00:00:00, 1989-05-23 00:00:00, 198...","[31, 31, 31, 31, 31, 31, 31, 15, 31, 15, 31, 3...","[42, 26, 4, 35, 72, 3, 34, 3, 8, 3, 11, 77, 77...","[3, 3, 3, 3, 3, 3, 3, 1, 3, 1, 3, 3, 3, 3, 3, ...","{1, 3}","{19, 15, 31}",2,3,38,700,18.421053
2246,ffb2ddf2-6b51-4a7a-a163-329fb52112ee,"[6166716, 6227484, 6766946, 6942143, 7032819, ...","[2000-12-26 00:00:00, 2001-05-08 00:00:00, 200...","[25, 55, 59, 59, 59, 25, 21, 22, 59, 25, 25, 2...","[6, 6, 69, 15, 8, 79, 2, 9, 2, 5, 2, 10, 1, 3,...","[2, 5, 5, 5, 5, 2, 2, 2, 5, 2, 2, 2, 2, 2, 2, ...","{2, 5}","{25, 55, 21, 59, 24, 22, 23}",2,7,17,224,13.176471
2247,ffb57a5c-9ee5-430e-b121-4b3aa38c3c1c,"[7165982, 7168984, 7193183, 7285015, 7338313, ...","[2007-01-23 00:00:00, 2007-01-30 00:00:00, 200...","[41, 41, 49, 41, 41, 41, 52, 59, 41, 41, 41, 53]","[5, 3, 1, 13, 1, 2, 6, 1, 2, 1, 3, 1]","[4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 5]","{5, 4}","{59, 49, 53, 41, 52}",2,5,12,39,3.25
2248,ffe0afc2-2818-4240-ba6d-012a44940f31,"[7846174, 8100931, 8372087, 8529431, 8672831, ...","[2010-12-07 00:00:00, 2012-01-24 00:00:00, 201...","[32, 32, 32, 32, 32, 32, 32]","[55, 26, 11, 16, 10, 6, 9]","[3, 3, 3, 3, 3, 3, 3]",{3},{32},1,1,7,133,19.0
2249,ffe20f1e-4d0c-48e6-ae7e-d6c459706147,"[3968277, 4044713, 4320120, 4410724, 4452786, ...","[1976-07-06 00:00:00, 1977-08-30 00:00:00, 198...","[12, 12, 31, 14, 31, 31, 31, 31, 31, 31, 14, 3...","[7, 1, 1, 4, 1, 3, 4, 1, 2, 2, 7, 1, 2, 40, 2,...","[1, 1, 3, 1, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, ...","{1, 3}","{14, 12, 31}",2,3,23,169,7.347826
2250,ffe8339b-3646-4f72-819a-3ae5f291c79c,"[6205664, 7374250, 7744165, 8033617]","[2001-03-27 00:00:00, 2008-05-20 00:00:00, 201...","[52, 55, 55, 55]","[10, 3, 2, 1]","[5, 5, 5, 5]",{5},"{55, 52}",1,2,4,16,4.0
2251,ffeb6305-fbc7-40d9-9cc4-6e8f010fd6a0,"[4630216, 5386364, 5548231, 5642767, 5655677, ...","[1986-12-16 00:00:00, 1995-01-31 00:00:00, 199...","[22, 22, 41, 69, 68, 46, 46, 46, 22, 46, 59, 4...","[113, 15, 18, 6, 26, 17, 118, 17, 36, 15, 12, ...","[2, 2, 4, 6, 6, 4, 4, 4, 2, 4, 5, 4, 4, 4, 2, ...","{2, 5, 4, 6}","{69, 68, 59, 22, 41, 46}",4,6,27,598,22.148148
2252,fff2a352-49fb-43fd-9b4a-852eb210ec8d,"[6257463, 7789255]","[2001-07-10 00:00:00, 2010-09-07 00:00:00]","[19, 68]","[35, 6]","[1, 6]","{1, 6}","{19, 68}",2,2,2,41,20.5


In [31]:
assignee_info["num_roots"] = assignee_info["roots"].apply(len)
assignee_info["num_fine_roots"] = assignee_info["fine_roots"].apply(len)
assignee_info["num_patents"] = assignee_info["patent_id"].apply(len)
assignee_info["total_citation"] = assignee_info["num_citation"].apply(sum)
assignee_info["avg_citation"] = assignee_info["total_citation"]/assignee_info["num_patents"]

In [33]:
assignee_info.head(5)

Unnamed: 0,assignee_id,patent_id,patent_date,nber,num_citation,nber_category,roots,fine_roots,num_roots,num_fine_roots,num_patents,total_citation,avg_citation
0,0002c937-3be3-498d-a07e-3b237bb9aed4,"[3930382, 3938632, 3978950, 3978949, 4311220, ...","[1976-01-06 00:00:00, 1976-02-17 00:00:00, 197...","[53, 53, 53, 53, 53, 53, 53, 59, 53, 53, 53, 4...","[7, 12, 6, 7, 3, 8, 5, 23, 13, 6, 4, 12, 4, 4]","[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5]","{5, 4}","{59, 53, 43}",2,3,14,114,8.142857
1,00160e1a-004a-4309-a869-d3254cd858c8,"[5927408, 5931231, 5931238, 5956915, 6047775, ...","[1999-07-27 00:00:00, 1999-08-03 00:00:00, 199...","[64, 64, 64, 69, 64, 53, 52, 64, 64, 64, 64, 5...","[5, 93, 46, 7, 20, 3, 8, 24, 7, 8, 19, 23, 3, 9]","[6, 6, 6, 6, 6, 5, 5, 6, 6, 6, 6, 5, 6, 4]","{5, 6, 4}","{69, 64, 45, 53, 52}",3,5,14,275,19.642857
2,00254261-c5d4-4ad4-825b-f57aa58b8d5c,"[4700626, 4942814, 5172634, 5317968, 5458013, ...","[1987-10-20 00:00:00, 1990-07-24 00:00:00, 199...","[69, 69, 69, 69, 59, 69, 69, 69]","[3, 3, 7, 4, 1, 2, 14, 3]","[6, 6, 6, 6, 5, 6, 6, 6]","{5, 6}","{69, 59}",2,2,8,37,4.625
3,004ad217-deac-42ff-8a07-a584ed32ee47,"[4823247, 4835401, 4888672, 4901214, 5082998, ...","[1989-04-18 00:00:00, 1989-05-30 00:00:00, 198...","[45, 44, 45, 45, 49, 45, 45, 49, 45, 45, 51, 4...","[77, 3, 3, 6, 14, 3, 27, 12, 69, 11, 4, 17, 1,...","[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 5, 6]","{5, 4, 6}","{61, 51, 43, 45, 49, 44}",3,6,16,255,15.9375
4,009dfbeb-609a-4940-bce7-2a87f84c99e6,"[3941327, 3967319, 4093148, 4099601, 4132607, ...","[1976-03-02 00:00:00, 1976-06-29 00:00:00, 197...","[51, 24, 51, 53, 19, 41, 41, 52, 41, 41, 41, 4...","[1, 3, 13, 15, 3, 6, 21, 1, 11, 8, 9, 14, 22, ...","[5, 2, 5, 5, 1, 4, 4, 5, 4, 4, 4, 4, 4, 5, 5, ...","{2, 1, 5, 4}","{21, 24, 59, 51, 49, 53, 41, 19, 52}",4,9,27,241,8.925926


In [24]:
assignee_info[["num_roots", "num_fine_roots", "num_patents", "total_citation", "avg_citation"]].to_csv("roots_and_patents.csv", index=False)