In [1]:
import pandas as pd
import numpy as np

In [2]:
# Notebook display limits: show up to 150 rows and never truncate columns.
for _option_name, _option_value in (
    ('display.max_rows', 150),
    ('display.max_columns', None),
):
    pd.set_option(_option_name, _option_value)

In [3]:
# Load the raw pre-grant tables. Names with a leading underscore hold the
# tables exactly as read from disk; later cells derive cleaned frames from them.

# One row per application (id, document_number, date, title, abstract, ...).
applications_file = r"./data/pregrant/application.tsv"
_applications_df = pd.read_csv(applications_file, sep='\t')

# Inventor lookup table, keyed by `id`.
inventor_file = r"./data/pregrant/inventor.tsv"
_inventor_df = pd.read_csv(inventor_file, sep='\t')

# Assignee lookup table, keyed by `id`.
assignee_file = r"./data/pregrant/assignee.tsv"
_assignee_df = pd.read_csv(assignee_file, sep='\t')

# Location lookup table, keyed by `id`.
location_file = r"./data/pregrant/location.tsv"
_location_df = pd.read_csv(location_file, sep='\t')

# Link table: document_number -> assignee_id (+ location_id).
publication_assignee_file = r"./data/pregrant/publication_assignee.tsv"
_publication_assignee_df = pd.read_csv(publication_assignee_file, sep='\t')

# Link table: document_number -> inventor_id (+ location_id).
publication_inventor_file = r"./data/pregrant/publication_inventor.tsv"
_publication_inventor_df = pd.read_csv(publication_inventor_file, sep='\t')

# Comma-separated location crosswalk. low_memory=False makes pandas read the
# whole file before inferring dtypes instead of guessing chunk by chunk.
pre_grant_locations_file = "./data/pregrant/location_crosswalk.csv"
pre_grant_locations_df = pd.read_csv(pre_grant_locations_file, low_memory=False)

# `locations_file` is the same location.tsv already parsed into `_location_df`
# above, so reuse that parse instead of reading the file a second time.
# `.copy()` keeps the two names independent, as they were before.
locations_file = r"./data/pregrant/location.tsv"
_locations_df = _location_df.copy()




## Application Data

In [4]:
# Keep only applications that have a date and derive an integer `year` from
# the first four characters of the date string (e.g. '2020-07-02' -> 2020).
# Done as a single chain so `year` is never assigned onto a filtered slice,
# which is the pattern that triggers pandas' SettingWithCopyWarning.
_applications_df = (
    _applications_df
    .dropna(subset=['date'])
    .assign(year=lambda frame: frame['date'].str[:4].astype('int'))
)

_applications_df.head()

Unnamed: 0,id,document_number,type,application_number,date,country,series_code,invention_title,invention_abstract,rule_47_flag,filename,year
0,00000184-4904-11ec-b427-12de62d610b1,20210259522,utility,16919476,2020-07-02,US,16,MEDICAL VISUALIZATION SYSTEM,A medical visualisation system including a fir...,False,,2020
1,0000021c-ccf9-11ea-ba95-121df0c29c1e,20100070354,utility,12412361,2009-03-27,US,12,SYSTEM AND METHOD FOR A MERCHANT DEBIT CARD PR...,A merchant debit card program is described tha...,False,ipa100318.xml,2009
2,00001b62-f3c4-11eb-b0cf-121df0c29c1e,20210172812,utility,16623118,2018-04-20,US,16,"ENERGY CONVERTER, VIBRATION POWER GENERATOR, F...",An energy converter is formed by bonding a sol...,False,,2018
3,00003dee-4904-11ec-b427-12de62d610b1,20210259523,utility,17316500,2021-05-10,US,17,MEDICAL IMAGING DEVICE WITH A TELESCOPIC SCOPE,The subject matter discloses a medical imaging...,False,,2021
4,00004fba-ccf9-11ea-ba95-121df0c29c1e,20100070355,utility,12623189,2009-11-20,US,12,Methods for Transmitting Multimedia Files and ...,The invention is directed to a method of trans...,False,ipa100318.xml,2009


In [None]:
# _applications_df.year.value_counts()

## publication_assignee crosswalk

In [5]:
# Preview the raw link table between publications (document_number) and assignees.
_publication_assignee_df.head()

Unnamed: 0,document_number,assignee_id,sequence,location_id
0,20050000014,b27df54c-0a0d-4ae9-98c7-ccd172eb0c0e,1,f54d6149-cb8e-11eb-9615-121df0c29c1e
1,20050000023,1293d184-e3b2-467d-95f3-02fb4473cf6e,1,cfe77bc3-cb8e-11eb-9615-121df0c29c1e
2,20050000025,6f07167a-0983-4bb7-b7dc-0cd391e6c279,1,d6338035-cb8e-11eb-9615-121df0c29c1e
3,20050000029,417a65f9-a4c7-4061-964f-ffd992ce74bd,1,3cb80671-cb8e-11eb-9615-121df0c29c1e
4,20050000031,417a65f9-a4c7-4061-964f-ffd992ce74bd,1,3cb80671-cb8e-11eb-9615-121df0c29c1e


In [6]:
# Collapse repeated (document_number, assignee_id, location_id) links into a
# single row each; `count` records how many raw rows shared that combination.
# Note: groupby omits rows whose key columns contain NaN (pandas default
# dropna=True).
publication_assignee_df = (
    _publication_assignee_df
    .groupby(['document_number', 'assignee_id', 'location_id'])
    .size()
    .reset_index(name="count")
)
# Most-repeated links first.
publication_assignee_df.sort_values(by='count', ascending=False)

Unnamed: 0,document_number,assignee_id,location_id,count
1561380,20130067161,ae107234-b77a-4d73-97bc-ae1316ece835,e8e360fe-cb8e-11eb-9615-121df0c29c1e,5
1001823,20100254991,93c50a7b-1b47-4b8c-9474-7497967beaed,3e7b68e2-cb8e-11eb-9615-121df0c29c1e,4
406839,20070251083,ee88a895-514d-40bb-b9a6-db8fb20ad697,3cb80671-cb8e-11eb-9615-121df0c29c1e,4
1542091,20130041234,1219f1dd-7ce7-40fc-ae46-ac91aaf6a02c,e7275ca8-cb8f-11eb-9615-121df0c29c1e,4
2363595,20170119177,4e8fc194-af9a-41b8-931c-cd3ef3db9c11,fff50a17-cb8f-11eb-9615-121df0c29c1e,3
...,...,...,...,...
990773,20100238976,c08bdcaa-2331-4fa7-9e90-e5a4a659fc73,ec16f9be-cb90-11eb-9615-121df0c29c1e,1
990774,20100238977,80306c57-ba73-4e88-97c6-81b918f2dcf0,fe4bbe3c-cb8f-11eb-9615-121df0c29c1e,1
990775,20100238978,e77c94a4-d0b0-449e-be41-898f5f1bbc04,fdb12d00-cb8f-11eb-9615-121df0c29c1e,1
990776,20100238982,78b461d5-10ed-4ae2-b411-380977df8d07,fd46655c-09bc-11ec-893a-12de62d610b1,1


In [7]:
# Attach each de-duplicated assignee link to its application.
# - how='inner' keeps only applications that have at least one assignee link.
# - validate='one_to_many' makes pandas raise if `document_number` is not
#   unique in `_applications_df`.
# No merge indicator is requested: with an inner join it would be 'both' on
# every row, and the original code dropped the column straight away.
application_publication_assignee_df = pd.merge(
    _applications_df,
    publication_assignee_df,
    on=['document_number'],
    how='inner',
    validate='one_to_many',
)

application_publication_assignee_df.head()

Unnamed: 0,id,document_number,type,application_number,date,country,series_code,invention_title,invention_abstract,rule_47_flag,filename,year,assignee_id,location_id,count
0,0000021c-ccf9-11ea-ba95-121df0c29c1e,20100070354,utility,12412361,2009-03-27,US,12,SYSTEM AND METHOD FOR A MERCHANT DEBIT CARD PR...,A merchant debit card program is described tha...,False,ipa100318.xml,2009,10068d52-f4ee-47a2-9950-599af79a6484,f9139cb2-cb8f-11eb-9615-121df0c29c1e,1
1,00001b62-f3c4-11eb-b0cf-121df0c29c1e,20210172812,utility,16623118,2018-04-20,US,16,"ENERGY CONVERTER, VIBRATION POWER GENERATOR, F...",An energy converter is formed by bonding a sol...,False,,2018,7b2f1943-78fd-4a9f-9c3e-d32ca19cc371,baa6fcdc-cb8e-11eb-9615-121df0c29c1e,1
2,00001b62-f3c4-11eb-b0cf-121df0c29c1e,20210172812,utility,16623118,2018-04-20,US,16,"ENERGY CONVERTER, VIBRATION POWER GENERATOR, F...",An energy converter is formed by bonding a sol...,False,,2018,b4401241-2dd1-46d2-af15-8d91b699e960,baa6fcdc-cb8e-11eb-9615-121df0c29c1e,1
3,00004fba-ccf9-11ea-ba95-121df0c29c1e,20100070355,utility,12623189,2009-11-20,US,12,Methods for Transmitting Multimedia Files and ...,The invention is directed to a method of trans...,False,ipa100318.xml,2009,c0129a05-4813-44df-871d-205e59aa0bf7,fd10d2b1-cb8e-11eb-9615-121df0c29c1e,1
4,0000680f-4904-11ec-b427-12de62d610b1,20210259524,utility,17319550,2021-05-13,US,17,"ENDOSCOPE HEAD, ENDOSCOPE AND ALBARRAN LEVER H...",The invention refers to an attachment for an e...,False,,2021,68671a0c-3274-4382-8c88-40b8cbb31765,cfca1ca8-cb90-11eb-9615-121df0c29c1e,1


## Assignees


In [8]:
# Preview the raw assignee lookup table (id, type, person name fields, organization).
_assignee_df.head()

Unnamed: 0,id,type,name_first,name_last,organization
0,000049db-1cf0-4735-8a71-5a44b04859a6,2.0,,,"Atlantech International, Inc."
1,000074b2-2134-459b-a481-26c72c2313ae,3.0,,,Shanghai Juge Electronics Technologies Co. Ltd.
2,00007cb5-f03a-4547-8b7a-dd97dd959ef8,2.0,,,UrVibe LLC
3,0000c3c1-5ce8-4f80-8e21-d263467ab1fd,4.0,Wiley L.,"Day, Jr.",
4,0000c853-052b-4c30-acea-0c64e371349c,2.0,,,"Ringertown Innovations, LLC"


In [11]:

# Size of the raw assignee table before any filtering.
print(_assignee_df.shape)
# Restricting to assignees with an organization name is currently disabled:
# assignee_df = _assignee_df[~_assignee_df.organization.isnull()]
# print(assignee_df.shape)
_assignee_df.head()

(540183, 5)


Unnamed: 0,id,type,name_first,name_last,organization
0,000049db-1cf0-4735-8a71-5a44b04859a6,2.0,,,"Atlantech International, Inc."
1,000074b2-2134-459b-a481-26c72c2313ae,3.0,,,Shanghai Juge Electronics Technologies Co. Ltd.
2,00007cb5-f03a-4547-8b7a-dd97dd959ef8,2.0,,,UrVibe LLC
3,0000c3c1-5ce8-4f80-8e21-d263467ab1fd,4.0,Wiley L.,"Day, Jr.",
4,0000c853-052b-4c30-acea-0c64e371349c,2.0,,,"Ringertown Innovations, LLC"


In [12]:
# Sanity check: every assignee should appear under exactly one `id`.
# Counts the rows whose `id` repeats an earlier row; 0 means `id` is unique
# and can serve as the "one" side of a many-to-one merge.
_assignee_df.id.duplicated().sum()

0

In [13]:
# Look up assignee attributes (type, names, organization) for every
# application/assignee link.
# - how='left' keeps links whose assignee_id has no match in `_assignee_df`;
#   those rows get NaN in the assignee columns.
# - validate='many_to_one' makes pandas raise MergeError if `_assignee_df.id`
#   ever stops being unique, so the lookup cannot silently multiply rows.
# Both inputs have `id` and `type` columns, so the result carries them as
# `id_x`/`type_x` (application) and `id_y`/`type_y` (assignee).
application_assignee_df = pd.merge(
    application_publication_assignee_df,
    _assignee_df,
    left_on=['assignee_id'],
    right_on=['id'],
    how='left',
    validate='many_to_one',
)

# Links without a matching assignee record are deliberately not filtered out here.
application_assignee_df

Unnamed: 0,id_x,document_number,type_x,application_number,date,country,series_code,invention_title,invention_abstract,rule_47_flag,filename,year,assignee_id,location_id,count,id_y,type_y,name_first,name_last,organization
0,0000021c-ccf9-11ea-ba95-121df0c29c1e,20100070354,utility,12412361,2009-03-27,US,12,SYSTEM AND METHOD FOR A MERCHANT DEBIT CARD PR...,A merchant debit card program is described tha...,False,ipa100318.xml,2009,10068d52-f4ee-47a2-9950-599af79a6484,f9139cb2-cb8f-11eb-9615-121df0c29c1e,1,10068d52-f4ee-47a2-9950-599af79a6484,2.0,,,"VISA USA, INC."
1,00001b62-f3c4-11eb-b0cf-121df0c29c1e,20210172812,utility,16623118,2018-04-20,US,16,"ENERGY CONVERTER, VIBRATION POWER GENERATOR, F...",An energy converter is formed by bonding a sol...,False,,2018,7b2f1943-78fd-4a9f-9c3e-d32ca19cc371,baa6fcdc-cb8e-11eb-9615-121df0c29c1e,1,7b2f1943-78fd-4a9f-9c3e-d32ca19cc371,3.0,,,TOHOKU UNIVERSITY
2,00001b62-f3c4-11eb-b0cf-121df0c29c1e,20210172812,utility,16623118,2018-04-20,US,16,"ENERGY CONVERTER, VIBRATION POWER GENERATOR, F...",An energy converter is formed by bonding a sol...,False,,2018,b4401241-2dd1-46d2-af15-8d91b699e960,baa6fcdc-cb8e-11eb-9615-121df0c29c1e,1,b4401241-2dd1-46d2-af15-8d91b699e960,3.0,,,"TOHOKU STEEL CO., LTD."
3,00004fba-ccf9-11ea-ba95-121df0c29c1e,20100070355,utility,12623189,2009-11-20,US,12,Methods for Transmitting Multimedia Files and ...,The invention is directed to a method of trans...,False,ipa100318.xml,2009,c0129a05-4813-44df-871d-205e59aa0bf7,fd10d2b1-cb8e-11eb-9615-121df0c29c1e,1,c0129a05-4813-44df-871d-205e59aa0bf7,3.0,,,Clarity Pharmaceuticals Ltd
4,0000680f-4904-11ec-b427-12de62d610b1,20210259524,utility,17319550,2021-05-13,US,17,"ENDOSCOPE HEAD, ENDOSCOPE AND ALBARRAN LEVER H...",The invention refers to an attachment for an e...,False,,2021,68671a0c-3274-4382-8c88-40b8cbb31765,cfca1ca8-cb90-11eb-9615-121df0c29c1e,1,68671a0c-3274-4382-8c88-40b8cbb31765,3.0,,,DIGITAL ENDOSCOPY GMBH
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2969382,ffffe76f-ccf8-11ea-ba95-121df0c29c1e,20100070343,utility,12556076,2009-09-09,US,12,"SYSTEM AND METHOD FOR AGGREGATION, ANALYSIS, P...",Embodiments of systems and methods for the agg...,False,ipa100318.xml,2009,3341dd6e-43f6-4853-a436-b092a65c4a60,fe664e0e-cb8e-11eb-9615-121df0c29c1e,1,3341dd6e-43f6-4853-a436-b092a65c4a60,2.0,,,Truecar.com
2969383,ffffe9d9-ccf8-11ea-ba95-121df0c29c1e,20100070344,utility,12556109,2009-09-09,US,12,SYSTEM AND METHOD FOR CALCULATING AND DISPLAYI...,Embodiments of systems and methods for the agg...,False,ipa100318.xml,2009,3341dd6e-43f6-4853-a436-b092a65c4a60,fe664e0e-cb8e-11eb-9615-121df0c29c1e,1,3341dd6e-43f6-4853-a436-b092a65c4a60,2.0,,,Truecar.com
2969384,fffff581-ccf8-11ea-ba95-121df0c29c1e,20100070349,utility,12517260,2007-11-29,US,12,ROAD TOLL SYSTEM,A road toll system comprises a vehicle-mounted...,False,ipa100318.xml,2007,d25fa625-726a-4c37-a21e-7606b802cf50,ee4e6706-cb8f-11eb-9615-121df0c29c1e,1,d25fa625-726a-4c37-a21e-7606b802cf50,3.0,,,NXP B.V.
2969385,fffff7d2-ccf8-11ea-ba95-121df0c29c1e,20100070350,utility,12259937,2008-10-28,US,12,DYNAMIC PRICING FOR CONTENT PRESENTATIONS,A request for content is received. First conte...,False,ipa100318.xml,2008,2721c099-5a78-45fa-b3a7-11d119300596,ff4c2272-cb8e-11eb-9615-121df0c29c1e,1,2721c099-5a78-45fa-b3a7-11d119300596,2.0,,,Google LLC


In [14]:
#all have assignees
#application_assignee_df.assignee_id.isnull().sum()

In [15]:
# First rows of the application + assignee frame built by the merge above.
application_assignee_df.head()

Unnamed: 0,id_x,document_number,type_x,application_number,date,country,series_code,invention_title,invention_abstract,rule_47_flag,filename,year,assignee_id,location_id,count,id_y,type_y,name_first,name_last,organization
0,0000021c-ccf9-11ea-ba95-121df0c29c1e,20100070354,utility,12412361,2009-03-27,US,12,SYSTEM AND METHOD FOR A MERCHANT DEBIT CARD PR...,A merchant debit card program is described tha...,False,ipa100318.xml,2009,10068d52-f4ee-47a2-9950-599af79a6484,f9139cb2-cb8f-11eb-9615-121df0c29c1e,1,10068d52-f4ee-47a2-9950-599af79a6484,2.0,,,"VISA USA, INC."
1,00001b62-f3c4-11eb-b0cf-121df0c29c1e,20210172812,utility,16623118,2018-04-20,US,16,"ENERGY CONVERTER, VIBRATION POWER GENERATOR, F...",An energy converter is formed by bonding a sol...,False,,2018,7b2f1943-78fd-4a9f-9c3e-d32ca19cc371,baa6fcdc-cb8e-11eb-9615-121df0c29c1e,1,7b2f1943-78fd-4a9f-9c3e-d32ca19cc371,3.0,,,TOHOKU UNIVERSITY
2,00001b62-f3c4-11eb-b0cf-121df0c29c1e,20210172812,utility,16623118,2018-04-20,US,16,"ENERGY CONVERTER, VIBRATION POWER GENERATOR, F...",An energy converter is formed by bonding a sol...,False,,2018,b4401241-2dd1-46d2-af15-8d91b699e960,baa6fcdc-cb8e-11eb-9615-121df0c29c1e,1,b4401241-2dd1-46d2-af15-8d91b699e960,3.0,,,"TOHOKU STEEL CO., LTD."
3,00004fba-ccf9-11ea-ba95-121df0c29c1e,20100070355,utility,12623189,2009-11-20,US,12,Methods for Transmitting Multimedia Files and ...,The invention is directed to a method of trans...,False,ipa100318.xml,2009,c0129a05-4813-44df-871d-205e59aa0bf7,fd10d2b1-cb8e-11eb-9615-121df0c29c1e,1,c0129a05-4813-44df-871d-205e59aa0bf7,3.0,,,Clarity Pharmaceuticals Ltd
4,0000680f-4904-11ec-b427-12de62d610b1,20210259524,utility,17319550,2021-05-13,US,17,"ENDOSCOPE HEAD, ENDOSCOPE AND ALBARRAN LEVER H...",The invention refers to an attachment for an e...,False,,2021,68671a0c-3274-4382-8c88-40b8cbb31765,cfca1ca8-cb90-11eb-9615-121df0c29c1e,1,68671a0c-3274-4382-8c88-40b8cbb31765,3.0,,,DIGITAL ENDOSCOPY GMBH


In [16]:
# One-column frame of the distinct location ids referenced by assignee links.
# MHK will use this list for geocoding.
assignee_location_ids = pd.DataFrame(
    {"location_id": application_assignee_df["location_id"].unique()}
)
# assignee_location_ids.shape




### Merge this again with location.

In [None]:
# Preview the location crosswalk that was read from location_crosswalk.csv.
pre_grant_locations_df.head()

## One thing to note is that I dropped all locations that were not in the United States. However, there are a lot of US patents with assignees outside of the US.


In [17]:
# Trial join against the location crosswalk to see how many assignee rows
# find a location. `matched` records where each row came from, and
# validate='many_to_one' requires crosswalk ids to be unique.
test_merge = application_assignee_df.merge(
    pre_grant_locations_df,
    how='left',
    left_on=['location_id'],
    right_on=['id'],
    validate='many_to_one',
    indicator='matched',
)

# Shape of the rows whose location_id exists in the crosswalk.
shape1 = test_merge.query("matched=='both'").shape
print(shape1)

(1034622, 37)


In [18]:
# Same trial join, this time against the full location table.
test_merge = application_assignee_df.merge(
    _locations_df,
    how='left',
    left_on=['location_id'],
    right_on=['id'],
    validate='many_to_one',
    indicator='matched',
)

# The printed shape counts every matched row; `shape2` counts only the
# matched rows whose location country (`country_y`) is 'US'.
_matched_rows = test_merge.query("matched=='both'")
print(_matched_rows.shape)
shape2 = _matched_rows.query("country_y=='US'").shape
del _matched_rows

(2952777, 30)


In [None]:
# First rows of whichever trial merge was assigned to `test_merge` last.
test_merge.head()

In [None]:
# Compare the two trial joins, one value per line: crosswalk matches,
# US matches in the full location table, and the difference in row counts.
print(shape1, shape2, shape2[0] - shape1[0], sep='\n')

In [19]:
# Attach crosswalk location attributes (GEOID, city, state, county, ...) to
# every application/assignee row. how='left' keeps rows whose location_id is
# missing from the crosswalk; they get NaN in the location columns.
# validate='many_to_one' is the same guard the trial merge used: it raises if
# crosswalk ids are not unique, so this join cannot duplicate rows.
application_assignee_location_df = pd.merge(
    application_assignee_df,
    pre_grant_locations_df,
    left_on=['location_id'],
    right_on=['id'],
    how='left',
    validate='many_to_one',
)

In [21]:
# application_assignee_location_df['GEOID'].head()

In [22]:
# Report the full row count, then keep an independent copy of only the rows
# that received a GEOID from the crosswalk and report that count too.
print(application_assignee_location_df.shape)
application_assignee_location_GEOIDS_df = (
    application_assignee_location_df
    .query("GEOID.notnull()", engine="python")
    .copy()
)
print(application_assignee_location_GEOIDS_df.shape)
# A stricter variant that also requires an organization name is not applied.
application_assignee_location_GEOIDS_df.head()

(2969387, 36)
(1034622, 36)


Unnamed: 0,id_x,document_number,type_x,application_number,date,country_x,series_code,invention_title,invention_abstract,rule_47_flag,filename,year,assignee_id,location_id,count,id_y,type_y,name_first,name_last,organization,id,GEOID,pv_city,gl_city,pv_state,gl_state,country_y,pv_lat,gl_lat,pv_long,gl_long,pv_county,gl_county,pv_state_fips,pv_county_fips,gl_county_fips
0,0000021c-ccf9-11ea-ba95-121df0c29c1e,20100070354,utility,12412361,2009-03-27,US,12,SYSTEM AND METHOD FOR A MERCHANT DEBIT CARD PR...,A merchant debit card program is described tha...,False,ipa100318.xml,2009,10068d52-f4ee-47a2-9950-599af79a6484,f9139cb2-cb8f-11eb-9615-121df0c29c1e,1,10068d52-f4ee-47a2-9950-599af79a6484,2.0,,,"VISA USA, INC.",f9139cb2-cb8f-11eb-9615-121df0c29c1e,6075,San Francisco,,CA,,US,37.7292,,-123.047,,San Francisco,,6.0,6075.0,
7,00007a2d-ccf9-11ea-ba95-121df0c29c1e,20100070357,utility,12334277,2008-12-12,US,12,INCENTIVE BASED MARKETING THROUGH SOCIAL NETWORKS,A method and system for providing an incentive...,False,ipa100318.xml,2008,c912d9d3-f86c-4c2c-8bc9-c91a9b4511ab,ffc9f8bd-cb8e-11eb-9615-121df0c29c1e,1,c912d9d3-f86c-4c2c-8bc9-c91a9b4511ab,2.0,,,"AT&T Intellectual Property I, L.P.",ffc9f8bd-cb8e-11eb-9615-121df0c29c1e,32031,Reno,,NV,,US,39.5504,,-119.803,,Washoe,,32.0,32031.0,
8,00007d04-ccf9-11ea-ba95-121df0c29c1e,20100070358,utility,12561091,2009-09-16,US,12,REC CREDIT DISTRIBUTION SYSTEM AND METHOD,A method for promoting recycling from a fund e...,False,ipa100318.xml,2009,c1bd445e-3bcd-4b27-855a-651207bb56e3,9596257a-cb90-11eb-9615-121df0c29c1e,1,c1bd445e-3bcd-4b27-855a-651207bb56e3,2.0,,,"Casella Waste Systems, Inc.",9596257a-cb90-11eb-9615-121df0c29c1e,50021,Rutland,,VT,,US,43.6106,,-72.9726,,Rutland,,50.0,50021.0,
10,0000827d-ccf9-11ea-ba95-121df0c29c1e,20100070360,utility,12339981,2008-12-19,US,12,SYSTEM AND METHOD FOR CREATING A SPEECH SEARCH...,"Disclosed herein are systems, methods, and com...",False,ipa100318.xml,2008,c912d9d3-f86c-4c2c-8bc9-c91a9b4511ab,ffc9f8bd-cb8e-11eb-9615-121df0c29c1e,1,c912d9d3-f86c-4c2c-8bc9-c91a9b4511ab,2.0,,,"AT&T Intellectual Property I, L.P.",ffc9f8bd-cb8e-11eb-9615-121df0c29c1e,32031,Reno,,NV,,US,39.5504,,-119.803,,Washoe,,32.0,32031.0,
11,00008c11-ccf9-11ea-ba95-121df0c29c1e,20100070363,utility,12623282,2009-11-20,US,12,INTERNET STRAWMAN AND USER INTERFACE THEREFOR,A computer implemented method for facilitating...,False,ipa100318.xml,2009,89e5d47d-a6c8-4dbc-b190-2a0bb9fb5970,499f8deb-cb8e-11eb-9615-121df0c29c1e,1,89e5d47d-a6c8-4dbc-b190-2a0bb9fb5970,2.0,,,SONY ELECTRONICS INC.,499f8deb-cb8e-11eb-9615-121df0c29c1e,34003,Parkridge,Park Ridge,NJ,NJ,US,,-74.03939,,41.03379,,Bergen,34.0,,34003.0


In [30]:
# Slim assignee-side table: just the identifiers, year, GEOID and organization
# name needed when combining with the inventor data.
_assignee_output_columns = [
    'document_number',
    'type_x',
    'application_number',
    'year',
    'assignee_id',
    'location_id',
    'GEOID',
    'organization',
]
denormalized_application_assignee = application_assignee_location_GEOIDS_df[_assignee_output_columns]

## Inventors

## To Do:

* Finish inventor crosswalks.
* Combine assignee and inventor crosswalks.
* Aggregate assignee and inventor crosswalks.
* Combine with granted data

What do we need:
* Assignee information
    * 
* Inventor information
    * Gender counts
    * Number of inventors
    * Team size
    * 



In [23]:
# Preview the raw link table between publications (document_number) and inventors.
_publication_inventor_df.head()

Unnamed: 0,document_number,inventor_id,sequence,location_id
0,20050000001,fl:ti_ln:goldkind-1,1,fa3d02fd-09bd-11ec-893a-12de62d610b1
1,20050000002,fl:je_ln:levy-10,2,b778d60a-cb8e-11eb-9615-121df0c29c1e
2,20050000002,fl:ph_ln:levy-4,1,f8b3a9cd-cb90-11eb-9615-121df0c29c1e
3,20050000003,9958fa19-3b0f-11eb-a3cd-121df0c29c1e,1,4c4e3991-cb8e-11eb-9615-121df0c29c1e
4,20050000004,fl:da_ln:yun-29,1,efa16d0b-cb8f-11eb-9615-121df0c29c1e


In [25]:
# Collapse repeated (document_number, inventor_id, location_id) links into a
# single row each; `count` records how many raw rows shared that combination.
# Note: groupby omits rows whose key columns contain NaN (pandas default
# dropna=True).
publication_inventor_df = (
    _publication_inventor_df
    .groupby(['document_number', 'inventor_id', 'location_id'])
    .size()
    .reset_index(name="count")
)
# Ascending, so the most-repeated links end up at the bottom.
publication_inventor_df.sort_values(by='count')

# df.groupby(['name','month'])['text'].apply(lambda x: ','.join(x)).reset_index()   


Unnamed: 0,document_number,inventor_id,location_id,count
0,20050000001,fl:ti_ln:goldkind-1,fa3d02fd-09bd-11ec-893a-12de62d610b1,1
11235789,20170146912,fl:ha_ln:mann-2,ab02f7aa-cb8e-11eb-9615-121df0c29c1e,1
11235790,20170146913,fl:sh_ln:hirukawa-3,e85c02d5-cb8f-11eb-9615-121df0c29c1e,1
11235791,20170146913,fl:ta_ln:kudo-41,5c5ac67d-cb8e-11eb-9615-121df0c29c1e,1
11235792,20170146914,fl:no_ln:saito-49,fd46655c-09bc-11ec-893a-12de62d610b1,1
...,...,...,...,...
5461708,20110224034,fl:qi_ln:tu-7,3ce1d531-cb8e-11eb-9615-121df0c29c1e,3
7966937,20140135393,fl:si_ln:roy-10,3c736a08-cb8e-11eb-9615-121df0c29c1e,3
1427052,20060269535,fl:a._ln:naidu-4,f33e7145-cb8e-11eb-9615-121df0c29c1e,3
13400039,20190136670,fl:al_ln:gorbunov-1,e8ee00ec-cb90-11eb-9615-121df0c29c1e,3


## join to application

- many to many? on doc # and location id
    - one application may have multiple inventors 
    - one inventor could have multiple patent applications

In [None]:
# test_merge = pd.merge(application_assignee_df, _publication_inventor_df, 
#                       on=['document_number', 'location_id'], 
#                       how='left', 
#                       indicator='matched', 
#                       validate='many_to_many')

In [26]:
# Join inventors onto the application data only; assignees are not involved yet.
# - how='left' keeps applications that have no inventor link.
# - validate='one_to_many' makes pandas raise if `document_number` is not
#   unique in `_applications_df`, so one inventor link can never match several
#   application rows.
# NOTE(review): this uses the raw `_publication_inventor_df`, not the
# de-duplicated `publication_inventor_df`, so repeated links stay in the
# result. Confirm this is intended.
application_inventor_df = pd.merge(
    _applications_df,
    _publication_inventor_df,
    on=['document_number'],
    how='left',
    validate='one_to_many',
)

application_inventor_df

Unnamed: 0,id,document_number,type,application_number,date,country,series_code,invention_title,invention_abstract,rule_47_flag,filename,year,inventor_id,sequence,location_id
0,00000184-4904-11ec-b427-12de62d610b1,20210259522,utility,16919476,2020-07-02,US,16,MEDICAL VISUALIZATION SYSTEM,A medical visualisation system including a fir...,False,,2020,fl:br_ln:nielsen-22,1,51f0f76e-cb8e-11eb-9615-121df0c29c1e
1,00000184-4904-11ec-b427-12de62d610b1,20210259522,utility,16919476,2020-07-02,US,16,MEDICAL VISUALIZATION SYSTEM,A medical visualisation system including a fir...,False,,2020,fl:ch_ln:hahnemann-1,3,55166334-cb8e-11eb-9615-121df0c29c1e
2,00000184-4904-11ec-b427-12de62d610b1,20210259522,utility,16919476,2020-07-02,US,16,MEDICAL VISUALIZATION SYSTEM,A medical visualisation system including a fir...,False,,2020,fl:he_ln:frengler-1,2,
3,00000184-4904-11ec-b427-12de62d610b1,20210259522,utility,16919476,2020-07-02,US,16,MEDICAL VISUALIZATION SYSTEM,A medical visualisation system including a fir...,False,,2020,fl:li_ln:ubbesen-1,0,eaed3473-cb8f-11eb-9615-121df0c29c1e
4,0000021c-ccf9-11ea-ba95-121df0c29c1e,20100070354,utility,12412361,2009-03-27,US,12,SYSTEM AND METHOD FOR A MERCHANT DEBIT CARD PR...,A merchant debit card program is described tha...,False,ipa100318.xml,2009,fl:ed_ln:fordyce-1,1,3e82ee78-cb8e-11eb-9615-121df0c29c1e
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16686903,fffff7d2-ccf8-11ea-ba95-121df0c29c1e,20100070350,utility,12259937,2008-10-28,US,12,DYNAMIC PRICING FOR CONTENT PRESENTATIONS,A request for content is received. First conte...,False,ipa100318.xml,2008,fl:mi_ln:hochberg-2,2,de8e30d6-cb8e-11eb-9615-121df0c29c1e
16686904,fffffadc-ccf8-11ea-ba95-121df0c29c1e,20100070351,utility,12448114,2008-04-14,US,12,ELECTRONIC ADVERTISEMENT METHOD AND SYSTEM USI...,The present invention is related to an electro...,False,ipa100318.xml,2008,fl:do_ln:kang-212,1,ff7e35db-cb90-11eb-9615-121df0c29c1e
16686905,fffffd13-4903-11ec-b427-12de62d610b1,20210259521,utility,17165645,2021-02-02,US,17,CONTROLLER FOR SELECTIVELY CONTROLLING MANUAL ...,A system configured to control an endoscope pr...,False,,2021,fl:ch_ln:hwang-196,0,a0e9bce7-cb8e-11eb-9615-121df0c29c1e
16686906,fffffd62-ccf8-11ea-ba95-121df0c29c1e,20100070352,utility,12560644,2009-09-16,US,12,Consumer incentive system and method,A method and system to enhance the relationshi...,False,ipa100318.xml,2009,fl:wi_ln:flanders-2,1,cec0baf0-cb8e-11eb-9615-121df0c29c1e


In [28]:
# Number of rows that are exact duplicates (across all columns) of an earlier row.
application_inventor_df.duplicated().sum()

0

In [None]:
# Inspect every inventor row belonging to application number 13483997.
application_inventor_df.query("application_number == 13483997")

## now need to join on inventor id for male_flag

In [None]:
# Preview the raw inventor lookup table that supplies `male_flag`.
_inventor_df.head()

In [None]:
# test_merge = pd.merge(application_assignee_inventor_df, _inventor_df, 
#                        left_on=['inventor_id'],
#                        right_on = ['id'], 
#                        how='left', 
#                        indicator='matched', 
#                        validate='many_to_many')

In [29]:
# Add inventor attributes (names, male_flag, attribution_status) to every
# application/inventor row. how='left' keeps rows whose inventor_id has no
# match in `_inventor_df`.
application_inventor_male_flag_df = pd.merge(
    application_inventor_df,
    _inventor_df,
    left_on=['inventor_id'],
    right_on=['id'],
    how='left',
)

# A lookup join must not add rows. If this fires, `_inventor_df.id` has
# duplicates that are fanning out the application/inventor rows.
assert len(application_inventor_male_flag_df) == len(application_inventor_df), (
    "inventor lookup changed the row count; check _inventor_df.id for duplicates"
)

# Display the frame this cell builds. Previously the cell ended on an
# unrelated frame, so this one was evaluated but never shown.
application_inventor_male_flag_df

Unnamed: 0,id_x,document_number,type,application_number,date,country,series_code,invention_title,invention_abstract,rule_47_flag,filename,year,inventor_id,sequence,location_id,id_y,name_first,name_last,male_flag,attribution_status
0,00000184-4904-11ec-b427-12de62d610b1,20210259522,utility,16919476,2020-07-02,US,16,MEDICAL VISUALIZATION SYSTEM,A medical visualisation system including a fir...,False,,2020,fl:br_ln:nielsen-22,1,51f0f76e-cb8e-11eb-9615-121df0c29c1e,fl:br_ln:nielsen-22,Brian,NIELSEN,1.0,1
1,00000184-4904-11ec-b427-12de62d610b1,20210259522,utility,16919476,2020-07-02,US,16,MEDICAL VISUALIZATION SYSTEM,A medical visualisation system including a fir...,False,,2020,fl:ch_ln:hahnemann-1,3,55166334-cb8e-11eb-9615-121df0c29c1e,fl:ch_ln:hahnemann-1,Christina,Hahnemann,,98
2,00000184-4904-11ec-b427-12de62d610b1,20210259522,utility,16919476,2020-07-02,US,16,MEDICAL VISUALIZATION SYSTEM,A medical visualisation system including a fir...,False,,2020,fl:he_ln:frengler-1,2,,fl:he_ln:frengler-1,Henrik,FRENGLER,1.0,1
3,00000184-4904-11ec-b427-12de62d610b1,20210259522,utility,16919476,2020-07-02,US,16,MEDICAL VISUALIZATION SYSTEM,A medical visualisation system including a fir...,False,,2020,fl:li_ln:ubbesen-1,0,eaed3473-cb8f-11eb-9615-121df0c29c1e,fl:li_ln:ubbesen-1,Line Sandahl,UBBESEN,1.0,1
4,0000021c-ccf9-11ea-ba95-121df0c29c1e,20100070354,utility,12412361,2009-03-27,US,12,SYSTEM AND METHOD FOR A MERCHANT DEBIT CARD PR...,A merchant debit card program is described tha...,False,ipa100318.xml,2009,fl:ed_ln:fordyce-1,1,3e82ee78-cb8e-11eb-9615-121df0c29c1e,fl:ed_ln:fordyce-1,Edward W.,"Fordyce, III",1.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16686903,fffff7d2-ccf8-11ea-ba95-121df0c29c1e,20100070350,utility,12259937,2008-10-28,US,12,DYNAMIC PRICING FOR CONTENT PRESENTATIONS,A request for content is received. First conte...,False,ipa100318.xml,2008,fl:mi_ln:hochberg-2,2,de8e30d6-cb8e-11eb-9615-121df0c29c1e,fl:mi_ln:hochberg-2,Michael,Hochberg,1.0,1
16686904,fffffadc-ccf8-11ea-ba95-121df0c29c1e,20100070351,utility,12448114,2008-04-14,US,12,ELECTRONIC ADVERTISEMENT METHOD AND SYSTEM USI...,The present invention is related to an electro...,False,ipa100318.xml,2008,fl:do_ln:kang-212,1,ff7e35db-cb90-11eb-9615-121df0c29c1e,fl:do_ln:kang-212,Dong-Kyun,Kang,1.0,1
16686905,fffffd13-4903-11ec-b427-12de62d610b1,20210259521,utility,17165645,2021-02-02,US,17,CONTROLLER FOR SELECTIVELY CONTROLLING MANUAL ...,A system configured to control an endoscope pr...,False,,2021,fl:ch_ln:hwang-196,0,a0e9bce7-cb8e-11eb-9615-121df0c29c1e,fl:ch_ln:hwang-196,Charles,Hwang,1.0,1
16686906,fffffd62-ccf8-11ea-ba95-121df0c29c1e,20100070352,utility,12560644,2009-09-16,US,12,Consumer incentive system and method,A method and system to enhance the relationshi...,False,ipa100318.xml,2009,fl:wi_ln:flanders-2,1,cec0baf0-cb8e-11eb-9615-121df0c29c1e,fl:wi_ln:flanders-2,William Henry,Flanders,1.0,1


In [None]:
# Count rows per application number and list the 40 applications with the most rows.
application_inventor_male_flag_df.groupby(['application_number']).size().sort_values(ascending = False).head(40)

In [None]:
# For application number 13483997, count how many rows each inventor_id contributes.
application_inventor_male_flag_df.query("application_number == 13483997").inventor_id.value_counts()

In [None]:
# List the column names available after the inventor lookup merge.
application_inventor_male_flag_df.columns

In [None]:
# One-column frame of the distinct location ids referenced by inventor rows.
application_inventor_male_flag_location_ids = pd.DataFrame(
    {"location_id": application_inventor_male_flag_df["location_id"].unique()}
)
# Show how many distinct inventor location ids there are.
application_inventor_male_flag_location_ids.shape

In [31]:
# Combine the assignee-side table with the inventor-side table by publication.
# Each assignee row is paired with every inventor row that has the same
# document_number. how='left' keeps assignee rows with no inventor row.
# Columns present in both inputs are suffixed `_x` (assignee side) and `_y`
# (inventor side).
application_assignee_inventor_df = denormalized_application_assignee.merge(
    application_inventor_male_flag_df,
    how='left',
    on=['document_number'],
)

Unnamed: 0,document_number,type_x,application_number_x,year_x,assignee_id,location_id_x,GEOID,organization,id_x,type,application_number_y,date,country,series_code,invention_title,invention_abstract,rule_47_flag,filename,year_y,inventor_id,sequence,location_id_y,id_y,name_first,name_last,male_flag,attribution_status
0,20100070354,utility,12412361,2009,10068d52-f4ee-47a2-9950-599af79a6484,f9139cb2-cb8f-11eb-9615-121df0c29c1e,06075,"VISA USA, INC.",0000021c-ccf9-11ea-ba95-121df0c29c1e,utility,12412361,2009-03-27,US,12,SYSTEM AND METHOD FOR A MERCHANT DEBIT CARD PR...,A merchant debit card program is described tha...,False,ipa100318.xml,2009,fl:ed_ln:fordyce-1,1,3e82ee78-cb8e-11eb-9615-121df0c29c1e,fl:ed_ln:fordyce-1,Edward W.,"Fordyce, III",1.0,1
1,20100070354,utility,12412361,2009,10068d52-f4ee-47a2-9950-599af79a6484,f9139cb2-cb8f-11eb-9615-121df0c29c1e,06075,"VISA USA, INC.",0000021c-ccf9-11ea-ba95-121df0c29c1e,utility,12412361,2009-03-27,US,12,SYSTEM AND METHOD FOR A MERCHANT DEBIT CARD PR...,A merchant debit card program is described tha...,False,ipa100318.xml,2009,fl:je_ln:schulz-2,3,fe664e0e-cb8e-11eb-9615-121df0c29c1e,fl:je_ln:schulz-2,Jennifer,Schulz,1.0,1
2,20100070354,utility,12412361,2009,10068d52-f4ee-47a2-9950-599af79a6484,f9139cb2-cb8f-11eb-9615-121df0c29c1e,06075,"VISA USA, INC.",0000021c-ccf9-11ea-ba95-121df0c29c1e,utility,12412361,2009-03-27,US,12,SYSTEM AND METHOD FOR A MERCHANT DEBIT CARD PR...,A merchant debit card program is described tha...,False,ipa100318.xml,2009,fl:ma_ln:mcmahon-1,2,595dfa6d-cb8e-11eb-9615-121df0c29c1e,fl:ma_ln:mcmahon-1,Mary Pat,McMahon,0.0,1
3,20100070357,utility,12334277,2008,c912d9d3-f86c-4c2c-8bc9-c91a9b4511ab,ffc9f8bd-cb8e-11eb-9615-121df0c29c1e,32031,"AT&T Intellectual Property I, L.P.",00007a2d-ccf9-11ea-ba95-121df0c29c1e,utility,12334277,2008-12-12,US,12,INCENTIVE BASED MARKETING THROUGH SOCIAL NETWORKS,A method and system for providing an incentive...,False,ipa100318.xml,2008,fl:ch_ln:fenton-2,1,4599df9a-cb8e-11eb-9615-121df0c29c1e,fl:ch_ln:fenton-2,Charles Stanley,Fenton,1.0,1
4,20100070358,utility,12561091,2009,c1bd445e-3bcd-4b27-855a-651207bb56e3,9596257a-cb90-11eb-9615-121df0c29c1e,50021,"Casella Waste Systems, Inc.",00007d04-ccf9-11ea-ba95-121df0c29c1e,utility,12561091,2009-09-16,US,12,REC CREDIT DISTRIBUTION SYSTEM AND METHOD,A method for promoting recycling from a fund e...,False,ipa100318.xml,2009,fl:ja_ln:bohlig-1,2,5d4a19e2-cb8e-11eb-9615-121df0c29c1e,fl:ja_ln:bohlig-1,James W.,Bohlig,1.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3125326,20100070344,utility,12556109,2009,3341dd6e-43f6-4853-a436-b092a65c4a60,fe664e0e-cb8e-11eb-9615-121df0c29c1e,06037,Truecar.com,ffffe9d9-ccf8-11ea-ba95-121df0c29c1e,utility,12556109,2009-09-09,US,12,SYSTEM AND METHOD FOR CALCULATING AND DISPLAYI...,Embodiments of systems and methods for the agg...,False,ipa100318.xml,2009,fl:sc_ln:painter-1,2,4534c36e-cb8e-11eb-9615-121df0c29c1e,fl:sc_ln:painter-1,Scott,Painter,1.0,1
3125327,20100070344,utility,12556109,2009,3341dd6e-43f6-4853-a436-b092a65c4a60,fe664e0e-cb8e-11eb-9615-121df0c29c1e,06037,Truecar.com,ffffe9d9-ccf8-11ea-ba95-121df0c29c1e,utility,12556109,2009-09-09,US,12,SYSTEM AND METHOD FOR CALCULATING AND DISPLAYI...,Embodiments of systems and methods for the agg...,False,ipa100318.xml,2009,fl:to_ln:taira-2,1,fe664e0e-cb8e-11eb-9615-121df0c29c1e,fl:to_ln:taira-2,Tom,Taira,1.0,1
3125328,20100070350,utility,12259937,2008,2721c099-5a78-45fa-b3a7-11d119300596,ff4c2272-cb8e-11eb-9615-121df0c29c1e,06085,Google LLC,fffff7d2-ccf8-11ea-ba95-121df0c29c1e,utility,12259937,2008-10-28,US,12,DYNAMIC PRICING FOR CONTENT PRESENTATIONS,A request for content is received. First conte...,False,ipa100318.xml,2008,fl:am_ln:paunikar-1,1,fac6a96e-cb8f-11eb-9615-121df0c29c1e,fl:am_ln:paunikar-1,Amit,Paunikar,1.0,1
3125329,20100070350,utility,12259937,2008,2721c099-5a78-45fa-b3a7-11d119300596,ff4c2272-cb8e-11eb-9615-121df0c29c1e,06085,Google LLC,fffff7d2-ccf8-11ea-ba95-121df0c29c1e,utility,12259937,2008-10-28,US,12,DYNAMIC PRICING FOR CONTENT PRESENTATIONS,A request for content is received. First conte...,False,ipa100318.xml,2008,fl:mi_ln:hochberg-2,2,de8e30d6-cb8e-11eb-9615-121df0c29c1e,fl:mi_ln:hochberg-2,Michael,Hochberg,1.0,1


In [35]:
# Show every joined assignee/inventor row belonging to a single publication
# (document_number 20070154943), to inspect how the merge multiplied its rows.
(
    application_assignee_inventor_df
    .query("document_number == 20070154943")
)

Unnamed: 0,document_number,type_x,application_number_x,year_x,assignee_id,location_id_x,GEOID,organization,id_x,type,application_number_y,date,country,series_code,invention_title,invention_abstract,rule_47_flag,filename,year_y,inventor_id,sequence,location_id_y,id_y,name_first,name_last,male_flag,attribution_status
2866200,20070154943,utility,11724624,2007,93cc62ac-5391-47aa-8399-2944f43a4429,70fc9ca7-09bd-11ec-893a-12de62d610b1,9011,Pfizer Inc.,eaac59d1-cc76-11ea-ba95-121df0c29c1e,utility,11724624,2007-03-15,US,11,Methods for preventing cattle reproductive dis...,The present invention relates to methods for t...,False,ipa070705.xml,2007,fl:br_ln:fergen-1,3,dbfb5e55-cb8e-11eb-9615-121df0c29c1e,fl:br_ln:fergen-1,Brian James,FERGEN,1.0,1
2866201,20070154943,utility,11724624,2007,93cc62ac-5391-47aa-8399-2944f43a4429,70fc9ca7-09bd-11ec-893a-12de62d610b1,9011,Pfizer Inc.,eaac59d1-cc76-11ea-ba95-121df0c29c1e,utility,11724624,2007-03-15,US,11,Methods for preventing cattle reproductive dis...,The present invention relates to methods for t...,False,ipa070705.xml,2007,fl:ca_ln:tucker-1,4,dbfb5e55-cb8e-11eb-9615-121df0c29c1e,fl:ca_ln:tucker-1,Cassius M.,Tucker,1.0,1
2866202,20070154943,utility,11724624,2007,93cc62ac-5391-47aa-8399-2944f43a4429,70fc9ca7-09bd-11ec-893a-12de62d610b1,9011,Pfizer Inc.,eaac59d1-cc76-11ea-ba95-121df0c29c1e,utility,11724624,2007-03-15,US,11,Methods for preventing cattle reproductive dis...,The present invention relates to methods for t...,False,ipa070705.xml,2007,fl:ma_ln:ficken-1,2,5fccb06f-cb8e-11eb-9615-121df0c29c1e,fl:ma_ln:ficken-1,Martin D.,Ficken,1.0,1
2866203,20070154943,utility,11724624,2007,93cc62ac-5391-47aa-8399-2944f43a4429,70fc9ca7-09bd-11ec-893a-12de62d610b1,9011,Pfizer Inc.,eaac59d1-cc76-11ea-ba95-121df0c29c1e,utility,11724624,2007-03-15,US,11,Methods for preventing cattle reproductive dis...,The present invention relates to methods for t...,False,ipa070705.xml,2007,fl:mi_ln:ellsworth-1,1,dbfb5e55-cb8e-11eb-9615-121df0c29c1e,fl:mi_ln:ellsworth-1,Michael A.,Ellsworth,1.0,1
2866204,20070154943,utility,11724624,2007,93cc62ac-5391-47aa-8399-2944f43a4429,fe67d7f9-cb8f-11eb-9615-121df0c29c1e,36061,Pfizer Inc.,eaac59d1-cc76-11ea-ba95-121df0c29c1e,utility,11724624,2007-03-15,US,11,Methods for preventing cattle reproductive dis...,The present invention relates to methods for t...,False,ipa070705.xml,2007,fl:br_ln:fergen-1,3,dbfb5e55-cb8e-11eb-9615-121df0c29c1e,fl:br_ln:fergen-1,Brian James,FERGEN,1.0,1
2866205,20070154943,utility,11724624,2007,93cc62ac-5391-47aa-8399-2944f43a4429,fe67d7f9-cb8f-11eb-9615-121df0c29c1e,36061,Pfizer Inc.,eaac59d1-cc76-11ea-ba95-121df0c29c1e,utility,11724624,2007-03-15,US,11,Methods for preventing cattle reproductive dis...,The present invention relates to methods for t...,False,ipa070705.xml,2007,fl:ca_ln:tucker-1,4,dbfb5e55-cb8e-11eb-9615-121df0c29c1e,fl:ca_ln:tucker-1,Cassius M.,Tucker,1.0,1
2866206,20070154943,utility,11724624,2007,93cc62ac-5391-47aa-8399-2944f43a4429,fe67d7f9-cb8f-11eb-9615-121df0c29c1e,36061,Pfizer Inc.,eaac59d1-cc76-11ea-ba95-121df0c29c1e,utility,11724624,2007-03-15,US,11,Methods for preventing cattle reproductive dis...,The present invention relates to methods for t...,False,ipa070705.xml,2007,fl:ma_ln:ficken-1,2,5fccb06f-cb8e-11eb-9615-121df0c29c1e,fl:ma_ln:ficken-1,Martin D.,Ficken,1.0,1
2866207,20070154943,utility,11724624,2007,93cc62ac-5391-47aa-8399-2944f43a4429,fe67d7f9-cb8f-11eb-9615-121df0c29c1e,36061,Pfizer Inc.,eaac59d1-cc76-11ea-ba95-121df0c29c1e,utility,11724624,2007-03-15,US,11,Methods for preventing cattle reproductive dis...,The present invention relates to methods for t...,False,ipa070705.xml,2007,fl:mi_ln:ellsworth-1,1,dbfb5e55-cb8e-11eb-9615-121df0c29c1e,fl:mi_ln:ellsworth-1,Michael A.,Ellsworth,1.0,1


In [33]:
# Row count for each (document_number, organization, id_y) combination,
# largest first; only the 40 biggest groups are displayed. Counts above 1
# mean the same combination occurs on several rows of the joined frame.
(
    application_assignee_inventor_df
    .groupby(["document_number", "organization", "id_y"])
    .size()
    .sort_values(ascending=False)
    .head(40)
)

document_number  organization                                 id_y                                
20190136670      SSS Group Ltd.                               fl:al_ln:gorbunov-1                     3
20070154943      Pfizer Inc.                                  fl:br_ln:fergen-1                       2
20080311151      Pfizer Inc.                                  fl:ke_ln:king-8                         2
20080069836      GenVec, Inc.                                 fl:ch_ln:cheng-592                      2
20050143816      AT&T Intellectual Property I, L.P.           fl:ar_ln:beisang-1                      2
20130031084      Akiban Technologies, Inc.                    fl:or_ln:herrnstadt-1                   2
20130019920      AMONIX, INC.                                 fl:ge_ln:kinsey-3                       2
20130034936      TEXAS INSTRUMENTS INCORPORATED               fl:sr_ln:koduri-4                       2
20130019917      HAMILTON SUNDSTRAND CORPORATION              fl:de_l

## Locations

- TODO: fix by adding `GEOID` to the locations

In [None]:
# Stack the two location-id frames (assignee and inventor, going by their
# names) row-wise, replacing their original indexes with a fresh 0..n-1 index.
_all_locations = pd.concat(
    [assignee_location_ids, application_inventor_male_flag_location_ids],
    axis=0,
    ignore_index=True,
)
print(_all_locations.shape)

# Keep each location_id once, as a single-column frame named "location_id".
all_locations = (
    pd.DataFrame(_all_locations["location_id"].unique())
    .rename(columns={0: "location_id"})
)
print(all_locations.shape)

# NOTE(review): to_csv writes the row index as an unnamed first column by
# default; confirm downstream readers expect that before changing it.
all_locations.to_csv(r"./data/pregrant/pregrant_locations.csv")

# Spot-check: look up one specific location id in the de-duplicated frame.
all_locations.query("location_id=='baa6fcdc-cb8e-11eb-9615-121df0c29c1e'")

In [None]:
# Preview the first five rows of the raw location table.
_location_df.head(5)

In [None]:
# Count US location rows that have no city, no state and no county.
# (A related check, currently disabled: the number of US rows whose
# county_fips is null.)
# engine="python" is requested explicitly, presumably because the expression
# calls .isnull() on columns inside the query string.
print(
    len(
        _location_df.query(
            "country == 'US' & city.isnull() & state.isnull() & county.isnull() ",
            engine="python",
        )
    )
)



In [None]:
# Display the county_fips column of the raw location table.
_location_df["county_fips"]