In [None]:
# Execute the shared preamble script inside this kernel's namespace.
# NOTE(review): later cells use `pd`, `plt`, `json`, `data_path` and `today_str`
# without defining them first - presumably the preamble provides them; confirm.
%run notebook_preamble.ipy

In [None]:
import re

# A quick look at patents and trademarks


Patent and trademark data are available from the Intellectual Property Office.

* Patent data: https://www.gov.uk/government/publications/ipo-patent-data
* Trademark data: https://www.gov.uk/government/publications/ipo-trade-mark-data-release




### Patents

A guide for analysing patent data: https://www.praxisauril.org.uk/sites/praxisunico.org.uk/files/IPO_The_patent_guide.pdf


In [None]:
# Download the IPO patent open-data workbook and load it into a DataFrame.
patents_url = 'https://www.gov.uk/government/uploads/system/uploads/attachment_data/file/734638/PatentopendataJuly2018.xlsx'
patents_raw = pd.read_excel(patents_url)

In [None]:
import pandas as pd

In [None]:
# Keep a local CSV copy of the raw patent table under the external data folder.
patents_csv_path = f"{data_path}/external/patents.csv"
patents_raw.to_csv(patents_csv_path)

In [None]:
# Rebind `patents_raw` to a deep copy of itself (the data is unchanged).
# NOTE(review): this looks like a leftover no-op - confirm whether a reload
# from the saved CSV was intended here.
patents_raw = patents_raw.copy(deep=True)

In [None]:
# Work on a copy so the raw table stays untouched.
patents = patents_raw.copy()

# Normalise column names: spaces become underscores, everything lower-case.
normalised_names = []
for column_name in patents.columns:
    normalised_names.append(re.sub(' ', '_', column_name).lower())
patents.columns = normalised_names

print(patents.shape)

# Transposed preview: one row per column, one column per sample record.
patents.head().transpose()

In [None]:
# Most frequent applicant country codes.
patents.applicant_country_code.value_counts().head(n=5)

Note the collaborative patents

In [None]:
# Most frequent applicant countries.
patents['applicant_country'].value_counts().head(n=5)

Already geocoded

In [None]:
# Applicants with the most records in the patent table.
patents['applicant_name'].value_counts().head(n=5)

In [None]:
# Same ranking, restricted to rows whose applicant country is 'Scotland'.
patents.loc[patents.applicant_country == 'Scotland', 'applicant_name'].value_counts().head()

The applicants seem to be companies

### Dates

In [None]:
# Filing year = the text before the first '-' of each filing date, cast to int.
# Null dates are passed through unchanged.
filing_years = []
for filed_on in patents.filing_date:
    if pd.isnull(filed_on):
        filing_years.append(filed_on)
    else:
        filing_years.append(int(str(filed_on).split('-')[0]))
patents['filling_year'] = filing_years

In [None]:
# Publication year = the text before the first '-' of each A-publication date,
# cast to int. Null dates are passed through unchanged.
publication_years = []
for published_on in patents.a_publication_date:
    if pd.isnull(published_on):
        publication_years.append(published_on)
    else:
        publication_years.append(int(str(published_on).split('-')[0]))
patents['publication_year'] = publication_years

In [None]:
# Number of patents per publication year, drawn in chronological order.
# `value_counts()` returns the years ranked by frequency, so sort by the year
# index before plotting; otherwise the bars are ordered by count and no trend
# over time can be read from the chart.
patents.publication_year.value_counts().sort_index().plot.bar(color='blue')

Why has the number of patents in the data declined since the 1980s?

In [None]:
# Sample of patents published in 2018 whose applicant country is 'Scotland'.
published_2018 = patents.publication_year == 2018
scottish_applicant = patents.applicant_country == 'Scotland'
patents.loc[published_2018 & scottish_applicant].head()

### Trademarks

In [None]:
# Load the pipe-delimited, UTF-16 trademark extract, skipping malformed rows.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0; its
# replacement `on_bad_lines='warn'` keeps the same behaviour (skip the row and
# emit a warning). Fall back to the legacy flag only for older pandas.
trademarks_path = f'{data_path}/external/trademarks.txt'
trademarks_read_options = dict(encoding='utf-16', delimiter='|')
try:
    trademarks = pd.read_csv(trademarks_path, on_bad_lines='warn', **trademarks_read_options)
except TypeError:
    # pandas < 1.3 does not accept `on_bad_lines`.
    trademarks = pd.read_csv(trademarks_path, error_bad_lines=False, **trademarks_read_options)

In [None]:
# Preview the first five trademark records.
trademarks.head(n=5)

In [None]:
# Normalise column names: spaces become underscores, everything lower-case.
normalised_trademark_names = []
for column_name in trademarks.columns:
    normalised_trademark_names.append(re.sub(' ', '_', column_name).lower())
trademarks.columns = normalised_trademark_names

In [None]:
# (rows, columns) of the trademark table.
trademarks.shape

In [None]:
# Most frequent values of the `country` column.
trademarks['country'].value_counts().head(n=5)

In [None]:
# Ten most frequent values of the `region` column.
trademarks['region'].value_counts().head(10)

In [None]:
# Most frequent values of the `postcode` column.
trademarks['postcode'].value_counts().head(n=5)

Easy to geocode via postcodes

In [None]:
# Year published = the text before the first '-' of each `published` value,
# cast to int. Null values are passed through unchanged.
years_published = []
for published_on in trademarks.published:
    if pd.isnull(published_on):
        years_published.append(published_on)
    else:
        years_published.append(int(str(published_on).split('-')[0]))
trademarks['year_published'] = years_published

In [None]:
# Number of trademark records per publication year (ordered by frequency).
trademarks_published_counts = trademarks['year_published'].value_counts()

In [None]:
# Line plot of the yearly counts with the years in ascending order.
trademarks_published_counts.sort_index().plot()

In [None]:
# List the (normalised) column names of the trademark table.
trademarks.columns

### Encoding the trademarks

In [None]:
# Load the NICE classification table, keep only rows that have a value in
# 'Code.1', then drop the last remaining row.
nice_codes = (
    pd.read_csv(f'{data_path}/external/nice_codes.csv')
    .dropna(axis=0, subset=['Code.1'])
    .iloc[:-1, :]
)

nice_codes

In [None]:
# Build a 'class<N>' label per row: skip the first three characters of
# 'Code.1' and parse the rest as an integer (which strips any zero padding).
class_labels = []
for raw_code in nice_codes['Code.1']:
    class_number = int(raw_code[3:])
    class_labels.append('class' + str(class_number))
nice_codes['class'] = class_labels

In [None]:
# Two-column view of the NICE table: class label and its description.
nice_df = nice_codes.loc[:, ['class', 'Description']]

# Mapping from class label to description.
nice_lookup = dict(zip(nice_df['class'], nice_df['Description']))

In [None]:
# Flag the records whose region is exactly 'Scotland'.
trademarks['is_scotland'] = trademarks.region == 'Scotland'

# Keep every column whose name contains 'class'.
has_class_in_name = ['class' in column_name for column_name in trademarks.columns]
tr_classes = trademarks.loc[:, has_class_in_name]

In [None]:
# Mean of every class column within each `is_scotland` group, transposed so
# that rows are classes and columns are the group keys.
scotland_groups = tr_classes.groupby(trademarks['is_scotland'])
sc_classes = scotland_groups.mean().transpose()

In [None]:
# Relabel each row with the first 20 characters of its NICE description.
short_descriptions = []
for class_label in sc_classes.index:
    short_descriptions.append(nice_lookup[class_label][:20])
sc_classes.index = short_descriptions

In [None]:
# Ratio of the Scottish group's mean to the non-Scottish group's mean, per class.
sc_classes['ratio'] = sc_classes[True].div(sc_classes[False])

In [None]:
# Bar chart of the per-class ratio, largest first.
fig, ax = plt.subplots(figsize=(10, 5))

ratio_descending = sc_classes['ratio'].sort_values(ascending=False)
ratio_descending.plot.bar(
    ax=ax,
    color='blue',
    title='Trademarks: Ratio of Scotland vs UK share',
)

In [None]:
# Save the processed trademark table, with `today_str` prefixed to the file name.
# NOTE(review): other cells build paths from `data_path`; this one hard-codes
# '../data' - confirm the two point at the same folder.
trademarks_out_path = f'../data/external/{today_str}_trademarks.csv'
trademarks.to_csv(trademarks_out_path)

In [None]:
# Persist the class-label -> description mapping as JSON metadata.
with open('../metadata/nice_lookup.json', 'w') as lookup_file:
    json.dump(nice_lookup, fp=lookup_file)