In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:98% !important; }</style>"))
display(HTML("<style>.output_result { max-width:98% !important; }</style>"))


# Code

## Imports

In [2]:
import pandas as pd
import numpy as np
import datetime
from collections import Counter
from tqdm.notebook import tqdm
import nltk
from nltk.corpus import stopwords
from nltk.util import ngrams, bigrams, trigrams
import statistics as stat
import seaborn as sns
import re
from nltk.stem.wordnet import WordNetLemmatizer
import plotly.colors as colors
import dataframe_image as dfi

# Set default color palette
colors_plotly_default = colors.qualitative.Plotly

main_path_mac = '/Users/philippmetzger/Documents/GitHub/battery_patents/'
main_path_ssd = '/Volumes/Samsung Portable SSD T3 Media/'

import sys
packages_path = main_path_mac+'/07 Packages'
sys.path.append(packages_path)

from helpers import (current_time_string,
                              image_saver,
                              country_labels_dict,
                              ctry_code_name_dict,
                              message,
                              numbers_dict)


## Read the whole dataset and reduce it to what we are interested in

In [3]:
# Read the whole dataset
dataset_name = 'data_batteries_2022-01-26_1852'

path = main_path_ssd+'Dataset saves/04 From 15 Nov 2021 (release of 2021 Autumn edition)/01 Preprocessed/03 final - technologies tagged/'+dataset_name+'.csv'

print('Loading data from:')
print(path)

data = pd.read_csv(path, delimiter = ";", low_memory = False, na_values=['', ' ', '  '], keep_default_na = False)

print('Number of rows:', len(data))

print('Distinct values in column "granted":', pd.unique(data['granted']))

# Reduce it to non active parts, electrodes, secondary cells, charging, redox flow, and Nickel-Hydrogen
a = (data['non_active_parts_electrodes_secondary_cells'] == 1)
b = (data['charging'] == 1)
c = (data['is_Redox flow'] == 1)
d = (data['is_Nickel–hydrogen'] == 1)

data_reduced = data[a | b | c | d].copy()
del data
data = data_reduced

# Futher reduce it to IPFs only
data_ipf = data[data['tag'] == 'IPF'].copy()
ipf_percentage = (len(set(data_ipf['docdb_family_id'])) / len(set(data['docdb_family_id']))) * 100
print('Percentage of IPFs in relation to all battery patent families:'+str(round(ipf_percentage, 2))+'%')
del data
data = data_ipf


Loading data from:
/Volumes/Samsung Portable SSD T3 Media/Dataset saves/04 From 15 Nov 2021 (release of 2021 Autumn edition)/01 Preprocessed/03 final - technologies tagged/data_batteries_2022-01-26_1852.csv
Number of rows: 4086532
Distinct values in column "granted": ['N' 'Y']
Percentage of IPFs in relation to all battery patent families:19.41%


## Sort it by ['dobdb_family_id', 'earliest_publn_date']

In [4]:
data = data.sort_values(by = ['docdb_family_id', 'earliest_publn_date'])


## Reduce to years we are interested in

In [5]:
print(set(data['earliest_publn_year_this_family_id']))


{1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019}


In [6]:
data_reduced = data[data['earliest_publn_year_this_family_id'] >= 2000].copy()
del data
data = data_reduced


In [7]:
print(set(data['earliest_publn_year_this_family_id']))


{2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019}


In [8]:
only_granted = False


## In appln_abstract and appln_title: Replace NaNs with '  '

In [9]:
data['appln_abstract'].fillna('  ', inplace=True)
data['appln_title'].fillna('  ', inplace=True)


## Infer our time frame from data

In [10]:
year_begin = min(data['earliest_publn_year_this_family_id'])
year_end = max(data['earliest_publn_year_this_family_id'])

years = list(range(year_begin, year_end + 1))
print(years)


[2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019]


## Of every family, keep only the last english, non-nan title and abstract

In [11]:
family_ids = pd.unique(data['docdb_family_id'])

# Create two dictionaries containing an empty set for each year
titles_dict = {}
abstracts_dict = {} 
for year in years:
    
    titles_dict[year] = set()
    abstracts_dict[year] = set()
    
for family_id in tqdm(family_ids):
    
    data_this_family_id = data[data['docdb_family_id']==family_id]
    
    earliest_publn_year_this_family_id = list(set(data_this_family_id['earliest_publn_year_this_family_id']))[0]
    
    
    # Get titles
    
    titles_this_family_id = list(pd.unique(data_this_family_id[
        data_this_family_id['appln_title_lg']=='en'
    ]['appln_title']))
    try:
        titles_this_family_id.remove('  ')
    except:
        pass
    
    try:
        last_title_this_family_id = titles_this_family_id[-1]
    except:
        pass
    
    titles_dict[earliest_publn_year_this_family_id].add(last_title_this_family_id)
    
    
    # Get abstracts
    
    abstracts_this_family_id = list(pd.unique(data_this_family_id[
        data_this_family_id['appln_abstract_lg']=='en'
    ]['appln_abstract']))
    try:
        abstracts_this_family_id.remove('  ')
    except:
        pass
    
    try:
        last_abstract_this_family_id = abstracts_this_family_id[-1]
    except:
        pass
    
    abstracts_dict[earliest_publn_year_this_family_id].add(last_abstract_this_family_id)
    

  0%|          | 0/92700 [00:00<?, ?it/s]

## Get titles and abstracts counts for each year

In [12]:
titles_counts = []
for year in list(titles_dict):
    titles_counts.append(len(titles_dict[year]))

print(titles_counts)


[939, 1135, 1105, 1151, 1243, 1501, 1818, 2002, 2298, 2627, 3126, 4622, 5970, 6614, 7040, 6929, 6968, 7562, 8513, 9523]


In [13]:
abstracts_counts = []
for year in list(abstracts_dict):
    abstracts_counts.append(len(abstracts_dict[year]))

print(abstracts_counts)


[975, 1164, 1132, 1191, 1288, 1566, 1955, 2147, 2480, 2808, 3376, 5152, 6757, 7463, 7936, 7733, 7830, 8438, 9677, 11016]


## Write counts in a dataframe and normalise them

In [14]:
# Read total yearly counts and add column 'normalised' 

#total_yearly_counts_df = pd.read_csv('/Users/philippmetzger/Documents/GitHub/MA_temp/03 Analysis/01 Country counts/total_yearly_counts', delimiter=';')
#total_yearly_counts_df.rename(columns = {'count': 'patent families count'}, inplace = True)

#max_patent_count = total_yearly_counts_df['patent families count'].max()
#total_yearly_counts_df['patent families count normalised'] = total_yearly_counts_df['patent families count'] / max_patent_count

total_yearly_counts_df = pd.DataFrame()

total_yearly_counts_df['titles counts'] = titles_counts
#max_titles_count = total_yearly_counts_df['titles counts'].max()
#total_yearly_counts_df['titles count normalised'] = total_yearly_counts_df['titles counts'] / max_titles_count


total_yearly_counts_df['abstracts counts'] = abstracts_counts
#max_abstracts_count = total_yearly_counts_df['abstracts counts'].max()
#total_yearly_counts_df['abstracts count normalised'] = total_yearly_counts_df['abstracts counts'] / max_abstracts_count


total_yearly_counts_df


Unnamed: 0,titles counts,abstracts counts
0,939,975
1,1135,1164
2,1105,1132
3,1151,1191
4,1243,1288
5,1501,1566
6,1818,1955
7,2002,2147
8,2298,2480
9,2627,2808


## Define stopwords, contexts, equivalents, words to replace, and punctuation

In [15]:
stopwords_ = stopwords.words('english')
stopwords_.extend([
    'thereof', 'therefor', 'thereafter', 'wherein', 'utmost',
    'without', 'within',
    'xo', 'e', 'etc', 'ab', 'b', 'c', 'pct', 'wo', 'pt', 'pts', 'wt', 'xii', 'xiii', 'ymyo', 'xmn', 'xiv', 'le', 'sub',
    'r', 'x', 'g', 'p', 'v', 'zfz', 'zsz',
    'positive', 'negative', 'left', 'right',
    'high', 'low',
    'less', 'les', 'more', 'least',
    'judging', 'preparing', 'producing', 'comprising', 'following', 'containing', 'including', 'using', 'consisting',
    'making',
    'one', 'two', 'never',
    'end',
    'almost', 'like', 'also',
    'especially', 'preferably', 'surely', 'nearly', 'previously', 'mainly',
    'involves', 'comprises', 'provides', 'relates', 'belongs', 'discloses', 'includes',
    'solved', 'expressed', 'specified', 'provided', 'selected', 'characterized', 'included', 'equipped',
    'decided', 'made', 'filed', 'used', 'formed', 'said',
    'provide', 'improve', 'prevent', 'obtain', 'reduce', 'enhance', 'increase', 'suppress', 'realize', 'use',
    'first', 'second',
    'simple', 'convenient',
    'whose',
    'according',
    'capable', 'preferable', 'desirable',
    'desirably',
    'kind',
    'date', 'temp', 'sec', 
    'jan', 'apr', 'may', 'jun', 'jul', 'nov', 'oct',
    'jp',
    'problem', 'drawing', 'figure', 'invention', 'model', 'publication', 'utility', 'preparation',
    'method', 'application', 'purpose', 'number',
    'new', 'novel',
    'excellent',
    'non',
    'top', 'bottom'
])


# Contexts in which first word should be kept
contexts_after = [
    ['positive', 'electrode'],
    ['negative', 'electrode'],
    ['positive', 'electrodes'],
    ['negative', 'electrodes'],
    ['positive', 'active', 'material'],
    ['negative', 'active', 'material'],
    ['non', 'aqueous'],
    ['non', 'sintered'],
    ['top', 'cap'],
    ['bottom', 'plate']
]


# Contexts in which second word should be kept
contexts_before = [
    ['lithium', 'containing']
]
    

treat_as_same = [
    [('method', 'manufacturing'), ('manufacturing', 'method')],
    [('storage', 'battery', 'alkaline'), ('alkaline', 'storage', 'battery')],
    [('battery', 'alkaline', 'storage'), ('alkaline', 'storage', 'battery')]
]


replace_words = {
    'soln': 'solution',
    'aq': 'aqueous',
    'nonaqueous': 'non aqueous',
    'obtd': 'obtained',
    'hr': 'hour',
    'pub': 'publication',
    'compsn': 'composition',
    'contg': 'containing',
    'compd': 'compound',
    'mfg': 'manufacturing',
    'methodfor': 'method for',
    'al': 'aluminium',
    'aluminum': 'aluminium',
    'co': 'cobalt',
    'mn': 'manganese',
    'ni': 'nickel',
    'zr': 'zirconium',
    'cr': 'chromium',
    'ti': 'titanium',
    'li': 'lithium',
    'la': 'lanthanum',
    'ce': 'cerium',
    'fe': 'iron',
    'ltoreq':'less than or equal',
    'deg': 'degree'
}


# This is not used in this application after all:
punctuation = '!"#$%&\()*+,-./:;<=>?@[\\]^_`{|}~'


In [16]:
# Check if replace_words dictionary works as it should

item = ['negative', 'obtd', 'soln', 'nonaqueous', 'active', 'material']

item_replaced = []
for word in item:
    
    if word in list(replace_words):
        
        item_replaced.extend(replace_words[word].split())
        
    else:
        
        item_replaced.append(word)
    
item = item_replaced
    
item


['negative', 'obtained', 'solution', 'non', 'aqueous', 'active', 'material']

## Define a function for taking care of key phrases extraction and counting

In [17]:
def growing_keywords(n_gram_length, item_type):
    
    # Initialise lemmatizer
    lem = WordNetLemmatizer()
    
    # create string for identifying the right language column
    #item_type_lg = item_type+'_lg'
    
    # Initialise n grams list
    ngrams_lists = []

    # Loop over years
    for year in tqdm(years):

        # Initialise n grams list for this year
        ngrams_list_this_year = []

        # Get all  
        #items_year = list(set(data[(data[item_type_lg]=='en') & (data['earliest_publn_year_this_family_id']==year)][item_type]))
        
        # Get this year's titles / abstracts (depending which mode we're in)
        if item_type == 'appln_title':
            items_year = list(titles_dict[year])
        if item_type == 'appln_abstract':
            items_year = list(abstracts_dict[year])
                
        for item in items_year:
            
            # Make all lowercase
            item = item.lower()

            # Punctuation removal
            item = re.sub('[^a-zA-Z]', ' ', item) 
            #for x in punctuation:
            #    item = item.replace(x,' ')

            # Tokenise
            item = item.split()
            
            #####
            
            # Replace certain words with others (according to replace_words dictionary defined above)
            item_replaced = []
            
            for word in item:

                if word in list(replace_words):
        
                    item_replaced.extend(replace_words[word].split())

                else:

                    item_replaced.append(word)

            item = item_replaced
            
            #####
            
            # Remove stopwords; but only if they are not in a context that indicates that they should be kept.
            # Such contexts are defined in cell above (contexts_after and contexts_before).
            
            item_without_stopwords = []

            # For debugging: List of stopwords that will be checked
            check_list = []
            # For debugging: List of stopwords that are actually removed (because they are not in a certain context)
            remove_list = []
                        
            for i, word in enumerate(item):
                
                remove = False

                if word in stopwords_:
 
                    check_list.append(word)

                    # Treat cases with context after the word or phrase
        
                    in_context_after = False

                    for j, context in enumerate(contexts_after):
                        
                        if i<(len(item)-(len(context)-1)):

                            try:

                                to_compare = []
                                
                                for k in range(len(context)):
                                    to_compare.append(item[i+k])
                                    
                                if to_compare == context:
                                    in_context_after = True
                                    
                                    if False:
                                        print('not removed due to context')
                                        print(to_compare)
                                        print(context)
                                        print()

                            except:
                                print(contexts_after)
                                print(item)
                                print(len(item))
                                print(i+k)
                                print(to_compare)
                                print(context)

                    if not in_context_after:

                        remove = True
                        
                    # Treat cases with context before the word or phrase
        
                    in_context_before = False

                    for j, context in enumerate(contexts_before):
                        
                        if i>(len(context)-1):
                                
                            try:

                                to_compare = []

                                for k in range(len(context)):
                                    
                                    to_compare.append(item[i+k-(len(context)-1)])

                                if to_compare == context:
                                    in_context_before = True
                                    
                                    if False:
                                        print('not removed due to context')
                                        print(to_compare)
                                        print(context)
                                        print()

                            except:
                                print(contexts_before)
                                print(item)
                                print(len(item))
                                print(i+k)
                                print(to_compare)
                                print(context)

                    if not in_context_before:

                        remove = True
                        
                if not remove:
                    
                    item_without_stopwords.append(word)
                    
                else:
                                            
                    remove_list.append(word)

            #####
            
            # Delete words that are a repetition of the word before
            
            # Always add the first word to next stage
            # If this list of words is empty, do nothing
            try:
                item_without_stopwords_and_repetitions = [item_without_stopwords[0]]
            except Exception as e:
                pass
                                    
            # Loop over the other words (the second and following) and add them to the next stage, if they are not 
            # a repetition of the word before
            for i in range(1, len(item_without_stopwords)):
                
                if (item_without_stopwords[i-1] != item_without_stopwords[i]):
                    
                    item_without_stopwords_and_repetitions.append(item_without_stopwords[i]) 
            
            #####
            
            # Lemmatisation
            item_without_stopwords_lemmatized = [lem.lemmatize(word) for word in item_without_stopwords_and_repetitions] 

            #####
            
            # Get ngrams
            ngrams_ = list(ngrams(item_without_stopwords_lemmatized, n_gram_length))
            
            # Treat certain pairs of n_grams as the same (defined in cell above (treat_as_same))
            for n_gram_treat_as_same in treat_as_same:
                
                while n_gram_treat_as_same[0] in ngrams_:
                    
                    ngrams_.remove(n_gram_treat_as_same[0])
                    ngrams_.append(n_gram_treat_as_same[1])

            # Add to list
            ngrams_list_this_year.extend(ngrams_)
            
        # For 3-grams: delete it if first word is equal to third word, e.g. battery pack battery
        if n_gram_length == 3:
            
            ngrams_list_this_year_reduced = []
            
            for item in ngrams_list_this_year:
                
                if not item[0] == item[2]:
                    
                    ngrams_list_this_year_reduced.append(item)
                    
            ngrams_list_this_year = ngrams_list_this_year_reduced

        ngrams_lists.append(ngrams_list_this_year)
        
    print('N-grams created')

    #####
    
    # Count n grams' appearances
    counter_list = []
    unique_keys = set()

    for list_ in ngrams_lists:

        counter = dict(Counter(list_).most_common())
        counter_list.append(counter)

        unique_keys = unique_keys.union(set(counter.keys()))
        
    print('N-grams counted')

    #####
    
    # NEW 17 Jan 2022: Delete all phrases that have at least one year where the counter is zero.
    # V2, same day: Delete all phrases that have more than 15 years where the counter is zero.
    if False:
        
        unique_keys_reduced = set()

        for key_ in unique_keys:

            has_zero = 0

            for counter in counter_list:

                if key_ not in counter:

                    has_zero += 1

            if has_zero > 15:

                for counter in counter_list:

                    try:
                        counter.pop(key_)
                    except Exception as pop_error:
                        pass
                        #print(type(pop_error))
                        #print(pop_error)
                        #print()

            else:

                unique_keys_reduced.add(key_)

        unique_keys = unique_keys_reduced
                
    #####
    
    # Create a count entry of 0 for n grams that is present in at least one year but not in other(s)
    for counter in counter_list:

        for key_ in unique_keys:

            if key_ not in counter:

                counter[key_] = 0

    #####      
    
    relative = False
    
    # NEW 10 Nov 2021: Increment all counts by 1 (in order to avoid division by 0 in growth calculation)
    # This is only necessary when using relative growth (see growth calcuation further down)
    if relative:
        
        for counter in counter_list:

            for key_ in counter:

                    counter[key_] = counter[key_] + 1
    
    def growing_keywords_sub(counter_list, unique_keys, scale):
    
        #####

        # NEW 10 Nov 2021: Scale by year's distinct title / abstract count
        # NEW 18 Jan 2022: Scale by year's distinct title / abstract count to make it "per 1000 titles / abstracts"

        if scale:

            if item_type == 'appln_title':
                #normalised_patent_counts = list(total_yearly_counts_df['titles count normalised'])
                patent_counts = list(total_yearly_counts_df['titles counts'])

            elif item_type == 'appln_abstract':
                #normalised_patent_counts = list(total_yearly_counts_df['abstracts count normalised'])
                patent_counts = list(total_yearly_counts_df['abstracts counts'])

            else:
                print('Item type not recognised')
                return

            for i, counter in enumerate(counter_list):

                #normalised_patent_count_this_year = normalised_patent_counts[i]
                patent_count_this_year = patent_counts[i]

                for key_ in counter:

                    #value_scaled = counter[key_] / normalised_patent_count_this_year
                    value_scaled = counter[key_] / patent_count_this_year * 1000
                    counter[key_] = value_scaled


        #####

        # Calculate increase over whole time span
        growth_dict = {}
        growth_dict_absolute = {}

        for key_ in unique_keys:

            growth_dict[key_] = counter_list[len(counter_list) - 1][key_] - counter_list[0][key_]

        print('Difference over whole timespan calculated')

        # Calculate sum of absolute differences year-over-year; absolute => Growing and shrinking are treated as the same
        for key_ in unique_keys:

            growth = []
            for i in range(len(counter_list)-1):

                if not relative:

                    # Growth as abs(x1 - x0)
                    growth.append(abs(counter_list[i+1][key_]-counter_list[i][key_]))

                else:

                    # Growth as abs(x1 / x0) - 1. Only works if x0 is not zero.
                    try:
                        growth.append(abs((counter_list[i+1][key_] / counter_list[i][key_]) - 1))
                    except:
                        print('error')

            try:
                growth_dict_absolute[key_] = sum(growth)
            except Exception as e2:
                print(type(e2))
                print(e2)

        print('Sum of absolute differences (abs(count_year_i+1 - count_year_i)) calculated')

        #####
        #####

        # Prepare positive growth plot
        highest_growth = dict(sorted(growth_dict.items(), key=lambda x:x[1], reverse=True))
        top_30_growth = list(highest_growth)[:30]
        top_50_growth = list(highest_growth)[:50]

        growing_list = []
        growing_list.append(top_50_growth)

        #####

        counts_list = []

        #for key_ in top_30_growth:
        for key_ in top_50_growth:

            counts = []

            for counter in counter_list:

                counts.append(counter[key_])

            counts_list.append(counts)

        #####

        df_keyword_growth = pd.DataFrame(index=years)
        for i, ngram_ in enumerate(top_50_growth):

            ngram_string = ' '.join(ngram_)

            df_keyword_growth[ngram_string] = counts_list[i]

        df_keyword_growth = df_keyword_growth.transpose()

        # Round all values
        df_keyword_growth = df_keyword_growth.round()
        df_keyword_growth = df_keyword_growth.apply(pd.to_numeric, downcast='integer')
        
        growing_list.append(df_keyword_growth)

        #####

        cm = sns.light_palette((260, 75, 60), input="husl", as_cmap=True)

        plot_positive_growth = df_keyword_growth.style.background_gradient(cmap=cm, axis=1)
        
        # Display thousands with comma separation
        plot_positive_growth.format("{:,d}")

        growing_list.append(plot_positive_growth)

        print('Positive change plot created')

        #####
        #####

        # Prepare negative growth plot

        highest_growth = dict(sorted(growth_dict.items(), key=lambda x:x[1], reverse=False))
        top_30_growth = list(highest_growth)[:30]
        top_50_growth = list(highest_growth)[:50]

        shrinking_list = []
        shrinking_list.append(top_50_growth)

        #####

        counts_list = []

        for key_ in top_50_growth:

            counts = []

            for counter in counter_list:

                counts.append(counter[key_])

            counts_list.append(counts)

        #####

        df_keyword_growth = pd.DataFrame(index=years)
        for i, ngram_ in enumerate(top_50_growth):

            ngram_string = ' '.join(ngram_)

            df_keyword_growth[ngram_string] = counts_list[i]

        df_keyword_growth = df_keyword_growth.transpose()

        # Round all values
        df_keyword_growth = df_keyword_growth.round()
        df_keyword_growth = df_keyword_growth.apply(pd.to_numeric, downcast='integer')

        shrinking_list.append(df_keyword_growth)

        #####

        cm = sns.light_palette((260, 75, 60), input="husl", as_cmap=True)

        plot_negative_growth = df_keyword_growth.style.background_gradient(cmap=cm, axis=1)
        
        # Display thousands with comma separation
        plot_negative_growth.format("{:,d}")

        shrinking_list.append(plot_negative_growth)

        print('Negative change plot created')

        #####
        #####

        # Prepare absolute growth plot
        highest_growth = dict(sorted(growth_dict_absolute.items(), key=lambda x:x[1], reverse=True))
        top_30_growth = list(highest_growth)[:30]
        top_50_growth = list(highest_growth)[:50]

        absolute_growth_list = []
        absolute_growth_list.append(top_50_growth)

        #####

        counts_list = []

        #for key_ in top_30_growth:
        for key_ in top_50_growth:

            counts = []

            for counter in counter_list:

                counts.append(counter[key_])

            counts_list.append(counts)

        #####

        df_keyword_growth = pd.DataFrame(index=years)
        for i, ngram_ in enumerate(top_50_growth):

            ngram_string = ' '.join(ngram_)

            df_keyword_growth[ngram_string] = counts_list[i]

        df_keyword_growth = df_keyword_growth.transpose()

        # Round all values
        df_keyword_growth = df_keyword_growth.round()
        df_keyword_growth = df_keyword_growth.apply(pd.to_numeric, downcast='integer')

        absolute_growth_list.append(df_keyword_growth)        
        
        #####

        cm = sns.light_palette((260, 75, 60), input="husl", as_cmap=True)
        
        plot_absolute_growth = df_keyword_growth.style.background_gradient(cmap=cm, axis=1)
        
        # Display thousands with comma separation
        plot_absolute_growth.format("{:,d}")

        absolute_growth_list.append(plot_absolute_growth)

        print('Absolute change plot created')

        return growing_list, shrinking_list, absolute_growth_list
    
    growing_list, shrinking_list, absolute_growth_list = growing_keywords_sub(counter_list, unique_keys, False)
    growing_list_scaled, shrinking_list_scaled, absolute_growth_list_scaled = growing_keywords_sub(counter_list, unique_keys, True)
    
    return growing_list, shrinking_list, absolute_growth_list, growing_list_scaled, shrinking_list_scaled, absolute_growth_list_scaled


## Define a function for generating LaTeX code

In [324]:
def generate_latex_code(df):
    """This function takes a dataframe with n-gram counts as input and generates LaTeX code for creating a table with row-wise color gradients"""
    
    max_ = df.max(axis = 1)
    min_ = df.min(axis = 1)
    
    intensity = (df.subtract(min_, axis = 0)).divide((max_ - min_), axis = 0) * 100
    
    table_width = df.shape[1]
    table_height = df.shape[0]

    latex_code = '\\begin{tabularx}{\linewidth} {| >{\\raggedright\\arraybackslash}p{3.7cm}'

    for i in range(table_width):
        latex_code = latex_code+'| >{\\raggedleft\\arraybackslash}X '

    latex_code = latex_code+'| }\n'

    first_row = '\mc{} & '
    for year in years[:-1]:
        first_row = first_row + '\mc{'+str(year)+'} & '
    first_row = first_row + '\mc{'+str(years[-1])+'}'
    first_row = first_row+' \\\\'

    latex_code = latex_code+first_row+'\n\\hline\n\\hline'

    for i in range(table_height):
    #for i in range(3): # For testing purposes: Create only 3 rows

        intensity_this_phrase = list(intensity.loc[df.index[i]])

        this_row_code = df.index[i]+' & '
        for j in range(table_width):

            # Make text color white when cell color is darker
            if (intensity_this_phrase[j] >= 40):
                textcolor = 'white'
            else:
                textcolor = 'black'

            if j < (table_width - 1):
                this_row_code = this_row_code+'\\cellcolor{blue!'+str(intensity_this_phrase[j])+'!white}\\textcolor{'+textcolor+'}{'+str(df.iloc[i,j])+'} & '

            else:                        
                this_row_code = this_row_code+'\\cellcolor{blue!'+str(intensity_this_phrase[j])+'!white}\\textcolor{'+textcolor+'}{'+str(df.iloc[i,j])+'} \\\\'

        this_row_code = this_row_code+'\n\\hline'

        latex_code = latex_code+'\n'+this_row_code

    latex_code = latex_code+'\n\\end{tabularx}'

    print(latex_code)
    

## Two more definitions

In [18]:
# Define whether to use median or mean
#measure_function = stat.mean
#measure_function = stat.mean


# Results

## Titles

### Titles - unigrams

In [19]:
growing_list_title_1, shrinking_list_title_1, highest_abs_change_list_title_1, growing_list_title_1_scaled, shrinking_list_title_1_scaled, highest_abs_change_list_title_1_scaled = growing_keywords(
    1,
    'appln_title'
)


  0%|          | 0/20 [00:00<?, ?it/s]

N-grams created
N-grams counted
Difference over whole timespan calculated
Sum of absolute differences (abs(count_year_i+1 - count_year_i)) calculated
Positive change plot created
Negative change plot created
Absolute change plot created
Difference over whole timespan calculated
Sum of absolute differences (abs(count_year_i+1 - count_year_i)) calculated
Positive change plot created
Negative change plot created
Absolute change plot created


In [305]:
growing_list_title_1[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
battery,571,663,587,657,705,805,1033,1107,1312,1444,1725,2730,3428,3791,4108,3961,4082,4197,4939,5966
device,144,156,166,225,254,302,397,469,564,625,765,1067,1543,1889,1954,1800,1812,2027,2159,2559
system,88,127,128,150,137,186,236,320,423,513,630,1041,1334,1530,1528,1530,1508,1737,1925,2103
secondary,125,160,165,194,218,243,340,313,390,407,538,844,1104,1282,1375,1339,1379,1249,1477,1923
electrode,126,151,135,146,186,192,231,259,313,328,429,646,929,969,1092,1083,1011,1138,1283,1617
power,115,146,169,163,203,258,314,372,467,451,606,973,1342,1523,1773,1609,1631,1850,1702,1602
lithium,124,167,155,135,191,204,271,243,272,334,459,722,977,1054,1189,1237,1144,1026,1224,1601
vehicle,55,73,91,83,87,84,106,176,208,232,392,579,832,921,754,704,671,872,1142,1523
charging,76,108,103,100,114,144,132,173,225,293,379,497,725,894,823,822,921,1067,1208,1417
material,84,89,107,109,106,128,148,134,185,212,331,450,635,650,763,787,687,796,785,972


In [21]:
growing_list_title_1_scaled[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
system,94,112,116,130,110,124,130,160,184,195,202,225,223,231,217,221,216,230,226,221
device,153,137,150,195,204,201,218,234,245,238,245,231,258,286,278,260,260,268,254,269
vehicle,59,64,82,72,70,56,58,88,91,88,125,125,139,139,107,102,96,115,134,160
secondary,133,141,149,169,175,162,187,156,170,155,172,183,185,194,195,193,198,166,173,202
charging,81,95,93,87,92,96,73,86,98,112,121,108,121,135,117,119,132,141,142,149
module,9,18,12,14,13,19,31,19,37,28,29,39,47,42,50,47,48,53,60,63
energy,19,27,30,40,42,42,30,55,59,59,66,75,71,87,71,72,67,73,76,69
wireless,7,4,5,5,6,13,7,7,13,15,33,26,40,51,68,67,76,79,60,54
power,122,129,153,142,163,172,173,186,203,172,194,211,225,230,252,232,234,245,200,168
ion,26,27,24,29,27,38,50,37,42,38,61,71,81,83,87,95,88,71,71,71


In [22]:
shrinking_list_title_1[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
telephone,14,8,14,7,12,13,9,1,2,3,6,2,6,4,7,5,1,0,0,2
absorbing,11,8,1,0,1,2,1,10,2,2,2,1,4,3,4,4,7,4,2,4
alkaline,30,40,36,24,17,21,29,19,19,21,16,11,31,11,23,17,11,15,24,24
digital,8,1,4,3,4,1,5,2,3,0,5,10,4,5,3,9,9,8,5,3
sealed,17,16,8,6,9,7,16,13,12,13,29,17,23,29,33,22,23,11,18,12
manganate,5,1,0,0,0,2,1,0,5,2,3,0,0,0,2,1,0,3,0,0
hydrogen,25,34,14,18,12,13,21,24,16,21,8,14,12,12,17,26,17,15,25,20
cadmium,4,0,2,2,0,0,2,3,1,0,1,0,1,0,0,1,0,0,0,0
ionically,4,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,2,0
polarizable,4,1,2,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0


In [23]:
shrinking_list_title_1_scaled[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
cell,109,126,103,105,91,58,72,63,61,67,67,65,77,91,89,79,78,58,68,61
process,48,34,30,27,36,27,22,20,19,20,26,18,17,13,12,10,10,11,11,11
alloy,33,22,16,14,10,13,7,14,4,8,5,5,6,4,4,4,2,2,1,3
alkaline,32,35,33,21,14,14,16,9,8,8,5,2,5,2,3,2,2,2,3,3
circuit,62,69,72,57,64,67,67,67,56,69,46,50,36,38,39,39,34,32,40,33
portable,36,24,30,30,32,49,33,31,31,27,23,23,15,17,13,15,13,13,6,9
rechargeable,45,41,47,30,29,35,39,46,37,29,27,28,32,24,29,27,31,20,18,19
hydrogen,27,30,13,16,10,9,12,12,7,8,3,3,2,2,2,4,2,2,3,2
charger,43,35,35,54,45,37,34,38,36,37,28,27,21,26,20,23,23,17,18,21
polymer,32,22,25,22,21,19,13,10,8,7,6,8,6,7,7,9,10,9,11,11


In [24]:
highest_abs_change_list_title_1[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
battery,571,663,587,657,705,805,1033,1107,1312,1444,1725,2731,3428,3791,4108,3961,4082,4200,4939,5966
device,144,156,166,225,254,302,397,469,564,625,765,1067,1543,1889,1954,1800,1812,2027,2159,2559
power,115,146,169,163,203,258,314,372,467,451,606,973,1342,1523,1773,1609,1631,1850,1702,1602
secondary,125,160,165,194,218,243,340,313,390,407,538,844,1104,1282,1375,1339,1379,1252,1477,1923
system,88,127,128,150,137,186,236,320,423,513,630,1042,1334,1530,1528,1530,1508,1737,1925,2103
lithium,124,167,155,135,191,204,271,243,272,334,459,722,977,1054,1189,1237,1144,1029,1224,1601
vehicle,55,73,91,83,87,84,106,176,208,232,392,579,832,921,754,704,671,872,1142,1523
electrode,126,151,135,146,186,192,231,259,313,328,429,644,929,969,1092,1083,1011,1140,1283,1617
charging,76,108,103,100,114,144,132,173,225,293,379,498,725,894,823,822,921,1067,1208,1417
electric,42,67,51,62,57,72,121,123,144,178,255,425,578,678,486,494,441,494,600,726


In [25]:
highest_abs_change_list_title_1_scaled[2]

Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
battery,608,584,531,571,567,536,568,553,571,550,552,591,574,573,584,572,586,555,580,626
power,122,129,153,142,163,172,173,186,203,172,194,211,225,230,252,232,234,245,200,168
lithium,132,147,140,117,154,136,149,121,118,127,147,156,164,159,169,179,164,136,144,168
device,153,137,150,195,204,201,218,234,245,238,245,231,258,286,278,260,260,268,254,269
secondary,133,141,149,169,175,162,187,156,170,155,172,183,185,194,195,193,198,166,173,202
vehicle,59,64,82,72,70,56,58,88,91,88,125,125,139,139,107,102,96,115,134,160
system,94,112,116,130,110,124,130,160,184,195,202,225,223,231,217,221,216,230,226,221
supply,61,50,68,46,72,85,70,84,80,57,63,71,65,57,62,54,58,56,54,44
cell,109,126,103,105,91,58,72,63,61,67,67,65,77,91,89,79,78,58,68,61
charging,81,95,93,87,92,96,73,86,98,112,121,108,121,135,117,119,132,141,142,149


### Titles - bigrams

In [26]:
growing_list_title_2, shrinking_list_title_2, highest_abs_change_list_title_2, growing_list_title_2_scaled, shrinking_list_title_2_scaled, highest_abs_change_list_title_2_scaled = growing_keywords(
    2,
    'appln_title'
)


  0%|          | 0/20 [00:00<?, ?it/s]

N-grams created
N-grams counted
Difference over whole timespan calculated
Sum of absolute differences (abs(count_year_i+1 - count_year_i)) calculated
Positive change plot created
Negative change plot created
Absolute change plot created
Difference over whole timespan calculated
Sum of absolute differences (abs(count_year_i+1 - count_year_i)) calculated
Positive change plot created
Negative change plot created
Absolute change plot created


In [27]:
growing_list_title_2[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
secondary battery,96,116,118,141,183,223,312,296,363,387,502,797,1036,1173,1256,1207,1255,1173,1393,1835
lithium ion,22,31,25,33,29,55,87,70,90,98,183,319,468,506,566,595,546,478,528,583
battery pack,27,43,23,37,54,49,89,110,106,124,154,235,302,326,297,308,288,373,443,524
lithium secondary,42,63,58,48,77,85,101,68,77,101,147,184,236,277,250,218,241,190,290,487
active material,33,38,49,47,41,44,59,63,98,102,156,213,329,308,380,362,317,367,347,462
battery module,5,12,6,1,6,6,39,17,36,33,52,92,128,143,188,182,183,202,280,374
electric vehicle,11,10,7,12,6,6,16,18,27,36,111,153,245,290,173,198,138,221,251,343
energy storage,0,7,13,15,16,19,21,37,48,50,63,115,159,258,196,225,199,261,299,332
power supply,46,46,69,47,77,114,108,144,170,126,176,262,343,321,384,335,357,385,401,372
aqueous electrolyte,27,39,42,39,55,58,76,77,142,107,125,205,276,284,278,337,382,312,271,350


In [28]:
growing_list_title_2_scaled[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
secondary battery,102,102,107,123,147,149,172,148,158,147,161,172,174,177,178,174,180,155,164,193
lithium ion,23,27,23,29,23,37,48,35,39,37,59,69,78,77,80,86,78,63,62,61
energy storage,0,6,12,13,13,13,12,18,21,19,20,25,27,39,28,32,29,35,35,35
battery module,5,11,5,1,5,4,21,8,16,13,17,20,21,22,27,26,26,27,33,39
storage device,7,5,7,8,10,11,10,14,23,18,22,26,34,44,43,39,35,37,31,35
battery pack,29,38,21,32,43,33,49,55,46,47,49,51,51,49,42,44,41,49,52,55
electric vehicle,12,9,6,10,5,4,9,9,12,14,36,33,41,44,25,29,20,29,29,36
ion battery,10,9,11,8,3,12,17,13,17,16,24,32,31,29,34,37,39,35,39,33
electrode active,9,15,11,19,19,16,14,19,17,21,25,29,30,23,23,29,28,31,25,31
solid state,0,3,0,2,0,1,1,2,2,1,2,4,6,8,8,6,13,17,14,22


In [29]:
shrinking_list_title_2[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
absorbing alloy,10,6,1,0,0,1,1,8,1,2,1,0,3,0,0,3,2,0,0,0
hydrogen absorbing,10,7,1,0,0,1,1,8,1,2,1,0,3,0,0,3,2,0,0,1
alloy electrode,12,7,4,1,1,0,3,6,0,4,2,3,3,2,2,1,2,0,0,3
portable telephone,7,4,7,4,2,4,2,0,0,0,2,0,0,0,1,0,0,0,0,0
process production,10,7,2,5,3,5,2,5,2,4,13,25,10,2,4,3,1,1,3,3
electronic machine,5,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
nickel hydroxide,6,3,3,3,3,1,3,0,1,1,0,1,2,1,4,0,1,2,1,1
lithium manganate,5,1,0,0,0,2,1,0,3,2,3,0,0,0,1,1,0,3,0,0
layer capacitor,7,11,8,1,7,7,11,15,2,5,2,7,4,3,4,5,3,1,3,2
alkaline storage,11,24,18,14,10,10,14,10,4,9,8,8,17,2,14,8,1,0,4,7


In [30]:
shrinking_list_title_2_scaled[2]

Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
battery charger,27,19,17,23,14,13,15,13,15,13,11,7,5,5,6,4,4,2,4,3
electrochemical cell,28,26,13,13,19,11,12,12,15,6,7,6,10,11,10,8,6,7,7,6
storage battery,28,38,27,36,30,25,27,23,10,13,13,12,12,17,19,17,17,12,11,10
secondary cell,21,32,31,41,26,11,12,5,8,5,8,7,10,14,15,15,14,9,8,6
rechargeable lithium,20,17,15,5,7,11,4,9,8,6,7,10,11,6,12,8,9,5,4,5
lead acid,19,14,7,22,15,13,11,3,9,8,8,5,5,4,4,5,6,7,5,5
alloy electrode,13,6,4,1,1,0,2,3,0,2,1,1,1,0,0,0,0,0,0,0
charging battery,16,9,12,10,7,9,4,7,11,9,5,7,6,5,5,5,5,5,5,4
alkaline storage,12,21,16,12,8,7,8,5,2,3,3,2,3,0,2,1,0,0,0,1
absorbing alloy,11,5,1,0,0,1,1,4,0,1,0,0,1,0,0,0,0,0,0,0


In [31]:
highest_abs_change_list_title_2[2]

Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
secondary battery,96,116,118,141,183,223,312,296,363,387,502,797,1036,1173,1256,1207,1255,1173,1393,1835
lithium ion,22,31,25,33,29,55,87,70,90,98,183,319,468,506,566,595,546,478,528,583
lithium secondary,42,63,58,48,77,85,101,68,77,101,147,184,236,277,250,218,241,190,290,487
electric vehicle,11,10,7,12,6,6,16,18,27,36,111,153,245,290,173,198,138,221,251,343
power supply,46,46,69,47,77,114,108,144,170,126,176,262,343,321,384,335,357,385,401,372
battery pack,27,43,23,37,54,49,89,110,106,124,154,235,302,326,297,308,288,373,443,524
active material,33,38,49,47,41,44,59,63,98,102,156,213,329,308,380,362,317,367,347,462
aqueous electrolyte,27,39,42,39,55,58,76,77,142,107,125,205,276,284,278,337,382,312,271,350
ion secondary,4,13,6,16,19,28,43,32,36,40,88,150,199,289,320,322,242,216,202,239
energy storage,0,7,13,15,16,19,21,37,48,50,63,115,159,258,196,225,199,261,299,332


In [32]:
highest_abs_change_list_title_2_scaled[2]

Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
secondary battery,102,102,107,123,147,149,172,148,158,147,161,172,174,177,178,174,180,155,164,193
power supply,49,41,62,41,62,76,59,72,74,48,56,57,57,49,55,48,51,51,47,39
lithium secondary,45,56,52,42,62,57,56,34,34,38,47,40,40,42,36,31,35,25,34,51
lithium ion,23,27,23,29,23,37,48,35,39,37,59,69,78,77,80,86,78,63,62,61
aqueous electrolyte,29,34,38,34,44,39,42,38,62,41,40,44,46,43,39,49,55,41,32,37
battery pack,29,38,21,32,43,33,49,55,46,47,49,51,51,49,42,44,41,49,52,55
active material,35,33,44,41,33,29,32,31,43,39,50,46,55,47,54,52,45,49,41,49
electric vehicle,12,9,6,10,5,4,9,9,12,14,36,33,41,44,25,29,20,29,29,36
motor vehicle,9,8,17,5,18,8,9,11,9,7,12,15,19,20,16,10,14,10,18,27
ion secondary,4,11,5,14,15,19,24,16,16,15,28,32,33,44,45,46,35,29,24,25


### Titles - trigrams

In [271]:
growing_list_title_3, shrinking_list_title_3, highest_abs_change_list_title_3, growing_list_title_3_scaled, shrinking_list_title_3_scaled, highest_abs_change_list_title_3_scaled = growing_keywords(
    3,
    'appln_title'
)


  0%|          | 0/20 [00:00<?, ?it/s]

N-grams created
N-grams counted
Difference over whole timespan calculated
Sum of absolute differences (abs(count_year_i+1 - count_year_i)) calculated
Positive change plot created
Negative change plot created
Absolute change plot created
Difference over whole timespan calculated
Sum of absolute differences (abs(count_year_i+1 - count_year_i)) calculated
Positive change plot created
Negative change plot created
Absolute change plot created


In [272]:
growing_list_title_3[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
lithium secondary battery,35,49,47,41,68,80,95,63,66,99,141,179,229,265,248,209,233,187,281,483
lithium ion battery,9,10,12,9,4,18,31,27,39,43,75,145,185,185,221,225,248,235,291,280
electrode active material,8,16,11,21,22,21,25,35,40,54,72,126,169,147,154,192,186,231,199,272
ion secondary battery,3,10,6,11,18,23,42,32,36,38,86,142,184,271,286,283,220,195,194,230
lithium ion secondary,4,13,6,16,18,28,43,32,36,40,87,150,198,283,306,316,224,202,192,224
electrolyte secondary battery,14,16,19,19,34,39,47,52,99,82,93,132,164,178,198,252,259,200,181,228
secondary battery electrode,5,5,3,4,18,14,20,19,19,20,39,58,123,113,128,131,123,147,151,213
aqueous electrolyte secondary,20,23,25,29,41,41,48,53,99,80,96,137,170,187,216,263,276,206,171,225
secondary battery manufacturing,5,8,15,12,15,22,23,24,37,47,33,73,91,76,86,91,94,67,111,155
secondary battery lithium,3,5,7,10,13,15,25,10,16,25,43,46,64,96,117,101,92,76,98,147


In [35]:
growing_list_title_3_scaled[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
ion secondary battery,3,9,5,10,14,15,23,16,16,14,28,31,31,41,41,41,32,26,23,24
electrode active material,9,14,10,18,18,14,14,17,17,21,23,27,28,22,22,28,27,31,23,29
lithium ion battery,10,9,11,8,3,12,17,13,17,16,24,31,31,28,31,32,36,31,34,29
lithium ion secondary,4,11,5,14,14,19,24,16,16,15,28,32,33,43,43,46,32,27,23,24
secondary battery electrode,5,4,3,3,14,9,11,9,8,8,12,13,21,17,18,19,18,20,18,22
lithium secondary battery,37,43,43,36,55,53,52,31,29,38,45,39,38,40,35,30,33,25,33,51
energy storage device,0,3,5,3,9,7,4,7,7,8,8,9,10,17,11,13,12,13,12,13
secondary battery lithium,3,4,6,9,10,10,14,5,7,10,14,10,11,15,17,15,13,10,12,15
solid state battery,0,0,0,0,0,0,0,0,1,0,1,2,3,5,4,3,2,5,6,11
secondary battery manufacturing,5,7,14,10,12,15,13,12,16,18,11,16,15,11,12,13,13,9,13,16


In [36]:
shrinking_list_title_3[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
hydrogen absorbing alloy,10,6,1,0,0,1,1,8,1,2,1,0,3,0,0,3,2,0,0,0
absorbing alloy electrode,7,3,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0
lithium secondary cell,7,14,10,7,9,4,5,4,11,2,5,5,7,12,1,8,8,2,9,1
alkaline storage battery,13,25,25,18,11,12,22,11,4,13,10,12,19,4,18,16,1,0,4,7
double layer capacitor,7,11,6,1,7,7,10,15,2,5,2,7,4,3,4,5,3,1,3,2
hydrogen storage alloy,8,7,6,2,2,6,3,7,2,9,3,7,3,2,1,3,3,0,1,4
sheet battery case,4,2,1,2,0,4,0,0,2,2,1,0,2,2,1,0,0,0,1,0
charge storage device,4,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,3,2,0
electric double layer,5,5,4,1,4,6,9,12,2,6,2,5,4,1,3,4,3,2,2,2
battery safety valve,3,0,0,0,1,0,2,0,0,0,0,0,4,0,1,0,0,1,0,0


In [37]:
shrinking_list_title_3_scaled[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
alkaline storage battery,14,22,23,16,9,8,12,5,2,5,3,3,3,1,3,2,0,0,0,1
rechargeable lithium battery,17,14,11,3,7,9,3,9,7,5,5,9,10,5,11,8,8,5,4,5
hydrogen absorbing alloy,11,5,1,0,0,1,1,4,0,1,0,0,1,0,0,0,0,0,0,0
lead acid battery,14,12,6,21,10,11,7,1,7,6,6,3,3,3,3,3,6,6,4,4
hydrogen storage alloy,9,6,5,2,2,4,2,3,1,3,1,2,1,0,0,0,0,0,0,0
electrolyte secondary cell,9,8,7,8,7,2,2,1,1,0,2,2,1,4,3,3,3,2,1,1
absorbing alloy electrode,7,3,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
lithium secondary cell,7,12,9,6,7,3,3,2,5,1,2,1,1,2,0,1,1,0,1,0
double layer capacitor,7,10,5,1,6,5,6,7,1,2,1,2,1,0,1,1,0,0,0,0
material rechargeable lithium,9,4,5,1,2,1,0,3,2,2,2,3,3,1,2,2,2,1,1,1


In [38]:
highest_abs_change_list_title_3[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
lithium secondary battery,35,49,47,41,68,80,95,63,66,99,141,179,229,265,248,209,233,187,281,483
lithium ion secondary,4,13,6,16,18,28,43,32,36,40,87,150,198,283,306,316,224,205,192,224
aqueous electrolyte secondary,20,23,25,29,41,41,48,53,99,80,96,137,170,187,216,263,276,206,171,225
ion secondary battery,3,10,6,11,18,23,42,32,36,38,86,142,184,271,286,283,220,198,194,230
electrolyte secondary battery,14,16,19,19,34,39,47,52,99,82,93,132,164,178,198,252,259,200,181,228
electrode active material,8,16,11,21,22,21,25,35,40,54,72,124,169,147,154,192,186,232,199,272
lithium ion battery,9,10,12,9,4,18,31,27,39,43,75,145,185,185,221,225,248,235,291,280
secondary battery manufacturing,5,8,15,12,15,22,23,24,37,47,33,73,91,76,86,91,94,67,111,155
secondary battery electrode,5,5,3,4,18,14,20,19,19,20,39,58,123,113,128,131,123,148,151,213
aqueous secondary battery,6,1,2,2,11,5,9,9,29,16,22,20,45,56,36,53,60,80,127,85


In [39]:
highest_abs_change_list_title_3_scaled[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
lithium secondary battery,37,43,43,36,55,53,52,31,29,38,45,39,38,40,35,30,33,25,33,51
lithium ion secondary,4,11,5,14,14,19,24,16,16,15,28,32,33,43,43,46,32,27,23,24
aqueous electrolyte secondary,21,20,23,25,33,27,26,26,43,30,31,30,28,28,31,38,40,27,20,24
ion secondary battery,3,9,5,10,14,15,23,16,16,14,28,31,31,41,41,41,32,26,23,24
electrolyte secondary battery,15,14,17,17,27,26,26,26,43,31,30,29,27,27,28,36,37,26,21,24
lithium ion battery,10,9,11,8,3,12,17,13,17,16,24,31,31,28,31,32,36,31,34,29
electrode active material,9,14,10,18,18,14,14,17,17,21,23,27,28,22,22,28,27,31,23,29
aqueous secondary battery,6,1,2,2,9,3,5,4,13,6,7,4,8,8,5,8,9,11,15,9
material lithium secondary,11,13,14,9,18,19,12,6,6,10,14,9,10,10,10,7,8,5,5,10
lead acid battery,14,12,6,21,10,11,7,1,7,6,6,3,3,3,3,3,6,6,4,4


## Abstracts

### Abstracts - unigrams

In [40]:
growing_list_abstract_1, shrinking_list_abstract_1, highest_abs_change_list_abstract_1, growing_list_abstract_1_scaled, shrinking_list_abstract_1_scaled, highest_abs_change_list_abstract_1_scaled = growing_keywords(
    1,
    'appln_abstract'
)


  0%|          | 0/20 [00:00<?, ?it/s]

N-grams created
N-grams counted
Difference over whole timespan calculated
Sum of absolute differences (abs(count_year_i+1 - count_year_i)) calculated
Positive change plot created
Negative change plot created
Absolute change plot created
Difference over whole timespan calculated
Sum of absolute differences (abs(count_year_i+1 - count_year_i)) calculated
Positive change plot created
Negative change plot created
Absolute change plot created


In [41]:
growing_list_abstract_1[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
battery,2477,2826,2601,3026,3032,3750,4759,5474,6031,6676,7915,13057,15844,17753,17823,16834,17728,18308,21510,25812
power,810,999,1200,1325,1450,1767,2195,2515,3097,3628,4901,7840,10040,11933,13007,12092,11679,13539,13223,13695
electrode,773,921,717,863,1131,1368,2091,1878,2047,2397,2814,5462,7003,7767,8036,8124,8021,8189,10463,12357
device,506,608,614,790,860,1058,1341,1468,2046,2342,3302,4205,5932,6942,7243,6730,7070,7791,8638,9911
charging,545,636,697,728,792,850,1051,1060,1552,1863,2395,3350,4760,5798,5387,5038,5671,6739,7086,8942
material,598,586,726,695,686,940,1205,1335,1443,1564,2015,3198,4483,4828,5268,5247,4683,5034,5637,7030
unit,223,362,365,389,428,633,923,1023,1041,1411,2059,3459,4870,5650,4998,5151,4860,5408,6044,6487
layer,250,296,335,338,347,573,667,830,866,968,1078,1846,2497,2939,3033,2918,3283,3470,4821,6159
cell,523,661,508,544,528,575,1018,1016,1187,1441,1932,2990,4494,5020,4762,4239,4350,4106,5027,5841
lithium,444,545,495,518,633,675,897,892,1027,1195,1510,2491,3277,3173,3447,3776,3948,3540,4276,5666


In [42]:
growing_list_abstract_1_scaled[2]

Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
power,831,858,1060,1113,1126,1128,1123,1171,1249,1292,1452,1522,1486,1599,1639,1564,1492,1605,1366,1243
device,519,522,542,663,668,676,686,684,825,834,978,816,878,930,913,870,903,923,893,900
unit,229,311,322,327,332,404,472,476,420,502,610,671,721,757,630,666,621,641,625,589
vehicle,137,178,199,169,200,181,171,217,236,280,394,360,390,401,346,314,283,338,375,474
electrode,793,791,633,725,878,874,1070,875,825,854,834,1060,1036,1041,1013,1051,1024,970,1081,1122
module,112,145,117,143,115,121,207,212,231,281,333,331,358,371,372,360,389,404,436,433
layer,256,254,296,284,269,366,341,387,349,345,319,358,370,394,382,377,419,411,498,559
charging,559,546,616,611,615,543,538,494,626,663,709,650,704,777,679,651,724,799,732,812
energy,148,180,225,294,262,262,212,325,356,383,473,433,403,397,377,352,370,410,387,391
configured,24,21,19,38,50,47,69,68,96,134,164,146,151,185,204,237,249,287,279,260


In [43]:
shrinking_list_abstract_1[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
telephone,49,60,59,45,39,33,43,19,31,17,24,18,28,25,34,19,33,6,9,7
mum,22,14,26,8,35,27,17,17,26,8,7,11,21,17,11,5,2,2,5,1
copyright,17,15,27,210,267,393,438,440,495,604,484,500,0,0,0,0,0,0,0,0
jpo,17,15,27,210,267,393,438,440,495,604,484,500,0,0,0,0,0,0,0,0
lixmn,15,6,1,4,0,0,0,0,1,0,0,0,0,1,2,0,0,0,0,1
manganate,13,9,4,0,7,5,7,11,8,8,18,3,6,3,11,5,0,12,7,1
claim,16,15,12,19,21,17,15,14,14,26,30,45,47,53,58,4,3,3,0,5
inpit,11,11,16,21,20,29,169,440,495,604,484,500,0,0,0,0,0,0,0,0
revolution,11,12,13,1,0,8,14,10,2,12,6,0,3,8,1,5,18,12,3,1
computed,12,10,9,3,2,7,2,0,1,10,5,11,11,8,7,7,4,6,7,2


In [44]:
shrinking_list_abstract_1_scaled[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
voltage,735,915,967,874,899,969,812,955,876,850,708,723,612,587,612,599,579,545,517,480
battery,2541,2428,2298,2541,2354,2395,2434,2550,2432,2377,2344,2534,2345,2379,2246,2177,2264,2170,2223,2343
charge,401,383,326,339,373,366,287,361,379,435,374,369,360,328,323,283,267,265,213,224
circuit,576,493,564,545,581,718,599,610,635,707,511,521,452,473,438,442,437,471,479,416
alloy,171,139,123,86,79,86,76,59,31,47,42,37,38,31,33,35,27,21,24,23
mean,259,204,201,195,218,160,164,189,133,134,152,137,152,121,147,113,104,94,97,117
capacity,206,204,204,206,155,185,164,150,169,113,130,157,140,116,120,135,115,103,83,96
nickel,167,185,151,125,107,103,116,86,56,69,81,73,64,42,61,60,58,48,57,60
discharge,223,210,173,217,170,178,185,191,212,210,157,165,191,172,160,163,145,144,122,117
current,576,592,611,586,572,640,554,614,611,642,524,552,493,501,448,453,476,495,472,477


In [45]:
highest_abs_change_list_abstract_1[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
battery,2477,2826,2601,3026,3032,3750,4759,5474,6031,6676,7915,13057,15844,17753,17823,16834,17728,18308,21510,25812
power,810,999,1200,1325,1450,1767,2195,2515,3097,3628,4901,7840,10040,11933,13007,12092,11679,13539,13223,13695
electrode,773,921,717,863,1131,1368,2091,1878,2047,2397,2814,5462,7003,7767,8036,8124,8021,8189,10463,12357
device,506,608,614,790,860,1058,1341,1468,2046,2342,3302,4205,5932,6942,7243,6730,7070,7791,8638,9911
charging,545,636,697,728,792,850,1051,1060,1552,1863,2395,3350,4760,5798,5387,5038,5671,6739,7086,8942
unit,223,362,365,389,428,633,923,1023,1041,1411,2059,3459,4870,5650,4998,5151,4860,5408,6044,6487
cell,523,661,508,544,528,575,1018,1016,1187,1441,1932,2990,4494,5020,4762,4239,4350,4106,5027,5841
material,598,586,726,695,686,940,1205,1335,1443,1564,2015,3198,4483,4828,5268,5247,4683,5034,5637,7030
vehicle,134,207,225,201,257,284,335,466,586,787,1330,1857,2633,2995,2749,2432,2217,2854,3629,5226
lithium,444,545,495,518,633,675,897,892,1027,1195,1510,2491,3277,3173,3447,3776,3948,3540,4276,5666


In [46]:
highest_abs_change_list_abstract_1_scaled[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
battery,2541,2428,2298,2541,2354,2395,2434,2550,2432,2377,2344,2534,2345,2379,2246,2177,2264,2170,2223,2343
power,831,858,1060,1113,1126,1128,1123,1171,1249,1292,1452,1522,1486,1599,1639,1564,1492,1605,1366,1243
electrode,793,791,633,725,878,874,1070,875,825,854,834,1060,1036,1041,1013,1051,1024,970,1081,1122
voltage,735,915,967,874,899,969,812,955,876,850,708,723,612,587,612,599,579,545,517,480
charging,559,546,616,611,615,543,538,494,626,663,709,650,704,777,679,651,724,799,732,812
circuit,576,493,564,545,581,718,599,610,635,707,511,521,452,473,438,442,437,471,479,416
unit,229,311,322,327,332,404,472,476,420,502,610,671,721,757,630,666,621,641,625,589
cell,536,568,449,457,410,367,521,473,479,513,572,580,665,673,600,548,556,487,519,530
device,519,522,542,663,668,676,686,684,825,834,978,816,878,930,913,870,903,923,893,900
part,163,174,261,315,269,361,386,328,319,323,357,375,468,434,411,343,309,303,385,341


### Abstracts - bigrams

In [47]:
growing_list_abstract_2, shrinking_list_abstract_2, highest_abs_change_list_abstract_2, growing_list_abstract_2_scaled, shrinking_list_abstract_2_scaled, highest_abs_change_list_abstract_2_scaled = growing_keywords(
    2,
    'appln_abstract'
)


  0%|          | 0/20 [00:00<?, ?it/s]

N-grams created
N-grams counted
Difference over whole timespan calculated
Sum of absolute differences (abs(count_year_i+1 - count_year_i)) calculated
Positive change plot created
Negative change plot created
Absolute change plot created
Difference over whole timespan calculated
Sum of absolute differences (abs(count_year_i+1 - count_year_i)) calculated
Positive change plot created
Negative change plot created
Absolute change plot created


In [48]:
growing_list_abstract_2[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
secondary battery,229,281,282,350,384,595,776,833,858,946,1135,2154,2652,2790,2842,2861,2794,2592,3001,3722
active material,236,249,267,279,286,366,584,641,713,764,911,1651,2234,2408,2422,2430,2132,2331,2554,3334
battery cell,80,77,45,67,102,112,182,260,249,329,574,1116,1452,2007,1890,1818,1784,1729,2111,2851
power supply,163,201,292,247,310,503,546,577,709,673,987,1483,1817,2078,2287,1984,1885,2133,2430,2611
battery pack,149,173,109,210,225,251,383,467,459,558,567,951,1263,1312,1290,1067,1337,1495,1638,2255
electrode active,91,86,84,122,157,175,248,284,296,372,448,1020,1312,1341,1415,1446,1347,1352,1471,2102
battery module,27,52,38,37,27,30,188,132,177,201,287,528,816,1034,1065,842,1001,1130,1567,1814
energy storage,28,61,61,65,66,72,105,171,292,274,389,615,737,947,921,847,1012,1360,1463,1653
current collector,61,54,57,62,70,126,124,213,238,293,314,544,638,702,655,541,646,717,1158,1477
lithium ion,85,98,111,114,105,172,183,222,288,327,484,813,1174,1109,1207,1321,1347,1208,1347,1423


In [49]:
growing_list_abstract_2_scaled[2]

Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
battery cell,82,66,40,56,79,72,93,121,100,117,170,217,215,269,238,235,228,205,218,259
battery module,28,45,34,31,21,19,96,61,71,72,85,102,121,139,134,109,128,134,162,165
energy storage,29,52,54,55,51,46,54,80,118,98,115,119,109,127,116,110,129,161,151,150
secondary battery,235,241,249,294,298,380,397,388,346,337,336,418,392,374,358,370,357,307,310,338
electrode active,93,74,74,102,122,112,127,132,119,132,133,198,194,180,178,187,172,160,152,191
material layer,18,12,34,33,33,65,63,73,61,73,58,78,69,85,74,63,51,70,77,96
storage device,49,46,42,57,49,38,36,61,109,80,137,109,121,127,129,107,111,150,109,123
power storage,17,11,21,25,7,13,8,42,71,38,88,56,107,92,108,93,111,129,88,90
current collector,63,46,50,52,54,80,63,99,96,104,93,106,94,94,83,70,83,85,120,134
power supply,167,173,258,207,241,321,279,269,286,240,292,288,269,278,288,257,241,253,251,237


In [50]:
shrinking_list_abstract_2[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
absorbing alloy,34,34,9,6,2,2,3,22,4,6,3,1,5,0,2,8,13,4,15,0
hydrogen absorbing,34,34,10,6,2,2,3,27,4,6,4,1,5,0,2,10,13,4,15,1
copyright jpo,17,15,27,210,267,393,438,440,495,604,484,500,0,0,0,0,0,0,0,0
polarity type,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0
rest period,18,0,1,0,1,0,0,0,0,0,3,5,1,0,3,2,3,4,3,2
independent claim,16,15,12,18,21,14,15,14,12,25,29,42,44,50,55,0,0,0,0,0
solid solution,18,9,8,6,8,1,4,7,2,10,1,16,20,31,30,56,16,7,6,4
nickel hydroxide,22,53,37,29,8,11,15,11,3,13,2,5,24,9,46,6,9,16,16,10
pc card,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
lithium manganate,13,9,3,0,6,5,5,11,5,8,18,3,4,1,6,4,0,12,6,1


In [51]:
shrinking_list_abstract_2_scaled[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
lithium manganese,58,46,24,23,5,4,10,7,6,5,5,11,9,10,10,8,9,4,5,6
battery charger,55,35,48,65,58,43,41,32,33,31,36,28,20,19,16,14,13,8,14,7
charging current,73,51,57,39,46,56,58,41,58,51,31,36,30,36,25,28,33,27,26,28
power source,111,106,95,116,160,110,128,124,112,123,97,89,80,92,79,94,77,86,73,67
battery voltage,59,80,72,70,88,68,52,61,51,52,25,29,27,18,28,24,25,21,16,17
element group,54,17,19,14,17,22,17,21,12,18,17,12,12,9,7,12,14,18,8,12
battery charging,68,51,48,39,49,38,49,50,49,48,49,38,35,43,34,29,33,46,31,31
electrochemical cell,56,55,41,16,39,15,36,27,31,24,36,26,34,27,28,24,23,27,26,21
absorbing alloy,35,29,8,5,2,1,2,10,2,2,1,0,1,0,0,1,2,0,2,0
hydrogen absorbing,35,29,9,5,2,1,2,13,2,2,1,0,1,0,0,1,2,0,2,0


In [52]:
highest_abs_change_list_abstract_2[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
secondary battery,229,281,282,350,384,595,776,833,858,946,1135,2154,2652,2790,2842,2861,2794,2592,3001,3722
active material,236,249,267,279,286,366,584,641,713,764,911,1651,2234,2408,2422,2430,2132,2331,2554,3334
battery cell,80,77,45,67,102,112,182,260,249,329,574,1116,1452,2007,1890,1818,1784,1729,2111,2851
power supply,163,201,292,247,310,503,546,577,709,673,987,1483,1817,2078,2287,1984,1885,2133,2430,2611
battery pack,149,173,109,210,225,251,383,467,459,558,567,951,1263,1312,1290,1067,1337,1495,1638,2255
battery module,27,52,38,37,27,30,188,132,177,201,287,528,816,1034,1065,842,1001,1130,1567,1814
storage device,48,53,47,68,63,60,70,131,270,224,461,564,815,949,1024,826,866,1265,1054,1357
electrode active,91,86,84,122,157,175,248,284,296,372,448,1020,1312,1341,1415,1446,1347,1352,1471,2102
power storage,17,13,24,30,9,21,15,90,176,107,297,287,724,686,860,717,866,1087,851,994
electric vehicle,27,20,6,12,11,13,26,20,62,93,277,415,656,778,495,493,408,639,551,955


In [53]:
highest_abs_change_list_abstract_2_scaled[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
power supply,167,173,258,207,241,321,279,269,286,240,292,288,269,278,288,257,241,253,251,237
copyright jpo,17,13,24,176,207,251,224,205,200,215,143,97,0,0,0,0,0,0,0,0
secondary battery,235,241,249,294,298,380,397,388,346,337,336,418,392,374,358,370,357,307,310,338
jpo ncipi,5,3,4,11,120,232,138,0,0,0,0,0,0,0,0,0,0,0,0,0
battery cell,82,66,40,56,79,72,93,121,100,117,170,217,215,269,238,235,228,205,218,259
jpo inpit,11,9,14,18,16,19,86,205,200,215,143,97,0,0,0,0,0,0,0,0
battery pack,153,149,96,176,175,160,196,218,185,199,168,185,187,176,163,138,171,177,169,205
power storage,17,11,21,25,7,13,8,42,71,38,88,56,107,92,108,93,111,129,88,90
storage device,49,46,42,57,49,38,36,61,109,80,137,109,121,127,129,107,111,150,109,123
electrode plate,62,109,38,50,83,72,95,60,52,53,45,88,70,59,49,45,58,54,61,68


### Abstracts - trigrams

In [279]:
growing_list_abstract_3, shrinking_list_abstract_3, highest_abs_change_list_abstract_3, growing_list_abstract_3_scaled, shrinking_list_abstract_3_scaled, highest_abs_change_list_abstract_3_scaled = growing_keywords(
    3,
    'appln_abstract'
)


  0%|          | 0/20 [00:00<?, ?it/s]

N-grams created
N-grams counted
Difference over whole timespan calculated
Sum of absolute differences (abs(count_year_i+1 - count_year_i)) calculated
Positive change plot created
Negative change plot created
Absolute change plot created
Difference over whole timespan calculated
Sum of absolute differences (abs(count_year_i+1 - count_year_i)) calculated
Positive change plot created
Negative change plot created
Absolute change plot created


In [280]:
growing_list_abstract_3[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
electrode active material,89,82,72,121,136,157,241,278,294,356,427,991,1253,1288,1323,1386,1273,1308,1382,1990
active material layer,17,9,29,27,37,96,114,143,134,188,186,364,419,553,499,383,336,510,619,883
lithium secondary battery,46,91,72,65,112,128,192,140,124,178,222,340,451,487,490,410,492,374,540,844
lithium ion battery,15,26,41,31,16,49,58,83,99,121,197,321,476,421,449,520,618,568,702,685
energy storage device,5,41,27,39,33,20,44,70,109,94,147,183,224,367,367,277,370,538,576,648
secondary battery electrode,29,27,21,25,55,82,113,109,95,122,146,286,417,422,384,400,419,441,435,593
electrode current collector,7,7,10,12,15,15,24,46,57,104,96,135,206,171,155,172,215,226,412,539
power storage device,11,3,7,10,2,8,4,30,69,37,183,166,317,310,361,311,311,520,293,394
electrolyte secondary battery,25,27,41,36,65,74,110,118,180,129,113,214,301,269,437,425,412,287,264,405
aqueous electrolyte secondary,24,38,45,46,67,75,111,116,182,135,121,216,307,289,456,427,421,285,248,390


In [329]:
# Generate LaTeX code
#generate_latex_code(growing_list_abstract_3[1])


In [308]:
# Export as PNG
#dfi.export(growing_list_abstract_3[2], 'growing_list_abstract_3.png')


In [318]:
growing_list_abstract_3_scaled[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
electrode active material,91,70,64,102,106,100,123,129,119,127,126,192,185,173,167,179,163,155,143,181
active material layer,17,8,26,23,29,61,58,67,54,67,55,71,62,74,63,50,43,60,64,80
energy storage device,5,35,24,33,26,13,23,33,44,33,44,36,33,49,46,36,47,64,60,59
lithium ion battery,15,22,36,26,12,31,30,39,40,43,58,62,70,56,57,67,79,67,73,62
electrode current collector,7,6,9,10,12,10,12,21,23,37,28,26,30,23,20,22,27,27,43,49
lithium secondary battery,47,78,64,55,87,82,98,65,50,63,66,66,67,65,62,53,63,44,56,77
plurality battery cell,3,1,3,1,5,6,5,13,10,10,9,26,26,31,22,27,27,27,32,32
power storage device,11,3,6,8,2,5,2,14,28,13,54,32,47,42,45,40,40,62,30,36
current collector electrode,5,7,5,3,6,7,6,13,15,16,17,25,17,15,17,13,17,16,22,29
secondary battery electrode,30,23,19,21,43,52,58,51,38,43,43,56,62,57,48,52,54,52,45,54


In [328]:
# Generate LaTeX code
#generate_latex_code(growing_list_abstract_3_scaled[1])


In [327]:
# Export as PNG
#dfi.export(growing_list_abstract_3_scaled[2], 'growing_list_abstract_3_scaled.png')


In [59]:
shrinking_list_abstract_3[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
hydrogen absorbing alloy,34,34,9,6,2,2,3,22,4,6,3,1,5,0,2,8,13,4,15,0
temperature detection section,12,1,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0
charge storage device,15,3,2,0,2,3,4,2,2,8,4,10,8,4,4,6,9,17,3,3
copyright jpo inpit,11,11,16,21,20,29,169,440,495,604,484,500,0,0,0,0,0,0,0,0
absorbing alloy electrode,11,8,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,3,0
safety valve element,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
battery safety valve,10,1,0,0,3,0,0,2,0,4,3,7,3,1,2,0,0,2,1,0
portable information terminal,13,2,1,1,0,0,0,0,0,0,0,0,0,18,3,9,2,8,3,3
lithium secondary cell,11,27,10,10,11,6,12,4,11,6,5,8,11,13,3,17,14,6,6,1
hydrogen storage alloy,18,36,21,5,19,38,23,29,5,33,22,24,11,6,10,20,3,4,11,9


In [60]:
shrinking_list_abstract_3_scaled[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
hydrogen absorbing alloy,35,29,8,5,2,1,2,10,2,2,1,0,1,0,0,1,2,0,2,0
lithium manganese oxide,21,15,10,5,0,1,1,1,0,1,1,5,3,4,2,2,2,2,2,1
lead acid battery,27,29,19,36,28,17,10,8,19,11,9,11,9,9,10,11,13,12,12,9
hydrogen storage alloy,18,31,19,4,15,24,12,14,2,12,7,5,2,1,1,3,0,0,1,1
external power source,21,6,5,6,8,8,11,11,8,9,10,9,5,9,9,9,7,9,6,4
charge discharge cycle,21,16,11,16,19,10,13,16,15,13,11,10,9,10,6,7,7,5,4,5
charge storage device,15,3,2,0,2,2,2,1,1,3,1,2,1,1,1,1,1,2,0,0
rechargeable lithium battery,17,19,9,2,6,7,2,7,8,6,5,10,4,2,7,4,5,1,1,3
alkaline storage battery,15,28,17,16,13,13,13,8,2,4,4,3,2,0,2,2,0,0,0,1
double layer capacitor,14,26,23,16,24,24,21,16,5,11,8,6,4,3,4,2,1,1,1,1


In [61]:
highest_abs_change_list_abstract_3[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
electrode active material,89,82,72,121,136,157,241,278,294,356,427,991,1253,1288,1323,1386,1273,1308,1382,1990
lithium secondary battery,46,91,72,65,112,128,192,140,124,178,222,340,451,487,490,410,492,374,540,844
active material layer,17,9,29,27,37,96,114,143,134,188,186,364,419,553,499,383,336,510,619,883
copyright jpo inpit,11,11,16,21,20,29,169,440,495,604,484,500,0,0,0,0,0,0,0,0
power storage device,11,3,7,10,2,8,4,30,69,37,183,166,317,310,361,311,311,524,293,394
lithium ion battery,15,26,41,31,16,49,58,83,99,121,197,321,476,421,449,520,618,568,702,685
aqueous electrolyte secondary,24,38,45,46,67,75,111,116,182,135,121,216,307,289,456,427,421,284,248,390
electrolyte secondary battery,25,27,41,36,65,74,110,118,180,129,113,214,301,269,437,425,412,286,264,405
energy storage device,5,41,27,39,33,20,44,70,109,94,147,183,224,367,367,277,370,538,576,648
lithium ion secondary,12,29,18,30,33,63,72,74,76,84,164,270,366,418,442,506,379,342,291,345


In [62]:
highest_abs_change_list_abstract_3_scaled[2]


Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
copyright jpo ncipi,5,3,4,11,120,232,138,0,0,0,0,0,0,0,0,0,0,0,0,0
copyright jpo inpit,11,9,14,18,16,19,86,205,200,215,143,97,0,0,0,0,0,0,0,0
electrode active material,91,70,64,102,106,100,123,129,119,127,126,192,185,173,167,179,163,155,143,181
lithium secondary battery,47,78,64,55,87,82,98,65,50,63,66,66,67,65,62,53,63,44,56,77
active material layer,17,8,26,23,29,61,58,67,54,67,55,71,62,74,63,50,43,60,64,80
power storage device,11,3,6,8,2,5,2,14,28,13,54,32,47,42,45,40,40,62,30,36
energy storage device,5,35,24,33,26,13,23,33,44,33,44,36,33,49,46,36,47,64,60,59
electrolyte secondary battery,26,23,36,30,50,47,56,55,73,46,33,42,45,36,55,55,53,34,27,37
aqueous electrolyte secondary,25,33,40,39,52,48,57,54,73,48,36,42,45,39,57,55,54,34,26,35
lithium ion battery,15,22,36,26,12,31,30,39,40,43,58,62,70,56,57,67,79,67,73,62


## Search certain patterns

In [123]:
beg_or_space = '( +|^)'
end_or_space = '( +|$)'

def search_for_string(string, remove_punctuation):
    
    def remove_punctuation(item):
        item = re.sub('[^a-zA-Z]', ' ', item)
        return item
    
    feature = 'appln_title'
    feature_lg = 'appln_title_lg'
    data_this = data[data[feature_lg]=='en']
    
    if remove_punctuation:
        result_titles = data_this[data_this[feature].map(lambda x: remove_punctuation(x)).str.contains(string, case=False)][['earliest_publn_year_this_family_id', 'docdb_family_id', feature]]
    else:
        result_titles = data_this[data_this[feature].str.contains(string, case=False)][['earliest_publn_year_this_family_id', 'docdb_family_id', feature]]
        
    feature = 'appln_abstract'
    feature_lg = 'appln_abstract_lg'
    data_this = data[data[feature_lg]=='en']
    
    if remove_punctuation:
        result_abstracts = data_this[data_this[feature].map(lambda x: remove_punctuation(x)).str.contains(string, case=False)][['earliest_publn_year_this_family_id', 'docdb_family_id', feature]]
    else:
        result_abstracts = data_this[data_this[feature].str.contains(string, case=False)][['earliest_publn_year_this_family_id', 'docdb_family_id', feature]]
        
    return [result_titles, result_abstracts]

def get_occurence_counts(result):

    dicts = []

    print_ = ['Titles:', 'Abstracts:']
    
    for i, item in enumerate(result):

        dict_ = {}

        for year in set(item['earliest_publn_year_this_family_id']):

            ids = set(item[item['earliest_publn_year_this_family_id'] == year]['docdb_family_id'])
            num_ids = len(ids)

            dict_[year] = [num_ids, ids]

        #dicts.append(dict_)
        print(print_[i])
        
        for year in list(dict_):
            print(str(year)+': '+str(dict_[year][0])+' '+str(dict_[year][1]))
        
        print()
            

In [79]:
circular_economy = search_for_string('circular economy', True)


In [124]:
get_occurence_counts(circular_economy)


Titles:

Abstracts:
2012: 1 {45424723}



In [44]:
artificial_intelligence = search_for_string('artificial intelligence', True) 


In [125]:
get_occurence_counts(artificial_intelligence)


Titles:
2019: 1 {68983016}

Abstracts:
2017: 1 {58273994}
2018: 1 {64395568}
2019: 13 {68534112, 68070945, 67775140, 67768556, 68068844, 67806895, 68381807, 68070962, 67950899, 66101653, 68101366, 61873658, 68067835}



In [87]:
AI = search_for_string(beg_or_space+'AI'+end_or_space, True)


  result_titles = data_this[data_this[feature].map(lambda x: remove_punctuation(x)).str.contains(string, case=False)][['earliest_publn_year_this_family_id', 'docdb_family_id', feature]]
  result_abstracts = data_this[data_this[feature].map(lambda x: remove_punctuation(x)).str.contains(string, case=False)][['earliest_publn_year_this_family_id', 'docdb_family_id', feature]]


In [134]:
AI[1]

Unnamed: 0,earliest_publn_year_this_family_id,docdb_family_id,appln_abstract
324300,2003,27762088,"Disclosed are new Ca, Mg and Ni-containing all..."
324301,2003,27762088,"Disclosed are new Ca, Mg and Ni-containing all..."
324302,2003,27762088,"Disclosed are new Ca, Mg and Ni-containing all..."
324297,2003,27762088,"Disclosed are new Ca, Mg and Ni-containing all..."
324298,2003,27762088,"Disclosed are new Ca, Mg and Ni-containing all..."
...,...,...,...
4053596,2019,68692627,A mobile robot and drone device configured to ...
4053597,2019,68692627,A mobile robot and drone device configured to ...
4053598,2019,68692627,A mobile robot and drone device configured to ...
4053599,2019,68692627,A mobile robot and drone device configured to ...


In [140]:
list(set(AI[1]['appln_abstract']))[6]


'The invention relates to a piece of glass, in particular glass solder, comprising the following components in mol%: P2O5 37-50 mol%, in particular 39-48 mol%, AI2O3 0-14 mol%, in particular 2-12 mol%, B2O3 2-10 mol%, in particular 4-8 mol%, Na2O 0-30 mol%, in particular 0-20 mol%, M2O 0-20 mol%, in particular 12-20 mol%, wherein M can be K, Cs or Rb, Li2O 0-42 mol%, in particular 0-40 mol%, preferably 17-40 mol%, BaO 0-20 mol%, in particular 0-20 mol%, preferably 5-20 mol%, and Bi2O3 0-10 mol%, in particular 1-5 mol%, preferably 2-5 mol%.'

In [126]:
get_occurence_counts(AI)


Titles:

Abstracts:
2016: 4 {54849608, 54010916, 57197533, 57320740}
2017: 4 {56896520, 60784615, 58709628, 60159359}
2018: 2 {64454152, 60182650}
2019: 10 {68070945, 66542054, 67987686, 67768556, 67806895, 68692627, 65015668, 66101653, 67987641, 65723514}
2003: 1 {27762088}
2004: 2 {33490608, 30767881}
2007: 1 {38256395}
2008: 1 {40002286}
2009: 1 {40833651}
2010: 1 {42119831}
2011: 4 {44711897, 42139122, 44937546, 42562666}
2012: 5 {45993992, 45688430, 43901104, 46577617, 46456891}
2013: 8 {47553025, 47089058, 48873338, 47215535, 46147450, 49548859, 47471837, 48128254}
2014: 4 {49753250, 49620027, 51843901, 49301687}
2015: 5 {52991072, 54554597, 49667465, 49949946, 51383742}



In [89]:
industry_4_0 = search_for_string('industry 4.0', False)


In [127]:
get_occurence_counts(industry_4_0)


Titles:

Abstracts:



In [91]:
smart_city = search_for_string('smart city', True) 


In [128]:
get_occurence_counts(smart_city)


Titles:

Abstracts:
2016: 1 {55653399}
2017: 1 {57983369}
2018: 1 {63445648}

