In [1]:
import os
from pathlib import Path

import pandas as pd
import numpy as np
from math import pi

from matplotlib import pyplot as plt
from matplotlib.widgets import Slider
from ipywidgets import widgets, interactive

from bokeh.io import output_notebook, show
from bokeh.plotting import figure, save
from bokeh.models import Legend, LegendItem

output_notebook()

In [2]:
# Analysis window: first and last publication year (both inclusive)
startYear = 2017
endYear = 2023

# Last two digits of the final year, as an int (2023 -> 23)
end = endYear % 100

# Parent of the current working directory; data paths are built from it
proj_path = Path.cwd().parent

In [3]:
# Define inputs

# Year shown in the single-year charts
year = 2023

# Region codes to process; every mapping below is keyed by these codes
region_code = ["EC", "BY", "TN", "UY", "VN"]
# Code used to match the 'Country Code' column of the labour force statistics
region_WB = {"EC": "ECU", "BY": "BLR", "TN": "TUN", "UY": "URY", "VN": "VNM"}
# Display name written to the 'Region' column and shown in charts and widgets
region_name = {"EC": "Ecuador", "BY": "Belarus", "TN": "Tunisia", "UY": "Uruguay", "VN": "Vietnam"}
# Colour used for each region in the line and radar charts
region_colors = {"EC": "Blue", "BY": "Green", "TN": "Purple", "UY": "Brown", "VN": "Red"}

In [4]:
# Import labour force statistics; rows are later selected by 'Country Code'
# and values read from the column named after the year.
# Source: https://data.worldbank.org/indicator/SL.TLF.TOTL.IN

df_labour_stats = pd.read_csv(f'{proj_path}/data/WB_labour_force_stats.csv')

# IPI Calculations

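Calculate the five IPI component indicators (efficiency, diversification, quality, internationalization and time) for each region and year, then sum them into the IPI.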

In [5]:
# Names of the five IPI components, in output column order
ipi_components = ['Efficiency', 'Diversification', 'Quality', 'Internationalization', 'Time']


def _per_patent(total, n_patents):
    """Return ``total / n_patents``, or NaN when the year has no patents."""
    return total / n_patents if n_patents else np.nan


# Collect one record per (region, year); the dataframe is built once at the end
# instead of being enlarged row by row
ipi_records = []

# Iterate through regional data to calculate indicators
for region in region_code:

    # Import patent information dataframe for the region
    df_patents = pd.read_csv(f'{proj_path}/data/{region}/{startYear}-{end}PatentsWithCitations_{region}.csv')
    # Publication year as a string: the first four characters of publicationDate
    df_patents['year'] = df_patents['publicationDate'].str[:4]

    # Labour force row(s) for this region; independent of the year, so looked up once
    labour_row = df_labour_stats[df_labour_stats['Country Code']==region_WB[region]]

    # The loop variable is deliberately not named `year`, so the `year` input
    # defined earlier in the notebook is not overwritten by this cell
    for ipi_year in range(startYear, endYear+1):

        df_patents_year = df_patents[df_patents['year']==f'{ipi_year}']
        total_patents = len(df_patents_year)

        # Efficiency indicator: patents per 100,000 of the labour force.
        # .item() raises ValueError unless exactly one row matches the country code.
        labor_force = labour_row[f'{ipi_year}'].item()
        efficiency_ind = (total_patents / labor_force) * 100000

        # Diversification indicator: mean IPC count per patent
        total_IPC = df_patents_year['IPC_count'].sum()
        diversification_ind = _per_patent(total_IPC, total_patents)

        # Quality indicator: mean backward citations per patent
        total_backward_citations = df_patents_year['backwardCitationsESpaceB'].sum()
        quality_ind = _per_patent(total_backward_citations, total_patents)

        # Internationalization indicator: mean geographical extensions per patent
        total_geographical_extensions = df_patents_year['geographicalExtensions'].sum()
        internationalization_ind = _per_patent(total_geographical_extensions, total_patents)

        # Time indicator: mean timeDifference per patent
        # (presumably measured in months -- TODO confirm against the data)
        time_in_months = df_patents_year['timeDifference'].sum()
        time_ind = _per_patent(time_in_months, total_patents)

        ipi_records.append([region_name[region], ipi_year, efficiency_ind, diversification_ind, quality_ind, internationalization_ind, time_ind])

# Dataframe storing the output, one row per region and year
df_IPI = pd.DataFrame(ipi_records, columns=['Region', 'Year'] + ipi_components)

# The IPI is the plain sum of the five components
df_IPI['IPI'] = df_IPI['Efficiency'] + df_IPI['Diversification'] + df_IPI['Quality'] + df_IPI['Internationalization'] + df_IPI['Time']


In [6]:
# Export indicator data as csv into data folder.
# The row index is omitted; 'utf-8-sig' writes UTF-8 with a leading byte-order mark.

df_IPI.to_csv(f'{proj_path}/data/ipi.csv', index=False, encoding='utf-8-sig')

# Visualisations and Plotting

Visualise the data as various types of graphs.

In [7]:
# Optional: reload the previously exported indicators instead of recomputing them.
# df_IPI = pd.read_csv(f'{proj_path}/data/ipi.csv')

In [8]:
def createLegend(renderers, labels):
    """Build a Bokeh ``Legend`` with one entry per renderer.

    The entry for the renderer at position ``k`` is labelled ``labels[k]``,
    so ``labels`` must be indexable and at least as long as ``renderers``.
    """
    legend_entries = [
        LegendItem(label=labels[position], renderers=[renderer], index=0)
        for position, renderer in enumerate(renderers)
    ]
    return Legend(items=legend_entries)

In [9]:
def multiple_line_graph(df_IPI, region_code, region_colors, region_name, title='IPIs'):
    """Plot the IPI of each region over the years as one Bokeh line per region.

    Parameters
    ----------
    df_IPI : DataFrame with at least the columns 'Region', 'Year' and 'IPI'.
    region_code : iterable of region codes to plot; also fixes the legend order.
    region_colors : mapping from region code to line colour.
    region_name : mapping from region code to the value used in the 'Region' column.
    title : text shown above the plot.

    Returns
    -------
    The Bokeh figure, with a legend placed to the right of the plot.
    """
    # Create new figure
    p = figure(width=600, height=400, tools="hover", tooltips="@x: @y")
    p.xaxis.axis_label = 'Year'
    p.yaxis.axis_label = 'IPI'
    p.title = title
    p.title_location = 'above'

    # Plot line for each region, recording its label alongside its renderer so
    # the legend stays correct when region_code is a subset of region_name or
    # lists the regions in a different order
    renderers = []
    labels = []
    for region in region_code:
        name = region_name[region]
        df = df_IPI[df_IPI['Region']==name]

        r = p.line(df['Year'], df['IPI'], line_width=1.5, line_color=region_colors[region], line_alpha=0.75)
        renderers.append(r)
        labels.append(name)

    # Create legend; clicking an entry mutes the corresponding line
    legend = createLegend(renderers, labels)
    legend.click_policy = 'mute'
    p.add_layout(legend, 'right')

    return p


In [10]:
def stackedBarGraph(df_IPI, year, region_name, colors_comp=['red', 'blue', 'green', 'orange', 'purple']):
    """Draw the five IPI components of every region as stacked bars for one year.

    ``df_IPI`` supplies the data, ``year`` selects the rows, the values of
    ``region_name`` become the categories on the x axis and ``colors_comp``
    gives one colour per component. Returns the Bokeh figure with a legend on
    the right.
    """
    components = ["Efficiency", "Diversification", "Quality", "Internationalization", "Time"]
    region_labels = list(region_name.values())

    # Rows of the requested year, reduced to the region and component columns
    year_rows = df_IPI[df_IPI['Year']==year]
    data = year_rows[['Region']+components]

    # Categorical x axis with no toolbar; the hover tooltip names the component
    p = figure(
        x_range=region_labels,
        height=350,
        title=f"IPI indicators for {year}",
        toolbar_location=None,
        tools="hover",
        tooltips="$name: @$name",
    )

    # One stacked segment per component; segments become opaque while hovered
    bar_renderers = p.vbar_stack(
        stackers=components,
        x='Region',
        width=0.9,
        color=colors_comp,
        source=data,
        alpha=0.75,
        hover_alpha=1.0,
    )

    # Bars start at zero, with a little padding around the categories
    p.y_range.start = 0
    p.x_range.range_padding = 0.1

    # Strip vertical grid lines, minor ticks and the plot outline
    p.xgrid.grid_line_color = None
    p.axis.minor_tick_line_color = None
    p.outline_line_color = None

    # Legend with one entry per component; clicking an entry mutes it
    component_legend = createLegend(bar_renderers, components)
    component_legend.click_policy = 'mute'
    p.add_layout(component_legend, 'right')

    return p

In [11]:
# Create radar charts for each region
def individualRadarCharts(df_IPI, year, region_code, region_colors, region_name, visible=False):
    """Draw one radar (polar) chart of the five IPI components per region.

    Parameters
    ----------
    df_IPI : DataFrame with a 'Region' column, a 'Year' column and the five
        component columns.
    year : year whose values are plotted; each region needs a row for it,
        otherwise an IndexError is raised.
    region_code : iterable of region codes to plot. The layout places the
        first three charts on the top row and later ones on the bottom row of
        a 2x7 grid, so at most six regions fit.
    region_colors : mapping from region code to plot colour.
    region_name : mapping from region code to the 'Region' value and chart title.
    visible : passed to ``fig.set_visible`` before the figure is returned.

    Returns
    -------
    The matplotlib figure holding all radar charts.
    """
    labels = ['Efficiency', 'Diversification', 'Quality', 'Internationalization', 'Time']
    labels_short = ['Eff.', 'Div.', 'Qlty.', 'Int.', 'Time', 'Eff.']

    # Calculate angles; the first angle and label are repeated at the end so
    # that each outline closes back on its starting point
    angles = np.linspace(0, 2*np.pi, len(labels), endpoint=False)
    angles = np.concatenate((angles, [angles[0]]))
    labels.append(labels[0])

    # NOTE: this changes the global matplotlib style
    plt.style.use('ggplot')

    # Create figure & gridspecs
    fig = plt.figure(figsize=(15, 8))
    gs = fig.add_gridspec(2, 7)

    # Iterate over the region codes to create radar plots
    for i, region in enumerate(region_code):

        # Top row uses columns 0, 2, 4; the bottom row is offset by one
        # column (1, 3, ...) so that its charts sit between those above
        if i < 3:
            row, col = 0, i*2
        else:
            row, col = 1, (i - 3)*2+1

        # Retrieve the component values of the current region for the year
        df = df_IPI[df_IPI['Region']==region_name[region]]
        data = df[df['Year']==year][labels].values.tolist()[0]

        # Plot the data
        ax = fig.add_subplot(gs[row, col], polar=True)
        ax.plot(angles, data, 'o--', color=region_colors[region], linewidth=1, markersize=1.5)
        ax.fill(angles, data, alpha=0.25, color=region_colors[region])
        ax.set_thetagrids(angles * 180/np.pi, labels_short)
        ax.set_title(region_name[region])
        ax.grid(True)

    # Adjust the spacing between subplots
    fig.subplots_adjust(wspace=-0.15, hspace=-0.15)

    # Figure-level title: plt.title would target the last axes and replace
    # that region's own title
    fig.suptitle(f'IPI by Region in {year}')

    fig.set_visible(visible)
    return fig

In [12]:
# Preview of the option lists offered by the area and year dropdowns;
# only the last expression is displayed as the cell output
['All'] + [*region_name.values()]
list(range(startYear, endYear+1))

[2017, 2018, 2019, 2020, 2021, 2022, 2023]

In [13]:
def complexRadarChart(slider, year, area, df_IPI=df_IPI, region_code=region_code, region_colors=region_colors, region_name=region_name, visible=False):
    """Overlay the IPI components of one or all regions on a single radar chart.

    ``slider`` is the upper limit of the radial axis, ``year`` selects the
    rows to plot and ``area`` is either 'All' or one of the display names in
    ``region_name``. The remaining arguments default to the notebook-level
    objects of the same name as they were when this function was defined.
    ``visible`` is passed to ``fig.set_visible``. Returns the matplotlib figure.
    """
    plt.style.use('ggplot')

    short_names = ['Eff.', 'Div.', 'Qlty.', 'Int.', 'Time', 'Eff.']
    indicator_cols = ['Efficiency', 'Diversification', 'Quality', 'Internationalization', 'Time']

    # One spoke per indicator; the first spoke and column are repeated at the
    # end so that every outline closes on its starting point
    spoke_angles = np.linspace(0, 2*np.pi, len(indicator_cols), endpoint=False)
    spoke_angles = np.concatenate((spoke_angles, [spoke_angles[0]]))
    closed_cols = indicator_cols + indicator_cols[:1]

    # Single polar axes
    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(polar=True)
    ax.set_thetagrids(spoke_angles * 180/np.pi, short_names)
    ax.set_title(f'IPIs in {year}')

    # 'All' keeps every region code; otherwise keep the codes whose display
    # name equals the requested area
    if area == 'All':
        selected_codes = region_code
    else:
        selected_codes = [code for code, name in region_name.items() if name == area]

    for code in selected_codes:
        # Component values of this region for the selected year
        region_rows = df_IPI[df_IPI['Region']==region_name[code]]
        values = region_rows[region_rows['Year']==year][closed_cols].values.tolist()[0]

        # Dashed outline plus a translucent fill in the region's colour
        ax.plot(spoke_angles, values, 'o--', color=region_colors[code], linewidth=1, markersize=1.5, alpha=0.75, label=region_name[code])
        ax.fill(spoke_angles, values, alpha=0.25, color=region_colors[code])

    # The slider value sets the outer edge of the radial axis
    ax.set_ylim(0, slider)

    ax.legend()
    fig.set_visible(visible)

    return fig

# Largest single component value across all regions and years
max_val = df_IPI[['Efficiency', 'Diversification', 'Quality', 'Internationalization', 'Time']].max().max()

# The slider only holds integers: round the maximum up so the largest data
# point lies inside the initial radial range instead of being cut off
zoom_default = int(np.ceil(max_val))

# Region selector: every region, or a single one chosen by display name
areas = widgets.Dropdown(
        options=['All']+list(region_name.values()),
        value='All',
        description='Area: ',
    )

# Year selector; defaults to the last configured year so the default is
# always one of the options, whatever range is configured
years = widgets.Dropdown(
    options=list(range(startYear, endYear+1)),
    value=endYear,
    description='Year: ',
    )

# Upper limit of the radial axis, with 10 units of headroom above the maximum
zoom_slider = widgets.IntSlider(
        value=zoom_default,
        min=0,
        max=zoom_default+10,
        step=1,
        description="Zoom:",
        orientation="horizontal"
        )


# Output
Create the charts using the functions defined above.

In [14]:
# IPI per region over the years, one line per region
line_graph = multiple_line_graph(
    df_IPI=df_IPI,
    region_code=region_code,
    region_colors=region_colors,
    region_name=region_name,
)

show(line_graph)

In [15]:
# IPI components per region for the selected year, as stacked bars
bar_graph = stackedBarGraph(df_IPI=df_IPI, year=year, region_name=region_name)

show(bar_graph)

In [16]:
def interactive_radar(area, year, zoom_slider):
    """Widget callback: draw the combined radar chart for the current selections.

    ``area`` and ``year`` choose what is plotted and ``zoom_slider`` is the
    radial axis limit; the data and region mappings come from the notebook's
    globals. Returns the figure made by ``complexRadarChart`` with
    ``visible=True``.
    """
    chart_options = dict(
        slider=zoom_slider,
        year=year,
        area=area,
        df_IPI=df_IPI,
        region_code=region_code,
        region_colors=region_colors,
        region_name=region_name,
        visible=True,
    )
    return complexRadarChart(**chart_options)

# Bind the three widgets to the callback; it is re-run whenever one changes
interactive(interactive_radar, area=areas, year=years, zoom_slider=zoom_slider)

interactive(children=(Dropdown(description='Area: ', options=('All', 'Ecuador', 'Belarus', 'Tunicia', 'Uruguay…