In [1]:
# Imports
import pandas as pd
import numpy as np
import plotly.graph_objects as go

In [2]:
# ISO Countries names
countries = pd.read_csv(r'countries_ISO.txt')
countries = countries.rename(
    columns={'alpha-2': 'alpha_2', 'alpha-3': 'alpha_3'})

In [3]:
countries[countries['name'].str.contains('Russ')]

Unnamed: 0,name,alpha_2,alpha_3,country-code,iso_3166-2,region,sub-region,intermediate-region,region-code,sub-region-code,intermediate-region-code
183,Russian Federation,RU,RUS,643,ISO 3166-2:RU,Europe,Eastern Europe,,150.0,151.0,


In [4]:
# Data frame with the total number of patents granted per country/year
df_pat = pd.read_csv(r'total_patent_grants.csv', header=7).fillna(0)
df_pat = df_pat.rename(columns={'Origin (Code)': 'alpha_2'})
df_pat

Unnamed: 0,Origin,alpha_2,Office,Type,1980,1981,1982,1983,1984,1985,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Albania,AL,Total,Total,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,5.0,3.0,12.0,6.0,0.0,12.0,4.0,0.0,7.0
1,Algeria,DZ,Total,Total,0.0,0.0,0.0,0.0,0.0,0.0,...,41.0,0.0,0.0,79.0,64.0,85.0,35.0,34.0,49.0,104.0
2,Andorra,AD,Total,Total,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,10.0,11.0,12.0,14.0,18.0
3,Angola,AO,Total,Total,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,49.0
4,Argentina,AR,Total,Total,1591.0,820.0,740.0,538.0,513.0,0.0,...,354.0,410.0,407.0,375.0,377.0,355.0,290.0,368.0,439.0,501.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,Yemen,YE,Total,Total,0.0,0.0,0.0,0.0,0.0,0.0,...,14.0,11.0,8.0,2.0,0.0,24.0,9.0,24.0,6.0,1.0
170,Yugoslavia,YU,Total,Total,79.0,95.0,55.0,126.0,157.0,207.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
171,Zaire,ZR,Total,Total,3.0,8.0,6.0,6.0,9.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
172,Zambia,ZM,Total,Total,0.0,0.0,2.0,0.0,0.0,10.0,...,2.0,2.0,6.0,4.0,3.0,12.0,5.0,159.0,4.0,0.0


In [5]:
# Including ISO names in the patents data frame
df_pat = pd.merge(df_pat, countries[['name', 'alpha_2']], how='left', on='alpha_2')
df_pat

Unnamed: 0,Origin,alpha_2,Office,Type,1980,1981,1982,1983,1984,1985,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,name
0,Albania,AL,Total,Total,0.0,0.0,0.0,0.0,0.0,0.0,...,5.0,3.0,12.0,6.0,0.0,12.0,4.0,0.0,7.0,Albania
1,Algeria,DZ,Total,Total,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,79.0,64.0,85.0,35.0,34.0,49.0,104.0,Algeria
2,Andorra,AD,Total,Total,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,10.0,11.0,12.0,14.0,18.0,Andorra
3,Angola,AO,Total,Total,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,49.0,Angola
4,Argentina,AR,Total,Total,1591.0,820.0,740.0,538.0,513.0,0.0,...,410.0,407.0,375.0,377.0,355.0,290.0,368.0,439.0,501.0,Argentina
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,Yemen,YE,Total,Total,0.0,0.0,0.0,0.0,0.0,0.0,...,11.0,8.0,2.0,0.0,24.0,9.0,24.0,6.0,1.0,Yemen
170,Yugoslavia,YU,Total,Total,79.0,95.0,55.0,126.0,157.0,207.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
171,Zaire,ZR,Total,Total,3.0,8.0,6.0,6.0,9.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
172,Zambia,ZM,Total,Total,0.0,0.0,2.0,0.0,0.0,10.0,...,2.0,6.0,4.0,3.0,12.0,5.0,159.0,4.0,0.0,Zambia


In [6]:
# Missing names in Patents
miss_pat = df_pat.fillna('-')
miss_pat = miss_pat[miss_pat.name == '-']
miss_pat

Unnamed: 0,Origin,alpha_2,Office,Type,1980,1981,1982,1983,1984,1985,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,name
43,Czechoslovakia,CS,Total,Total,6768.0,5447.0,6116.0,6200.0,6267.0,7.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-
62,German Democratic Republic,DD,Total,Total,4455.0,5713.0,4125.0,10709.0,11402.0,11487.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-
108,Namibia,0,Total,Total,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,13.0,16.0,14.0,4.0,8.0,9.0,0.0,-
145,Soviet Union,SU,Total,Total,92909.0,96544.0,89305.0,72635.0,62755.0,73282.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-
170,Yugoslavia,YU,Total,Total,79.0,95.0,55.0,126.0,157.0,207.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-
171,Zaire,ZR,Total,Total,3.0,8.0,6.0,6.0,9.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-


In [7]:
# Dealing with missing country name in Patents
missing = {
    'German Democratic Republic': 'Germany',
    'Soviet Union': 'Russian Federation'
}
for k in missing.keys():
    i = df_pat[df_pat.Origin == k].index[0]
    df_pat.iloc[i, 46] = missing[k]


In [8]:
# Group values by names
df_pat = df_pat.drop(columns=['Origin', 'alpha_2', 'Office', 'Type']).dropna()
df_pat = df_pat.groupby(['name']).sum()

In [9]:
# Data frame with the population per country/year
df_pop = pd.read_csv(r'population.csv', header=2).drop(
    columns=['Indicator Name', 'Indicator Code']).rename(
        columns={'Country Code': 'alpha_3'})

df_pop

Unnamed: 0,Country Name,alpha_3,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,Unnamed: 66
0,Aruba,ABW,54608.0,55811.0,56682.0,57475.0,58178.0,58782.0,59291.0,59522.0,...,102880.0,103594.0,104257.0,104874.0,105439.0,105962.0,106442.0,106585.0,106537.0,
1,Africa Eastern and Southern,AFE,130692579.0,134169237.0,137835590.0,141630546.0,145605995.0,149742351.0,153955516.0,158313235.0,...,567891875.0,583650827.0,600008150.0,616377331.0,632746296.0,649756874.0,667242712.0,685112705.0,702976832.0,
2,Afghanistan,AFG,8622466.0,8790140.0,8969047.0,9157465.0,9355514.0,9565147.0,9783147.0,10010030.0,...,31541209.0,32716210.0,33753499.0,34636207.0,35643418.0,36686784.0,37769499.0,38972230.0,40099462.0,
3,Africa Western and Central,AFW,97256290.0,99314028.0,101445032.0,103667517.0,105959979.0,108336203.0,110798486.0,113319950.0,...,387204553.0,397855507.0,408690375.0,419778384.0,431138704.0,442646825.0,454306063.0,466189102.0,478185907.0,
4,Angola,AGO,5357195.0,5441333.0,5521400.0,5599827.0,5673199.0,5736582.0,5787044.0,5827503.0,...,26147002.0,27128337.0,28127721.0,29154746.0,30208628.0,31273533.0,32353588.0,33428486.0,34503774.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,Kosovo,XKX,947000.0,966000.0,994000.0,1022000.0,1050000.0,1078000.0,1106000.0,1135000.0,...,1818117.0,1812771.0,1788196.0,1777557.0,1791003.0,1797085.0,1788878.0,1790133.0,1786038.0,
262,"Yemen, Rep.",YEM,5542459.0,5646668.0,5753386.0,5860197.0,5973803.0,6097298.0,6228430.0,6368014.0,...,26984002.0,27753304.0,28516545.0,29274002.0,30034389.0,30790513.0,31546691.0,32284046.0,32981641.0,
263,South Africa,ZAF,16520441.0,16989464.0,17503133.0,18042215.0,18603097.0,19187194.0,19789771.0,20410677.0,...,53873616.0,54729551.0,55876504.0,56422274.0,56641209.0,57339635.0,58087055.0,58801927.0,59392255.0,
264,Zambia,ZMB,3119430.0,3219451.0,3323427.0,3431381.0,3542764.0,3658024.0,3777680.0,3901288.0,...,15234976.0,15737793.0,16248230.0,16767761.0,17298054.0,17835893.0,18380477.0,18927715.0,19473125.0,


In [10]:
# Including ISO names in the population data frame
df_pop = pd.merge(
    df_pop, countries[['name', 'alpha_3']],
    how='left', 
    on='alpha_3'
    )

In [11]:
# Missing names in population
miss_pop = df_pop.fillna('-')
miss_pop = miss_pop[miss_pop.name == '-']
miss_pop

Unnamed: 0,Country Name,alpha_3,1960,1961,1962,1963,1964,1965,1966,1967,...,2014,2015,2016,2017,2018,2019,2020,2021,Unnamed: 66,name
1,Africa Eastern and Southern,AFE,130692579.0,134169237.0,137835590.0,141630546.0,145605995.0,149742351.0,153955516.0,158313235.0,...,583650827.0,600008150.0,616377331.0,632746296.0,649756874.0,667242712.0,685112705.0,702976832.0,-,-
3,Africa Western and Central,AFW,97256290.0,99314028.0,101445032.0,103667517.0,105959979.0,108336203.0,110798486.0,113319950.0,...,397855507.0,408690375.0,419778384.0,431138704.0,442646825.0,454306063.0,466189102.0,478185907.0,-,-
7,Arab World,ARB,93359407.0,95760348.0,98268683.0,100892507.0,103618568.0,106444103.0,109394536.0,112499764.0,...,397922915.0,406501999.0,415077960.0,423664839.0,432545676.0,441467739.0,449228296.0,456520777.0,-,-
36,Central Europe and the Baltics,CEB,91401764.0,92232738.0,93009498.0,93840016.0,94715795.0,95440988.0,96146336.0,97043270.0,...,103496179.0,103257886.0,102994278.0,102740078.0,102538451.0,102398537.0,102180124.0,101430997.0,-,-
38,Channel Islands,CHI,109186.0,110225.0,111281.0,112410.0,113596.0,114832.0,116116.0,117430.0,...,160912.0,162190.0,163721.0,165215.0,167259.0,169410.0,171113.0,172683.0,-,-
49,Caribbean small states,CSS,4209141.0,4289429.0,4366420.0,4443544.0,4520592.0,4596245.0,4670465.0,4743053.0,...,7181044.0,7224602.0,7265272.0,7303634.0,7374650.0,7424102.0,7444768.0,7481877.0,-,-
61,East Asia & Pacific (excluding high income),EAP,896482332.0,896012881.0,907880207.0,931136006.0,954010411.0,977517019.0,1004358015.0,1030478704.0,...,2034317097.0,2049809214.0,2065223450.0,2080968782.0,2094573278.0,2106439246.0,2116424876.0,2123673456.0,-,-
62,Early-demographic dividend,EAR,979461502.0,1004319366.0,1029962253.0,1056327420.0,1083430197.0,1110603410.0,1137875812.0,1166092667.0,...,3122586392.0,3166642585.0,3210110979.0,3252529883.0,3294298709.0,3335463995.0,3375134276.0,3411889059.0,-,-
63,East Asia & Pacific,EAS,1043333636.0,1045203037.0,1059600211.0,1085398906.0,1110819272.0,1136927045.0,1166227679.0,1194567141.0,...,2278232287.0,2294507020.0,2310721864.0,2327134580.0,2341387076.0,2353862247.0,2363940425.0,2370204347.0,-,-
64,Europe & Central Asia (excluding high income),ECA,255726092.0,259951519.0,264183560.0,268409373.0,272634858.0,276765568.0,280229181.0,283724914.0,...,388842371.0,391695432.0,394321096.0,396482489.0,398076771.0,399592320.0,400811771.0,401575218.0,-,-


In [12]:
# Dropping missing names since they are consolidated data, not countries
df_pop = df_pop.drop(
    columns=[
        'Unnamed: 66',
        'Country Name',
        'alpha_3'
        ]
).dropna().set_index('name')

In [13]:
# Dropping population data prior to 1980
i = df_pop.columns.get_loc('1979')
df_pop = df_pop.iloc[:, i + 1:]

In [14]:
# Filter out population data from countries that do not appear on patent data
df_pop = df_pop[df_pop.index.isin(df_pat.index)]
df_pop

Unnamed: 0_level_0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Angola,8330047.0,8631457.0,8947152.0,9276707.0,9617702.0,9970621.0,10332574.0,10694057.0,11060261.0,11439498.0,...,25188292.0,26147002.0,27128337.0,28127721.0,29154746.0,30208628.0,31273533.0,32353588.0,33428486.0,34503774.0
Albania,2671997.0,2726056.0,2784278.0,2843960.0,2904429.0,2964762.0,3022635.0,3083605.0,3142336.0,3227943.0,...,2900401.0,2895092.0,2889104.0,2880703.0,2876101.0,2873457.0,2866376.0,2854191.0,2837849.0,2811666.0
Andorra,35611.0,36987.0,38598.0,40432.0,42181.0,43809.0,45605.0,47635.0,49654.0,51639.0,...,71013.0,71367.0,71621.0,71746.0,72540.0,73837.0,75013.0,76343.0,77700.0,79034.0
United Arab Emirates,1014048.0,1100180.0,1167856.0,1237572.0,1308331.0,1379536.0,1468697.0,1575909.0,1683681.0,1791840.0,...,8664969.0,8751847.0,8835951.0,8916899.0,8994263.0,9068296.0,9140169.0,9211657.0,9287289.0,9365145.0
Argentina,28024803.0,28471285.0,28922762.0,29377137.0,29832197.0,30287112.0,30748326.0,31216453.0,31690792.0,32165766.0,...,41733271.0,42202935.0,42669500.0,43131966.0,43590368.0,44044811.0,44494502.0,44938712.0,45376763.0,45808747.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Samoa,164905.0,166190.0,166885.0,166944.0,166779.0,166517.0,166365.0,166773.0,167452.0,167886.0,...,198124.0,199939.0,201757.0,203571.0,205544.0,207630.0,209701.0,211905.0,214929.0,218764.0
Yemen,9204938.0,9529105.0,9872292.0,10237391.0,10625687.0,11036918.0,11465444.0,11915563.0,12387238.0,12872362.0,...,26223391.0,26984002.0,27753304.0,28516545.0,29274002.0,30034389.0,30790513.0,31546691.0,32284046.0,32981641.0
South Africa,29463549.0,30232561.0,31022417.0,31865176.0,32768207.0,33752964.0,34877834.0,36119333.0,37393853.0,38668684.0,...,53145033.0,53873616.0,54729551.0,55876504.0,56422274.0,56641209.0,57339635.0,58087055.0,58801927.0,59392255.0
Zambia,5720438.0,5897481.0,6090818.0,6291070.0,6488072.0,6686449.0,6890967.0,7095185.0,7294325.0,7491275.0,...,14744658.0,15234976.0,15737793.0,16248230.0,16767761.0,17298054.0,17835893.0,18380477.0,18927715.0,19473125.0


In [15]:
# Data frame with the GDP per country/year
df_gdp = pd.read_csv(r'gdp.csv', header=4).drop(
    columns=['Indicator Name', 'Indicator Code']).rename(
        columns={'Country Code': 'alpha_3'})

df_gdp

Unnamed: 0,Country Name,alpha_3,1960,1961,1962,1963,1964,1965,1966,1967,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Aruba,ABW,,,,,,,,,...,2.615084e+09,2.727933e+09,2.791061e+09,2.963128e+09,2.983799e+09,3.092179e+09,3.202235e+09,3.368970e+09,2.610039e+09,3.126019e+09
1,Africa Eastern and Southern,AFE,2.129081e+10,2.180870e+10,2.370727e+10,2.821034e+10,2.611906e+10,2.968249e+10,3.223946e+10,3.351491e+10,...,9.725734e+11,9.834729e+11,1.003768e+12,9.245228e+11,8.827213e+11,1.021119e+12,1.007240e+12,1.001017e+12,9.274845e+11,1.080712e+12
2,Afghanistan,AFG,5.377778e+08,5.488889e+08,5.466667e+08,7.511112e+08,8.000000e+08,1.006667e+09,1.400000e+09,1.673333e+09,...,2.020357e+10,2.056449e+10,2.055058e+10,1.999816e+10,1.801956e+10,1.889635e+10,1.841885e+10,1.890449e+10,2.014344e+10,1.478686e+10
3,Africa Western and Central,AFW,1.040414e+10,1.112789e+10,1.194319e+10,1.267633e+10,1.383837e+10,1.486223e+10,1.583259e+10,1.442604e+10,...,7.360399e+11,8.322169e+11,8.924979e+11,7.669580e+11,6.905454e+11,6.837480e+11,7.663597e+11,7.947191e+11,7.847997e+11,8.401873e+11
4,Angola,AGO,,,,,,,,,...,1.249982e+11,1.334016e+11,1.372444e+11,8.721930e+10,4.984049e+10,6.897277e+10,7.779294e+10,6.930911e+10,5.361907e+10,6.740429e+10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,Kosovo,XKX,,,,,,,,,...,6.163785e+09,6.735731e+09,7.074658e+09,6.295820e+09,6.682833e+09,7.180813e+09,7.878509e+09,7.899879e+09,7.716925e+09,9.412034e+09
262,"Yemen, Rep.",YEM,,,,,,,,,...,3.540132e+10,4.041523e+10,4.322859e+10,4.244450e+10,3.131783e+10,2.684223e+10,2.160616e+10,,,
263,South Africa,ZAF,8.748597e+09,9.225996e+09,9.813996e+09,1.085420e+10,1.195600e+10,1.306899e+10,1.421139e+10,1.582139e+10,...,4.344005e+11,4.008860e+11,3.811989e+11,3.467098e+11,3.235855e+11,3.814488e+11,4.041597e+11,3.885320e+11,3.376197e+11,4.190150e+11
264,Zambia,ZMB,7.130000e+08,6.962857e+08,6.931429e+08,7.187143e+08,8.394286e+08,1.082857e+09,1.264286e+09,1.368000e+09,...,2.550306e+10,2.803724e+10,2.714102e+10,2.125122e+10,2.095841e+10,2.587360e+10,2.631159e+10,2.330867e+10,1.811063e+10,2.214763e+10


In [16]:
# Including ISO names in the GDP data frame
df_gdp = pd.merge(
    df_gdp, countries[['name', 'alpha_3']],
    how='left', 
    on='alpha_3'
    )

In [17]:
# Missing names in GDP
miss_gdp = df_gdp.fillna('-')
miss_gdp = miss_gdp[miss_gdp.name == '-']
miss_gdp

Unnamed: 0,Country Name,alpha_3,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,name
1,Africa Eastern and Southern,AFE,21290811193.816399,21808704493.255299,23707266143.472099,28210335256.195202,26119063726.390499,29682486700.373798,32239462540.956699,33514906531.505299,...,983472863852.06897,1003768056233.72998,924522770394.564941,882721265524.166016,1021118730941.079956,1007240479573.390015,1001017049660.459961,927484450967.307007,1080711605709.630005,-
3,Africa Western and Central,AFW,10404135069.15,11127894641.0191,11943187848.3043,12676330764.6917,13838369295.2313,14862225759.9135,15832591204.0611,14426038229.774799,...,832216894624.603027,892497905712.365967,766957955078.10498,690545418736.156982,683748014299.69397,766359667820.703003,794719102944.19104,784799699387.376953,840187283626.447998,-
7,Arab World,ARB,-,-,-,-,-,-,-,-,...,2843529425181.899902,2895457837893.279785,2538925219911.169922,2516485492775.5,2598094271212.950195,2800428454533.120117,2818502435493.25,2490805099290.140137,2860723497699.22998,-
36,Central Europe and the Baltics,CEB,-,-,-,-,-,-,-,-,...,1416961929448.889893,1462687483688.629883,1292823477883.110107,1316465869392.300049,1461462997744.969971,1649466156164.77002,1675083820501.219971,1664902507747.840088,1901935313869.669922,-
38,Channel Islands,CHI,-,-,-,-,-,-,-,-,...,-,-,-,-,-,-,-,-,-,-
49,Caribbean small states,CSS,1880306125.08675,2038301741.01721,2153895647.51084,2290313587.93533,2470264539.67792,2660946063.37403,2888647977.98333,3102515476.18752,...,75232098978.669601,76921757095.950699,75669285315.658707,71061689479.810898,73590179849.843506,76350610223.412994,77428424853.896194,65896805290.328598,75295300457.2966,-
61,East Asia & Pacific (excluding high income),EAP,81346963310.482895,71614401108.673203,65396585452.5746,70843944144.4599,81762809534.728806,95180592498.501602,104077456326.020996,101006786524.953003,...,11871140152558.300781,12798834538401.599609,13327590633526.199219,13612470859504.0,14877277929156.400391,16640444719655.699219,17207855739652.5,17486916629390.599609,20751789915914.300781,-
62,Early-demographic dividend,EAR,158207028110.518005,159178740736.028992,164016550962.156006,174002727352.371002,199308121633.355988,215883601437.850006,213170163779.562988,224033123597.848999,...,10297645688404.5,10651444170099.699219,10131735163334.400391,10458224448027.400391,11295052797674.599609,11421920659971.400391,11637871027178.0,10849641857292.900391,12633177005843.699219,-
63,East Asia & Pacific,EAS,154634893528.041992,155170778627.410004,158387606831.315002,176729370270.372009,202713926530.742004,225620618889.149994,252105603041.088013,273181770839.36499,...,21410741686661.601562,22086206227634.101562,21997211707026.898438,22772039950316.898438,24324883921969.0,26482255890882.101562,27028297830157.601562,27127309724583.300781,30911686458694.300781,-
64,Europe & Central Asia (excluding high income),ECA,-,-,-,-,-,-,-,-,...,4150229449045.220215,3845830327604.580078,2921395767800.160156,2777730405927.089844,3118194050588.169922,3182536273975.649902,3248640014760.209961,2982626888407.660156,3517372658753.680176,-


In [18]:
# Dropping missing names since they are consolidated data, not countries
df_gdp = df_gdp.drop(
    columns=[
        'Country Name',
        'alpha_3'
        ]
).dropna().set_index('name')

In [19]:
# Dropping population data prior to 1980
i = df_gdp.columns.get_loc('1979')
df_gdp = df_gdp.iloc[:, i + 1:]

In [28]:
# Filter out population data from countries that do not appear on patent data
df_gdp = df_gdp[df_gdp.index.isin(df_pat.index)]
df_gdp = df_gdp/df_pop
df_gdp

In [21]:
# Patents per 100 thousand inhabitants
pat_100k = df_pat/df_pop.div(100000)
pat_100k

Unnamed: 0_level_0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Albania,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.172706,0.103838,0.416565,0.208616,0.000000,0.418647,0.140145,0.000000,0.248963
Algeria,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.110036,0.000000,0.000000,0.199782,0.158654,0.206629,0.083478,0.079615,0.112769,0.235411
Andorra,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,13.543345,14.664125,15.718533,18.018018,22.775008
Angola,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.142013
Argentina,5.677114,2.880095,2.558538,1.831356,1.719619,0.000000,0.643937,2.761364,1.561968,0.000000,...,0.848244,0.971496,0.953843,0.869425,0.864870,0.805997,0.651766,0.818893,0.967456,1.093678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Venezuela (Bolivarian Republic of),0.361594,0.370881,0.367106,1.332558,1.285890,0.143659,0.358325,0.447387,0.000000,0.108948,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Viet Nam,0.000000,0.000000,0.000000,0.000000,0.008561,0.000000,0.000000,0.000000,0.000000,0.006110,...,0.062709,0.077547,0.065764,0.095454,0.126709,0.169089,0.261288,0.226569,0.200727,0.232897
Yemen,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.053387,0.040765,0.028825,0.007013,0.000000,0.079908,0.029230,0.076078,0.018585,0.003032
Zambia,0.000000,0.000000,0.032836,0.000000,0.000000,0.149556,0.000000,0.000000,0.000000,0.040047,...,0.013564,0.013128,0.038125,0.024618,0.017891,0.069372,0.028033,0.865048,0.021133,0.000000


In [22]:
years = [y for y in df_pat.columns.values]

In [23]:
# Make figure
fig_dict = {
    'data': [],
    'layout': {},
    'frames': []
}

In [33]:
# Fill layout parameters
fig_dict['layout']['xaxis'] = {
    'range': [df_gdp['1980'].min(), df_gdp['1980'].min()],
    'title': "GDP"
}
fig_dict['layout']['yaxis'] = {
    'range': [pat_100k['1980'].min(), pat_100k['1980'].min()],
    'title': 'Patents (per 100,000 Population)s',
    'type': 'log'
}
fig_dict['layout']['hovermode'] = 'closest'
fig_dict['updatemenus'] = [
    {
        'buttons': [
            {
                'args': [
                    None,
                    {
                        'frame': {'duration':500, 'redraw': False},
                        'fromcurrent': True,
                        'transition': {
                            'duration': 300,
                            'easing': 'quadratic-in-out'
                        },
                    }
                ],
                'label': 'play',
                'method': 'animate'
            },
            {
                'args': [
                    [None],
                    {
                        'frame':{'duration': 0, 'redraw': False},
                        'mode': 'immediate',
                        'transition': {'duration': 0}
                    }
                ],
                'label': 'pause',
                'method': 'animate'
            }
        ],
        'direction': 'left',
        'pad': {'r': 10, 't': 87},
        'showactive': False,
        'type': 'buttons',
        'x': 0.1,
        'xanchor': 'right',
        'y': 0,
        'yanchor': 'top'
    }
]

sliders_dict = {
    'active': 0,
    'yanchor': 'top',
    'xanchor': 'left',
    'currentvalue': {
        'font': {'size': 20},
        'prefix': 'Year',
        'visible': True,
        'xanchor': 'right'
    },
    'transition': {'duration': 300, 'easing': 'cubic-in-out'},
    'pad': {'b': 10, 't': 50},
    'len': 0.9,
    'x': 0.1,
    'y': 0,
    'steps': []
}

In [None]:
# Data


In [34]:
fig_dict["layout"]["sliders"] = [sliders_dict]

fig = go.Figure(fig_dict)

fig.show()

In [35]:
url = "https://raw.githubusercontent.com/plotly/datasets/master/gapminderDataFiveYear.csv"
dataset = pd.read_csv(url)
dataset

Unnamed: 0,country,year,pop,continent,lifeExp,gdpPercap
0,Afghanistan,1952,8425333.0,Asia,28.801,779.445314
1,Afghanistan,1957,9240934.0,Asia,30.332,820.853030
2,Afghanistan,1962,10267083.0,Asia,31.997,853.100710
3,Afghanistan,1967,11537966.0,Asia,34.020,836.197138
4,Afghanistan,1972,13079460.0,Asia,36.088,739.981106
...,...,...,...,...,...,...
1699,Zimbabwe,1987,9216418.0,Africa,62.351,706.157306
1700,Zimbabwe,1992,10704340.0,Africa,60.377,693.420786
1701,Zimbabwe,1997,11404948.0,Africa,46.809,792.449960
1702,Zimbabwe,2002,11926563.0,Africa,39.989,672.038623
