This notebook extends our theoretical model with one feature: a patent has only a 75% chance of being cited by each future patent within its technology cluster. The 75% figure is the estimate from AK2016.

In [2]:
# Import packages
import pandas as pd
import numpy as np
import numpy.linalg as lin
import scipy.stats as sts
import scipy.integrate as intgr
import scipy.optimize as opt
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
#Setting plot colors
# matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
# matplotlib.pyplot.get_cmap performs the same lookup and exists in old and new releases.
cmap1 = matplotlib.pyplot.get_cmap('summer')
%matplotlib notebook

In [48]:
# Read the patent citation table into a pandas DataFrame; the first CSV column
# becomes the index and the first row supplies the column names.
data2 = pd.read_csv('cit_external_comn_share.csv', sep=',', header=0, index_col=0)
# Keep only patents whose application year is after 1975.
data2 = data2.loc[data2['appyear'] > 1975]

  mask |= (ar1 == a)


In [49]:
# Share of patents (ranked by tot_cit_correct) that will be labelled abrupt.
abr_cut = 0.1
# Missing external shares are coded as -1.
data2['external_share'] = data2['external_share'].replace(np.nan, -1)
# Position, within the ascending ordering of the non-missing citation totals,
# where the top `abr_cut` share begins: n - abr_cut * n, rounded to an integer.
level = np.rint(data2['tot_cit_correct'].count()
                - abr_cut * data2['tot_cit_correct'].count())
# The citation total found at that position is the abrupt/incremental threshold.
cutoff = np.sort(data2['tot_cit_correct'].dropna())[int(level)]
cutoff

10.6616

In [50]:
#This function will be used to determine whether a patent is abrupt or incremental.
#A patent in the top 10% of expected citations is classified as abrupt.

def abrupt(cites, cutoff):
    """Return the incremental indicator for one patent.

    Parameters
    ----------
    cites : number
        Citation measure of the patent.
    cutoff : number
        Threshold at or above which a patent counts as abrupt; the caller
        chooses it, so the definition of "abrupt" can be adjusted.

    Returns
    -------
    int
        0 when ``cites >= cutoff`` (abrupt), 1 otherwise (incremental).
        Note the inverted sense relative to the function name: the value is
        meant to be stored as an "incremental" flag.
    """
    return 0 if cites >= cutoff else 1
    
# Indicator column 'Increment': 1 for incremental patents, 0 for abrupt ones,
# obtained by comparing each patent's tot_cit_correct with the cutoff.
data2['Increment'] = data2['tot_cit_correct'].apply(lambda cites: abrupt(cites, cutoff))
# Citation totals net of the external counts.
data2['tot_abr_inc_cit_correct'] = data2['tot_cit_correct'].sub(data2['tot_external_correct'])
data2['tot_abr_class_cit_correct'] = data2['tot_cit_correct'].sub(data2['tot_external_class_correct'])

# External patents
# Whether a patent is external is decided from the share of its citations
# that are external. That share is missing for many patents with no
# citations, so missing values are recoded as -1 here.
data2['external_share'] = data2['external_share'].replace(np.nan, -1)
#Now I define a function to determine whether a patent is external
def external(cites):
    """Return 1 when an external-citation share marks a patent as external.

    Parameters
    ----------
    cites : number
        Share of the patent's citations that are external.

    Returns
    -------
    int
        1 if the share is at least 0.5, otherwise 0 (this includes the -1
        placeholder used for missing shares).
    """
    share_threshold = 0.5
    is_external = cites >= share_threshold
    return 1 if is_external else 0
# Tag every patent as external (1) or not (0) from its external citation share.
data2['External'] = data2['external_share'].map(external)
# Fraction of patents flagged as incremental.
print(data2['Increment'].mean())

0.899991542938089


In [40]:
def pdf_eta2(lambda_inc_0, alpha, tau_lambda_int_abr, ndim):
    """Build the eta_k sequence for geometrically scaled rates.

    The rates are ``lambda_k = lambda_inc_0 * alpha**k`` for
    ``k = 0, ..., ndim - 1``. The sequence is defined recursively:

        eta_0 = tau / (lambda_0 + tau)
        eta_k = eta_{k-1} * lambda_{k-1} / (lambda_k + tau)

    Parameters
    ----------
    lambda_inc_0 : float
        Rate at k = 0.
    alpha : float
        Multiplicative factor applied to the rate at each step.
    tau_lambda_int_abr : float
        The ``tau`` term of the recursion.
    ndim : int
        Number of terms to compute (must be at least 1).

    Returns
    -------
    list
        The ``ndim`` values eta_0, ..., eta_{ndim-1}.
    """
    tau = tau_lambda_int_abr
    rates = np.array(lambda_inc_0 * alpha ** np.arange(ndim))

    probs = [tau / (rates[0] + tau)]
    for k in range(1, rates.size):
        step = rates[k - 1] / (rates[k] + tau)
        probs.append(probs[k - 1] * step)

    return probs

def pdf_eta_norm(lambda_inc_k, tau_lambda_int_abr):
    """Compute the eta_k sequence for a given vector of rates.

        eta_0 = tau / (lambda_0 + tau)
        eta_k = eta_{k-1} * lambda_{k-1} / (lambda_k + tau)

    Parameters
    ----------
    lambda_inc_k : sequence of float
        Rates lambda_0, lambda_1, ... (non-empty, one-dimensional).
    tau_lambda_int_abr : float
        The ``tau`` term of the recursion.

    Returns
    -------
    list
        One eta value per entry of ``lambda_inc_k``.
    """
    tau = tau_lambda_int_abr
    probs = [tau / (lambda_inc_k[0] + tau)]
    for k in range(1, np.size(lambda_inc_k)):
        step = lambda_inc_k[k - 1] / (lambda_inc_k[k] + tau)
        probs.append(probs[k - 1] * step)

    return probs

def pdf_citations(lambda_inc_0, tau_lambda_int_abr, alpha, ndim, cite_prob=.75):
    """Citation-count pmf when each potential citation occurs with probability ``cite_prob``.

    The rates ``lambda_k = lambda_inc_0 * alpha**k`` (k = 0..ndim-1) are fed
    to ``pdf_eta_norm`` to obtain weights ``eta_j``. The result mixes
    binomial distributions with those weights:

        pmf[i] = sum_j eta_j * Binomial(i; n=j, p=cite_prob)

    Presumably ``j`` is the number of follow-on patents and ``i`` the number
    of them that actually cite — confirm against the model write-up.

    Parameters
    ----------
    lambda_inc_0 : float
        Rate at k = 0.
    tau_lambda_int_abr : float
        The ``tau`` term passed to ``pdf_eta_norm``.
    alpha : float
        Multiplicative factor applied to the rate at each step.
    ndim : int
        Number of support points (0..ndim-1).
    cite_prob : float, optional
        Binomial success probability; defaults to the 0.75 that was
        previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of length ``ndim`` holding pmf[0..ndim-1].
    """
    seq = np.arange(ndim)
    lambda_inc_k = np.array(lambda_inc_0 * alpha**seq)
    etas = np.asarray(pdf_eta_norm(lambda_inc_k, tau_lambda_int_abr), dtype=float)

    # One broadcast call replaces ndim**2 scalar binom.pmf calls.
    # thinning[i, j] = P(i successes | j trials); it is 0 wherever i > j.
    counts = np.arange(etas.size)
    thinning = sts.binom.pmf(counts[:, np.newaxis], counts[np.newaxis, :], cite_prob)

    return thinning.dot(etas)

In [45]:
# Trial parameter values and grid size for a quick comparison of the two distributions.
ndim = 100
lambda_inc_0_test = 4.06
alpha_test = .95
tau_lambda_int_abr_test = .403

# pmf comes from pdf_citations, pmf2 from pdf_eta2; print the total mass of each.
pmf = pdf_citations(lambda_inc_0_test, tau_lambda_int_abr_test, alpha_test, ndim)
print(np.sum(pmf))
pmf2 = pdf_eta2(lambda_inc_0_test, alpha_test, tau_lambda_int_abr_test, ndim)
print(np.sum(pmf2))

# Overlay both curves (pmf in red, pmf2 in green) and show only x in [0, 30].
dist_pts = np.arange(ndim)
plt.figure(7)
plt.plot(dist_pts, pmf, linewidth=2, color='r')
plt.plot(dist_pts, pmf2, linewidth=2, color='g')
plt.xlim([0, 30])

1.0
1.0


<IPython.core.display.Javascript object>

(0, 30)

In [56]:
def pdf_eta_norm(lambda_inc_k, tau_lambda_int_abr):
    """Compute the eta_k sequence for a given vector of rates.

    NOTE: a function with this name and the same recursion is also defined
    in an earlier cell; running this cell rebinds the name.

        eta_0 = tau / (lambda_0 + tau)
        eta_k = eta_{k-1} * lambda_{k-1} / (lambda_k + tau)

    Parameters
    ----------
    lambda_inc_k : sequence of float
        Rates lambda_0, lambda_1, ... (non-empty, one-dimensional).
    tau_lambda_int_abr : float
        The ``tau`` term of the recursion.

    Returns
    -------
    list
        One eta value per entry of ``lambda_inc_k``.
    """
    tau = tau_lambda_int_abr
    n_terms = np.size(lambda_inc_k)

    etas = [tau / (lambda_inc_k[0] + tau)]
    k = 1
    while k < n_terms:
        etas.append(etas[k - 1] * (lambda_inc_k[k - 1] / (lambda_inc_k[k] + tau)))
        k += 1

    return etas

def pdf_citations(lambda_inc_k, tau_lambda_int_abr, cite_prob=.75):
    """Citation-count pmf when each potential citation occurs with probability ``cite_prob``.

    Weights ``eta_j`` come from ``pdf_eta_norm(lambda_inc_k, tau_lambda_int_abr)``
    and are mixed with binomial distributions:

        pmf[i] = sum_j eta_j * Binomial(i; n=j, p=cite_prob)

    Presumably ``j`` is the number of follow-on patents and ``i`` the number
    of them that actually cite — confirm against the model write-up.

    NOTE: this takes the rate vector directly, unlike the four-argument
    ``pdf_citations`` defined in an earlier cell, whose name it rebinds.

    Parameters
    ----------
    lambda_inc_k : sequence of float
        Rates lambda_0, lambda_1, ...
    tau_lambda_int_abr : float
        The ``tau`` term passed to ``pdf_eta_norm``.
    cite_prob : float, optional
        Binomial success probability; defaults to the 0.75 that was
        previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Array with one pmf value per entry of ``lambda_inc_k``.
    """
    etas = np.asarray(pdf_eta_norm(lambda_inc_k, tau_lambda_int_abr), dtype=float)

    # One broadcast call replaces len(etas)**2 scalar binom.pmf calls.
    # thinning[i, j] = P(i successes | j trials); it is 0 wherever i > j.
    counts = np.arange(etas.size)
    thinning = sts.binom.pmf(counts[:, np.newaxis], counts[np.newaxis, :], cite_prob)

    return thinning.dot(etas)

def log_lik(xvals, lambda_inc_k, alpha, tau_lambda_int_abr):
    """Log-likelihood of observed counts under the ``pdf_eta_norm`` distribution.

    Each observation is rounded to the nearest integer and used as an index
    into the pmf returned by ``pdf_eta_norm``. Likelihoods below 1e-20 are
    floored at 1e-20 before taking logs, so the sum stays finite.

    Parameters
    ----------
    xvals : array-like of float
        Observed counts; after rounding each must lie in ``0..len(pmf)-1``.
    lambda_inc_k : sequence of float
        Rates passed to ``pdf_eta_norm``.
    alpha : float
        Not used here; kept so the signature stays unchanged for callers.
    tau_lambda_int_abr : float
        The ``tau`` term passed to ``pdf_eta_norm``.

    Returns
    -------
    float
        Sum of the log-likelihoods (0.0 for empty ``xvals``).

    Raises
    ------
    ValueError
        If ``xvals`` contains NaN or infinite values.
    IndexError
        If a rounded observation falls outside ``0..len(pmf)-1``.
    """
    pmf = np.asarray(pdf_eta_norm(lambda_inc_k, tau_lambda_int_abr), dtype=float)

    obs = np.asarray(xvals, dtype=float)
    if not np.all(np.isfinite(obs)):
        raise ValueError('xvals must contain only finite values')

    # np.int was removed in NumPy 1.24; cast the rounded counts with the
    # builtin int dtype and look them all up in one indexing operation.
    idx = np.round(obs).astype(int)
    # Negative indices would silently wrap to the tail of the pmf, so reject them.
    if idx.size and (idx.min() < 0 or idx.max() >= pmf.size):
        raise IndexError('rounded xvals must lie between 0 and len(pmf) - 1')

    lik_array = np.maximum(pmf[idx], 1e-20)

    return np.sum(np.log(lik_array))

def log_lik2(xvals, lambda_inc_k, alpha, tau_lambda_int_abr):
    """Log-likelihood of observed counts under the ``pdf_citations`` distribution.

    Same computation as ``log_lik`` but with the pmf taken from
    ``pdf_citations(lambda_inc_k, tau_lambda_int_abr)``. Each observation is
    rounded to the nearest integer and used as an index into that pmf;
    likelihoods below 1e-20 are floored at 1e-20 before taking logs.

    Parameters
    ----------
    xvals : array-like of float
        Observed counts; after rounding each must lie in ``0..len(pmf)-1``.
    lambda_inc_k : sequence of float
        Rates passed to ``pdf_citations``.
    alpha : float
        Not used here; kept so the signature stays unchanged for callers.
    tau_lambda_int_abr : float
        The ``tau`` term passed to ``pdf_citations``.

    Returns
    -------
    float
        Sum of the log-likelihoods (0.0 for empty ``xvals``).

    Raises
    ------
    ValueError
        If ``xvals`` contains NaN or infinite values.
    IndexError
        If a rounded observation falls outside ``0..len(pmf)-1``.
    """
    pmf = np.asarray(pdf_citations(lambda_inc_k, tau_lambda_int_abr), dtype=float)

    obs = np.asarray(xvals, dtype=float)
    if not np.all(np.isfinite(obs)):
        raise ValueError('xvals must contain only finite values')

    # np.int was removed in NumPy 1.24; cast the rounded counts with the
    # builtin int dtype and look them all up in one indexing operation.
    idx = np.round(obs).astype(int)
    # Negative indices would silently wrap to the tail of the pmf, so reject them.
    if idx.size and (idx.min() < 0 or idx.max() >= pmf.size):
        raise IndexError('rounded xvals must lie between 0 and len(pmf) - 1')

    lik_array = np.maximum(pmf[idx], 1e-20)

    return np.sum(np.log(lik_array))

def crit(params, *args):
    """Objective for the optimizer: negative log-likelihood from ``log_lik``.

    Parameters
    ----------
    params : sequence of three floats
        ``(tau_lambda_int_abr, lambda_inc_0, alpha)``, in that order.
    *args
        ``(xvals, W, ndim)``: the observations, a weighting matrix that is
        unpacked but not used, and the number of support points.

    Returns
    -------
    float
        ``-log_lik(...)`` evaluated at rates ``lambda_inc_0 * alpha**k`` for
        ``k = 0..ndim-1``.
    """
    tau_lambda_int_abr, lambda_inc_0, alpha = params
    xvals, _, ndim = args

    # Geometric rate schedule implied by the candidate parameters.
    rates = np.array(lambda_inc_0 * alpha ** np.arange(ndim))

    # To trace the optimizer, print (params, returned value) here.
    return -log_lik(xvals, rates, alpha, tau_lambda_int_abr)

def crit2(params, *args):
    """Objective for the optimizer: negative log-likelihood from ``log_lik2``.

    Prints the parameter vector and the objective value on every call.

    Parameters
    ----------
    params : sequence of three floats
        ``(tau_lambda_int_abr, lambda_inc_0, alpha)``, in that order.
    *args
        ``(xvals, W, ndim)``: the observations, a weighting matrix that is
        unpacked but not used, and the number of support points.

    Returns
    -------
    float
        ``-log_lik2(...)`` evaluated at rates ``lambda_inc_0 * alpha**k`` for
        ``k = 0..ndim-1``.
    """
    tau_lambda_int_abr, lambda_inc_0, alpha = params
    xvals, _, ndim = args

    # Geometric rate schedule implied by the candidate parameters.
    rates = np.array(lambda_inc_0 * alpha ** np.arange(ndim))
    crit_val = -log_lik2(xvals, rates, alpha, tau_lambda_int_abr)

    # Progress trace: one line per objective evaluation.
    print(params, crit_val)
    return crit_val

In [64]:
# Estimation sample: patents flagged as abrupt (Increment == 0).
cit_abr = data2[(data2.Increment == 0)]

# Starting values, in the order crit() unpacks them: (tau, lambda_0, alpha).
tau_lambda_int_abr_init = 1.5
lambda_inc_0_init = 0.5
alpha_init = 0.9
params_init = np.array([tau_lambda_int_abr_init, lambda_inc_0_init, alpha_init])
# NOTE(review): bnds is not passed to opt.minimize below, so the Nelder-Mead
# search is unconstrained (alpha may end up above 1). A bounded L-BFGS-B call
# using an objective named crit_log_lik used to sit here inside a string
# literal; that objective is not defined in the visible cells.
bnds = ((1e-5, None),(1e-5, None),(1e-5, 1-1e-5))
ndim = 100
W = np.eye(3)
# Cap the observations at ndim - 1 so that every rounded value is a valid
# index into the length-ndim pmf used by the likelihood.
MLE_data = np.minimum(np.array(cit_abr.tot_abr_class_cit_correct.dropna()), ndim - 1)
gmm_args = (MLE_data, W, ndim)

# Minimise the negative log-likelihood defined by crit().
results_MLE = opt.minimize(crit, params_init, args=gmm_args,
                           method='Nelder-Mead')

tau_lambda_int_abr_MLE, lambda_inc_0_MLE, alpha_MLE = results_MLE.x
print('tau_lambda_int_abr_MLE=', tau_lambda_int_abr_MLE, 'lambda_inc_0_MLE=', lambda_inc_0_MLE,
      'alpha_MLE=', alpha_MLE)
print('-log likelihood =', results_MLE.fun)

tau_lambda_int_abr_MLE= 0.128123863019 lambda_inc_0_MLE= 0.921868485205 alpha_MLE= 1.02466455386
-log likelihood = 617423.613073


In [62]:
# Estimation sample: patents flagged as abrupt (Increment == 0).
cit_abr = data2[(data2.Increment == 0)]

# Starting values, in the order crit2() unpacks them: (tau, lambda_0, alpha).
tau_lambda_int_abr_init = 1.5
lambda_inc_0_init = 0.5
alpha_init = 0.9
params_init = np.array([tau_lambda_int_abr_init, lambda_inc_0_init, alpha_init])
# NOTE(review): bnds is not passed to opt.minimize below, so the Nelder-Mead
# search is unconstrained (alpha may end up above 1). A bounded L-BFGS-B call
# using an objective named crit_log_lik used to sit here inside a string
# literal; that objective is not defined in the visible cells.
bnds = ((1e-5, None),(1e-5, None),(1e-5, 1-1e-5))
ndim = 100
W = np.eye(3)
# Cap the observations at ndim - 1 so that every rounded value is a valid
# index into the length-ndim pmf used by the likelihood.
MLE_data = np.minimum(np.array(cit_abr.tot_abr_class_cit_correct.dropna()), ndim - 1)

gmm_args = (MLE_data, W, ndim)

# Minimise the negative log-likelihood defined by crit2(), which prints one
# line per objective evaluation.
results_MLE = opt.minimize(crit2, params_init, args=gmm_args,
                           method='Nelder-Mead')

tau_lambda_int_abr_MLE, lambda_inc_0_MLE, alpha_MLE = results_MLE.x
print('tau_lambda_int_abr_MLE=', tau_lambda_int_abr_MLE, 'lambda_inc_0_MLE=', lambda_inc_0_MLE,
      'alpha_MLE=', alpha_MLE)
print('-log likelihood =', results_MLE.fun)

[ 1.5  0.5  0.9] 2975035.61419
[ 1.575  0.5    0.9  ] 3008622.44621
[ 1.5    0.525  0.9  ] 2941649.64705
[ 1.5    0.5    0.945] 2780741.05808
[ 1.425       0.51666667  0.93      ] 2786788.68162
[ 1.45        0.52777778  0.95      ] 2689488.68686
[ 1.425       0.54166667  0.975     ] 2525910.34254
[ 1.4         0.51388889  1.        ] 2408444.55556
[ 1.35        0.50833333  1.05      ] 1979936.31993
[ 1.425       0.51666667  1.05      ] 2018989.21781
[ 1.3         0.54444444  1.105     ] 1536513.7524
[ 1.2         0.56666667  1.185     ] 1277775.77857
[ 1.225       0.51944444  1.215     ] 1305962.4253
[ 1.09166667  0.5462963   1.25      ] 1217098.61764
[ 0.925       0.56111111  1.35      ] 1185346.72064
[ 0.88333333  0.58981481  1.45      ] 1237805.49069
[ 0.78055556  0.62561728  1.44166667] 1201033.06992
[ 0.52592593  0.61769547  1.64277778] 1363047.3343
[ 1.03148148  0.57942387  1.29944444] 1182686.07313
[ 0.94135802  0.58762003  1.27740741] 1142270.36105
[ 0.97037037  0.58652263  1.1