## Creating Histograms

In the cells below I import packages, import the summary citation dataset, and create indicator variables for external and abrupt patents. Then histograms and summary statistics are created for the following categories: internal incremental, internal abrupt, and external abrupt.

In [9]:
# Import packages
import pandas as pd
import numpy as np
import numpy.linalg as lin
import scipy.stats as sts
import scipy.integrate as intgr
import scipy.optimize as opt
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
#Setting plot colors
cmap1 = matplotlib.cm.get_cmap('summer')
%matplotlib notebook

In [3]:
#Loading data as a pandas dataset
data2 = pd.read_csv('cit_external_comn_share.csv', sep = ',', header = 0, index_col = 0)
#Deleting patents before 1976
data2 = data2[data2.appyear > 1975]

  mask |= (ar1 == a)


In [4]:
abr_cut = 0.15
data2['external_share'] = data2.external_share.replace(np.nan, -1)
level = np.rint(np.size(data2['tot_cit_correct'].dropna())
                                                   - abr_cut * np.size(data2['tot_cit_correct'].
                                                                   dropna()))
cutoff = np.sort(data2['tot_cit_correct'].dropna())[int(level)]
cutoff

6.6456

In [5]:
#This function will be used to determine whether a patent is abrupt or incremental.
#A patent at the top 10% of expected citations is set to be aprupt

def abrupt(cites, cutoff):
    #The cutoff value that defines an abrupt innovation can be adjusted
    if cites >= cutoff:
        return 0
    else:
        return 1
    
#Creating a new column for the indicator variable 'incremental'
data2['Increment'] = data2['tot_cit_correct'].apply(abrupt, cutoff = cutoff)
data2['tot_abr_inc_cit_correct'] = data2['tot_cit_correct'] - data2['tot_external_correct']
data2['tot_abr_class_cit_correct'] = data2['tot_cit_correct'] - data2['tot_external_class_correct']

#Defining External Patents
#The definition will depend on the share of citations that are external.
#That variable has many missing values for patents with no citations. This line replaces
#those missing values with -1. 
data2['external_share'] = data2.external_share.replace(np.nan, -1)
#Now I define a function to determine whether a patent is external
def external(cites):
    cutoff = 0.5
    if cites >= cutoff:
        return 1
    else:
        return 0
data2['External'] = data2['external_share'].apply(external)
print(np.mean(data2['Increment']))

0.8500409638936313


In [6]:
print(np.size(data2[data2.External == 1]))
print(np.size(data2[data2.External == 0]))

8709180
29129020


In [7]:
#data3 = data2[data2.icl_class == 'G01N']
#data3.hist(column = 'appyear')

#Plotting internal incremental innovation
#Eliminated observations that are not internal incremental
data3 = data2[data2.Increment == 1]
data3.hist(column = 'tot_cit_correct', bins = 20, edgecolor = 'black')
plt.title('Citation Distribution: Internal, Incremental Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
#Printing out summary statistics
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_cit_correct),',',
      np.std(data3.tot_cit_correct))

#External Abrupt innovation
data3 = data2[(data2.Increment == 0) & (data2.External == 1)]
data3.hist(column = 'tot_cit_correct', bins = 50, edgecolor = 'black')
plt.title('Citation Distribution: External, Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_cit_correct),',',
      np.std(data3.tot_cit_correct))

#Internal abrupt innovation
data3 = data2[(data2.Increment == 0) & (data2.External == 0)]
data3.hist(column = 'tot_cit_correct', bins = 50, edgecolor = 'black')
plt.title('Citation Distribution: Internal, Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_cit_correct),',',
      np.std(data3.tot_cit_correct))

#Total abrupt innovation
data3 = data2[(data2.Increment == 0)]
data3.hist(column = 'tot_cit_correct', bins = 50, edgecolor = 'black')
plt.title('Citation Distribution: All Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_cit_correct),',',
      np.std(data3.tot_cit_correct))

#Total incremental citations to abrupt patents
data3 = data2[(data2.Increment == 0)]
data3.hist(column = 'tot_abr_inc_cit_correct', bins = 50, edgecolor = 'black')
plt.title('Citation Distribution: Incremental Citations to Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_abr_inc_cit_correct),',',
      np.std(data3.tot_abr_inc_cit_correct))

#Total same class citations to abrupt patents
data3 = data2[(data2.Increment == 0)]
data3.hist(column = 'tot_abr_class_cit_correct', bins = 50, edgecolor = 'black')
plt.title('Citation Distribution: Same-class Citations to Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(
    data3.tot_abr_class_cit_correct),',', np.std(data3.tot_abr_class_cit_correct))

#Printing the amount of observations that would be "external incremental"
data3 = data2[(data2.Increment == 1) & (data2.External == 1)]
print('Summary Statistics for "External, incremental patents: ')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_cit_correct),',',
      np.std(data3.tot_cit_correct))

<IPython.core.display.Javascript object>

Total, mean, standard deviation: 1608201 , 0.612273047084753 , 1.454573508799488


<IPython.core.display.Javascript object>

Total, mean, standard deviation: 209214 , 21.677423509425964 , 24.17849386286376


<IPython.core.display.Javascript object>

Total, mean, standard deviation: 74495 , 22.08375134304602 , 25.57089218629412


<IPython.core.display.Javascript object>

Total, mean, standard deviation: 283709 , 21.784115196915213 , 24.55240077122959


<IPython.core.display.Javascript object>

Total, mean, standard deviation: 283709 , 6.792641200316211 , 13.748249792403069


<IPython.core.display.Javascript object>

Total, mean, standard deviation: 283709 , 7.679681645279871 , 15.524709610239684
Summary Statistics for "External, incremental patents: 
Total, mean, standard deviation: 226245 , 3.249474606641932 , 1.6387306650420979


## Testing how varying the Parameters affects the distribution (can be used to choose moments)

In [10]:
def pdf_eta2(lambda_inc_0, alpha, tau_lambda_int_abr, ndim):
    seq = np.arange(ndim)
    lambda_inc_k = np.array(lambda_inc_0 * alpha**seq)
    eta_0 = (tau_lambda_int_abr)/(lambda_inc_k[0] + tau_lambda_int_abr)
    eta_k_mod = [None] * ndim
    eta_k_mod[0] = eta_0
    for i in range(1, np.size(lambda_inc_k)):
        eta_k_mod[i] = eta_0 * ((lambda_inc_k[i-1])/(lambda_inc_k[i] + tau_lambda_int_abr))**i
    
    return eta_k_mod
ndim = 750
np.sum(pdf_eta2(.12, .99, 9.39, ndim))

dist_pts = np.arange(ndim)
plt.plot(dist_pts, pdf_eta2(5.5, .6, .2, 750),
         linewidth=2, color='r')
plt.xlim([0, 30])

<IPython.core.display.Javascript object>

(0, 30)

## Estimation

In [11]:
def pdf_eta(lambda_inc_k, tau_lambda_int_abr):
    
    eta_0 = (tau_lambda_int_abr)/(lambda_inc_k[0] + tau_lambda_int_abr)
    eta_k_mod = [None] * len(lambda_inc_k)
    eta_k_mod[0] = eta_0
    for i in range(1, np.size(lambda_inc_k)):
        eta_k_mod[i] = eta_0 * ((lambda_inc_k[i-1])/(lambda_inc_k[i] + tau_lambda_int_abr))**i
    
    return eta_k_mod

In [12]:
def model_moments(lambda_inc_k, tau_lambda_int_abr):
    
    eta_k_mod = pdf_eta(lambda_inc_k, tau_lambda_int_abr)
#    lambda_inc_mod = np.dot(eta_k_mod, lambda_inc_k)
    eta_k_mean_mod = np.dot(eta_k_mod, np.arange(len(lambda_inc_k)))
    eta_k_var_mod = np.dot(np.multiply(np.arange(len(lambda_inc_k)) - eta_k_mean_mod,
                                     np.arange(len(lambda_inc_k)) - eta_k_mean_mod), eta_k_mod)
    bpct_4_mod = np.dot(eta_k_mod[0:20], np.linspace(0, 19, 20))
#    eta_k_ske_mod = np.dot(np.multiply(np.multiply(((np.linspace(0, 15490, 15491) - eta_k_mean_mod)/eta_k_var_mod),
#                                    ((np.linspace(0, 15490, 15491) - eta_k_mean_mod)/eta_k_var_mod)),
#                                     ((np.linspace(0, 15490, 15491) - eta_k_mean_mod)/eta_k_var_mod))
#                         , eta_k_mod)
    bpct_5_mod = np.dot(eta_k_mod[20:40], np.linspace(20, 39, 20))
    
    return eta_k_mean_mod, eta_k_var_mod, bpct_4_mod, bpct_5_mod

def data_moments(xvals):
    
    unique, counts = np.unique(xvals, return_counts=True)
    eta_k_data = counts / np.size(np.array(xvals))
#    seq = np.linspace(0, 15490, 15491)
#    lambda_inc_k_data = np.array(0.8 * 0.95**seq)
#    lambda_inc_data = np.dot(eta_k_data, lambda_inc_k_data)
    bpct_4_data = np.dot(eta_k_data[0:20], np.linspace(0, 19, 20))
    bpct_5_data = np.dot(eta_k_data[20:40], np.linspace(20, 39, 20))
    
    return np.mean(xvals), np.var(xvals), bpct_4_data, bpct_5_data

def err_vec(xvals, lambda_inc_k, tau_lambda_int_abr, simple):
    
    eta_k_mean_mod, eta_k_var_mod, bpct_4_mod, bpct_5_mod = model_moments(
        lambda_inc_k, tau_lambda_int_abr)
    moms_mod = np.array([[eta_k_mean_mod], [eta_k_var_mod], [bpct_4_mod], [bpct_5_mod]])
    eta_k_mean_data, eta_k_var_data, bpct_4_data, bpct_5_data = data_moments(xvals)
    moms_data = np.array([[eta_k_mean_data], [eta_k_var_data], [bpct_4_data], [bpct_5_data]])
    if simple:
        err_vec = moms_mod - moms_data
    else:
        err_vec = (moms_mod - moms_data) / moms_data
    
    return err_vec

def crit_abr_distr(params, *args):
    
    tau_lambda_int_abr, lambda_inc_0, alpha = params
    xvals, W, ndim = args
    seq = np.arange(ndim)
    lambda_inc_k = np.array(lambda_inc_0 * alpha**seq)
    err = err_vec(xvals, lambda_inc_k, tau_lambda_int_abr, simple=False)
    crit_val = np.dot(np.dot(err.T, W), err) 
    
    return crit_val

In [13]:
cit_abr = data2[(data2.Increment == 0)]
abr_dist_data = np.array(cit_abr.tot_abr_class_cit_correct.dropna().value_counts()) / np.size(
    np.array(cit_abr.tot_abr_class_cit_correct.dropna()))
np.size(abr_dist_data)
np.max(cit_abr.tot_cit_correct)

739.26779999999997

In [14]:
tau_lambda_int_abr_init = 0.5
lambda_inc_0_init = 2
alpha_init = 0.95
W_hat = np.eye(4)
#W_hat = np.diag((1000000,100,1))
params_init = np.array([tau_lambda_int_abr_init, lambda_inc_0_init, alpha_init])
bnds = ((1e-5, None),(1e-5, None),(0.5, 1-1e-5))
ndim = 750 #This is the number of dimensions that we will sum over. I use 750 because 741 is the maximum in the data.
gmm_args = (np.array(cit_abr.tot_abr_class_cit_correct.dropna()), W_hat, ndim)
results_abr_distr = opt.minimize(crit_abr_distr, params_init, args=(gmm_args),
                                 method='L-BFGS-B', bounds = bnds)
tau_lambda_int_abr_GMM, lambda_inc_0_GMM, alpha_GMM = results_abr_distr.x
print('tau_lambda_int_abr_GMM=', tau_lambda_int_abr_GMM, 'lambda_inc_0_GMM=', lambda_inc_0_GMM,
      'alpha_GMM=', alpha_GMM)

tau_lambda_int_abr_GMM= 9.3313348249 lambda_inc_0_GMM= 0.181236768347 alpha_GMM= 0.993691321843


In [15]:
results_abr_distr

      fun: array([[ 2.99479273]])
 hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>
      jac: array([ -4.44089210e-08,   8.88178420e-07,  -1.27009514e-05])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 68
      nit: 16
   status: 0
  success: True
        x: array([ 9.33133482,  0.18123677,  0.99369132])

In [16]:
seq = np.arange(ndim)
lambda_inc_k_GMM = np.array(lambda_inc_0_GMM * alpha_GMM**seq)
eta_k_mean_data, eta_k_var_data, bpct_4_data, bpct_5_data = data_moments(np.array(
    cit_abr.tot_abr_class_cit_correct.dropna()))
eta_k_mean_mod, eta_k_var_mod, bpct_4_mod, bpct_5_mod = model_moments(lambda_inc_k_GMM,
                                                              tau_lambda_int_abr_GMM)

err1 = err_vec(np.array(cit_abr.tot_abr_class_cit_correct.dropna())
               , lambda_inc_k_GMM, tau_lambda_int_abr_GMM, False).reshape(4,)
print('eta_k_mean_mod = ', eta_k_mean_mod, 'eta_k_var_mod = ', eta_k_var_mod,
      'bpct_4_mod = ', bpct_4_mod, 'bpct_5_mod = ', bpct_5_mod)
print('eta_k_mean_data = ', eta_k_mean_data, 'eta_k_var_data = ', eta_k_var_data,
      'bpct_4_data = ', bpct_4_data, 'bpct_5_data = ', bpct_5_data)
print('Error vector=', err1)

eta_k_mean_mod =  0.0194152350359 eta_k_var_mod =  0.0197825296606 bpct_4_mod =  0.0194152350359 bpct_5_mod =  7.47715555201e-35
eta_k_mean_data =  7.67968164528 eta_k_var_data =  241.016608483 bpct_4_data =  0.0193649126394 bpct_5_data =  0.0720914740103
Error vector= [-0.99747187 -0.99991792  0.00259864 -1.        ]


In [17]:
sum_eta = np.sum(pdf_eta(lambda_inc_k_GMM, tau_lambda_int_abr_GMM))
lambda_inc_GMM = np.dot(pdf_eta(lambda_inc_k_GMM, tau_lambda_int_abr_GMM), lambda_inc_k_GMM)
print(sum_eta, lambda_inc_GMM, tau_lambda_int_abr_GMM)

0.999997681374 0.181214152121 9.3313348249


In [18]:
# Plot the histogram of the data
#Total same class citations to abrupt patents
data3 = data2[(data2.Increment == 0)]
data3.hist(column = 'tot_abr_class_cit_correct', normed=True, bins = 400, edgecolor = 'black')
plt.title('Citation Distribution: Same-class Citations to Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
plt.xlim([0, 30])

# Plot the estimated GMM
dist_pts = np.arange(ndim)
plt.plot(dist_pts, pdf_eta(lambda_inc_k_GMM, tau_lambda_int_abr_GMM),
         linewidth=2, color='r')
plt.xlim([0, 30])

<IPython.core.display.Javascript object>

(0, 30)

In [19]:
err1 = err_vec(np.array(cit_abr.tot_abr_class_cit_correct.dropna())
               , lambda_inc_k_GMM, tau_lambda_int_abr_GMM, False)
VCV2 = np.dot(err1, err1.T) / np.size(np.array(cit_abr.tot_abr_class_cit_correct.dropna()))
print(VCV2)
W_hat2 = lin.pinv(VCV2)  # Use the pseudo-inverse calculated by SVD because VCV2 is ill-conditioned
print(W_hat2)

[[  3.50693891e-06   3.51553880e-06  -9.13636239e-09   3.51582738e-06]
 [  3.51553880e-06   3.52415978e-06  -9.15876703e-09   3.52444907e-06]
 [ -9.13636239e-09  -9.15876703e-09   2.38022731e-11  -9.15951884e-09]
 [  3.51582738e-06   3.52444907e-06  -9.15951884e-09   3.52473838e-06]]
[[  3.14731989e+04   3.15503790e+04  -8.19947417e+01   3.15529689e+04]
 [  3.15503790e+04   3.16277484e+04  -8.21958134e+01   3.16303446e+04]
 [ -8.19947417e+01  -8.21958134e+01   2.13614692e-01  -8.22025605e+01]
 [  3.15529689e+04   3.16303446e+04  -8.22025605e+01   3.16329411e+04]]


In [36]:
params_init = np.array([tau_lambda_int_abr_GMM - 9.25, lambda_inc_0_GMM, alpha_GMM])
bnds = ((1e-5, None),(1e-5, None),(1e-5, 1-1e-5))
ndim = 750
gmm_args = (np.array(cit_abr.tot_abr_class_cit_correct.dropna()), W_hat2, ndim)
results2_abr_distr = opt.minimize(crit_abr_distr, params_init, args=(gmm_args),
                                 method='L-BFGS-B', bounds = bnds)
tau_lambda_int_abr_GMM2, lambda_inc_0_GMM2, alpha_GMM2 = results2_abr_distr.x
print('tau_lambda_int_abr_GMM2=', tau_lambda_int_abr_GMM2, 'lambda_inc_0_GMM2=', lambda_inc_0_GMM2,
      'alpha_GMM2=', alpha_GMM2)

tau_lambda_int_abr_GMM2= 0.0807742803505 lambda_inc_0_GMM2= 0.352539852577 alpha_GMM2= 0.993734658666


In [37]:
results2_abr_distr

      fun: array([[  6.56545105e-09]])
 hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>
      jac: array([  2.9913305 ,   1.40007919,  35.19113421])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 124
      nit: 6
   status: 0
  success: True
        x: array([ 0.08077428,  0.35253985,  0.99373466])

In [38]:
seq = np.arange(ndim)
lambda_inc_k_GMM2 = np.array(lambda_inc_0_GMM2 * alpha_GMM2**seq)
eta_k_mean_data2, eta_k_var_data2, bpct_4_data2, bpct_5_data2 = data_moments(np.array(
    cit_abr.tot_abr_class_cit_correct.dropna()))
eta_k_mean_mod2, eta_k_var_mod2, bpct_4_mod2, bpct_5_mod2 = model_moments(lambda_inc_k_GMM2,
                                                                 tau_lambda_int_abr_GMM2)

err2 = err_vec(np.array(cit_abr.tot_abr_class_cit_correct.dropna())
               , lambda_inc_k_GMM2, tau_lambda_int_abr_GMM2, False).reshape(4,)
print('eta_k_mean_mod2 = ', eta_k_mean_mod2, 'eta_k_var_mod2 = ', eta_k_var_mod2,
      'bpct_4_mod2 = ', bpct_4_mod2, 'bpct_5_mod2 = ', bpct_5_mod2)
print('eta_k_mean_data2 = ', eta_k_mean_data2, 'eta_k_var_data2 = ', eta_k_var_data2,
      'bpct_4_data2 = ', bpct_4_data2, 'bpct_5_data2 = ', bpct_5_data2)
print('Error vector=', err2)

eta_k_mean_mod2 =  3.97753584591 eta_k_var_mod2 =  18.4176171016 bpct_4_mod2 =  3.76672746862 bpct_5_mod2 =  0.209586495929
eta_k_mean_data2 =  7.67968164528 eta_k_var_data2 =  241.016608483 bpct_4_data2 =  0.0193649126394 bpct_5_data2 =  0.0720914740103
Error vector= [  -0.48207022   -0.92358362  193.51301117    1.90723   ]


In [39]:
# Plot the histogram of the data
#Total same class citations to abrupt patents
data3 = data2[(data2.Increment == 0)]
data3.hist(column = 'tot_abr_class_cit_correct', normed=True, bins = 400, edgecolor = 'black')
plt.title('Citation Distribution: Same-class Citations to Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
plt.xlim([0, 30])

# Plot the estimated GMM
dist_pts = np.arange(ndim)
plt.plot(dist_pts, pdf_eta(lambda_inc_k_GMM2, tau_lambda_int_abr_GMM2),
         linewidth=2, color='g')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0xd08ecc0>]

In [40]:
sum_eta = np.sum(pdf_eta(lambda_inc_k_GMM2, tau_lambda_int_abr_GMM2))
lambda_inc_GMM2 = np.dot(pdf_eta(lambda_inc_k_GMM2, tau_lambda_int_abr_GMM2), lambda_inc_k_GMM2)
print(sum_eta, lambda_inc_GMM2, tau_lambda_int_abr_GMM2)

0.97993079744 0.336885988332 0.0807742803505


In [41]:
def log_lik(xvals, lambda_inc_k, alpha, tau_lambda_int_abr):
    lik_array = np.zeros(len(xvals))
    pmf = pdf_eta(lambda_inc_k, tau_lambda_int_abr)
    rounded = np.round(xvals)
    for n in range(0, len(xvals)):
        lik_array[n] = pmf[np.int(rounded[n])]
    
    lik_array[lik_array < 1e-20] = 1e-20
    loglik_array = np.log(lik_array)
    
    return np.sum(loglik_array)

def crit_log_lik(params, *args):
    tau_lambda_int_abr, lambda_inc_0, alpha = params
    xvals, W, ndim = args
    seq = np.arange(ndim)
    lambda_inc_k = np.array(lambda_inc_0 * alpha**seq)
    loglik = log_lik(xvals, lambda_inc_k, alpha, tau_lambda_int_abr)
    crit_val = -loglik
    print(params, crit_val)
    
    return crit_val

In [42]:
tau_lambda_int_abr_init = 0.6
lambda_inc_0_init = 0.5
alpha_init = 0.6
params_init = np.array([tau_lambda_int_abr_init, lambda_inc_0_init, alpha_init])
bnds = ((1e-5, None),(1e-5, None),(1e-5, 1-1e-5))
ndim = 750
W = np.eye(3)
MLE_data = np.array(cit_abr.tot_abr_class_cit_correct.dropna())
gmm_args = (MLE_data, W, ndim)
results_MLE = opt.minimize(crit_log_lik, params_init, args=(gmm_args),
                                 method='L-BFGS-B', bounds = bnds)
'''
results_MLE = opt.minimize(crit_log_lik, params_init, args=(gmm_args),
                                 method='Nelder-Mead')
'''
tau_lambda_int_abr_MLE, lambda_inc_0_MLE, alpha_MLE = results_MLE.x
print('tau_lambda_int_abr_MLE=', tau_lambda_int_abr_MLE, 'lambda_inc_0_MLE=', lambda_inc_0_MLE,
      'alpha_MLE=', alpha_MLE)

[ 0.6  0.5  0.6] 4809277.26519
[ 0.60000001  0.5         0.6       ] 4809277.27247
[ 0.6         0.50000001  0.6       ] 4809277.25645
[ 0.6         0.5         0.60000001] 4809277.22148
[ 0.59999931  1.5         0.60000046] 4320200.22096
[ 0.59999932  1.5         0.60000046] 4320200.22771
[ 0.59999931  1.50000001  0.60000046] 4320200.21825
[ 0.59999931  1.5         0.60000047] 4320200.16821
[ 0.26770172  2.517121    0.99999   ] 884231.995484
[ 0.26770173  2.517121    0.99999   ] 884231.993659
[ 0.26770172  2.51712101  0.99999   ] 884231.995678
[ 0.26770172  2.517121    0.99999001] 884231.94166
[ 0.34401151  2.51614038  0.99999   ] 879046.082746
[ 0.34401152  2.51614038  0.99999   ] 879046.083023
[ 0.34401151  2.51614039  0.99999   ] 879046.082708
[ 0.34401151  2.51614038  0.99999001] 879046.012315
[ 0.33405779  2.51723019  0.99999   ] 878874.984599
[ 0.3340578   2.51723019  0.99999   ] 878874.984658
[ 0.33405779  2.5172302   0.99999   ] 878874.984591
[ 0.33405779  2.51723019  0.999990

In [43]:
print(results_MLE)
print(crit_log_lik((tau_lambda_int_abr_MLE, lambda_inc_0_MLE, 1.0), MLE_data, W, ndim))

      fun: 878867.03941513936
 hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>
      jac: array([  3.49245965e-02,  -3.49245965e-02,  -6.76931263e+06])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 32
      nit: 7
   status: 0
  success: True
        x: array([ 0.3314279 ,  2.51747937,  0.99999   ])
(0.33142789638702963, 2.5174793748758773, 1.0) 878799.385169
878799.385169


In [44]:
# Plot the histogram of the data
#Total same class citations to abrupt patents
data3 = data2[(data2.Increment == 0)]
data3.hist(column = 'tot_abr_class_cit_correct', normed=True, bins = 400, edgecolor = 'black')
plt.title('Citation Distribution: Same-class Citations to Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
plt.xlim([0, 30])

# Plot the estimated GMM
dist_pts = np.arange(ndim)
plt.plot(dist_pts, pdf_eta2(lambda_inc_0_MLE, alpha_MLE, tau_lambda_int_abr_MLE, ndim),
         linewidth=2, color='r')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x21edbc50>]

In [45]:
seq = np.arange(ndim)
lambda_inc_k_MLE = np.array(lambda_inc_0_MLE * alpha_MLE**seq)
sum_eta = np.sum(pdf_eta(lambda_inc_k_MLE, tau_lambda_int_abr_MLE))
lambda_inc_MLE = np.dot(pdf_eta(lambda_inc_k_MLE, tau_lambda_int_abr_MLE), lambda_inc_k_MLE)
print(sum_eta, lambda_inc_MLE, tau_lambda_int_abr_MLE)

0.999932899119 2.51711929652 0.331427896387


### Robustness

In [46]:
#other moments
#share of incremental patents
cit_inc = data2[(data2.Increment == 1)]
share_inc_data = np.size(np.array(cit_inc.patent)) / (np.size(np.array(cit_inc.patent)) +
                                     np.size(np.array(cit_abr.patent)))
share_inc_mod = lambda_inc_GMM / (lambda_inc_GMM + tau_lambda_int_abr_GMM)

print('share_inc_data = ', share_inc_data, 'share_inc_mod = ', share_inc_mod)

#RD intensity
RD_intens_mod = 0.106 * 0.346 * (1 / (share_inc_mod)) * np.dot(pdf_eta(lambda_inc_k_GMM,
                                                                   tau_lambda_int_abr_GMM),
                                             np.multiply(lambda_inc_k_GMM, lambda_inc_k_GMM)
                                            ) / 0.075547

RD_intens_data = 0.06797109
print('RD_intens_data = ', RD_intens_data, 'RD_intens_mod = ', RD_intens_mod)

share_inc_data =  0.8500409638936313 share_inc_mod =  0.0190500098932
RD_intens_data =  0.06797109 RD_intens_mod =  0.836864426185


In [47]:
#other moments
#share of incremental patents
cit_inc = data2[(data2.Increment == 1)]
share_inc_data = np.size(np.array(cit_inc.patent)) / (np.size(np.array(cit_inc.patent)) +
                                     np.size(np.array(cit_abr.patent)))
share_inc_mod2 = lambda_inc_GMM2 / (lambda_inc_GMM2 + tau_lambda_int_abr_GMM2)

print('share_inc_data = ', share_inc_data, 'share_inc_mod2 = ', share_inc_mod2)

#RD intensity
RD_intens_mod2 = 0.106 * 0.346 * (1 / (share_inc_mod2)) * np.dot(pdf_eta(lambda_inc_k_GMM2,
                                                                   tau_lambda_int_abr_GMM2),
                                             np.multiply(lambda_inc_k_GMM2, lambda_inc_k_GMM2)
                                            ) / 0.075547
RD_intens_data = 0.06797109
print('RD_intens_data = ', RD_intens_data, 'RD_intens_mod2 = ', RD_intens_mod2)

share_inc_data =  0.8500409638936313 share_inc_mod2 =  0.80660291053
RD_intens_data =  0.06797109 RD_intens_mod2 =  0.0697561965886


In [48]:
#other moments
#share of incremental patents
cit_inc = data2[(data2.Increment == 1)]
share_inc_data = np.size(np.array(cit_inc.patent)) / (np.size(np.array(cit_inc.patent)) +
                                     np.size(np.array(cit_abr.patent)))
share_inc_mod3 = lambda_inc_MLE / (lambda_inc_MLE + tau_lambda_int_abr_MLE)

print('share_inc_data = ', share_inc_data, 'share_inc_mod3 = ', share_inc_mod3)

#RD intensity
RD_intens_mod3 = 0.106 * 0.346 * (1 / (share_inc_mod3)) * np.dot(pdf_eta(lambda_inc_k_MLE,
                                                                   tau_lambda_int_abr_MLE),
                                             np.multiply(lambda_inc_k_MLE, lambda_inc_k_MLE)
                                            ) / 0.075547
RD_intens_data = 0.06797109
print('RD_intens_data = ', RD_intens_data, 'RD_intens_mod3 = ', RD_intens_mod3)

share_inc_data =  0.8500409638936313 share_inc_mod3 =  0.883650199929
RD_intens_data =  0.06797109 RD_intens_mod3 =  3.48113691273


## Entry Rate Calculations

In [5]:
Patent_evol = pd.read_csv('evol_pat_year_count_df.csv', sep = ',', header = 0, index_col = 0)
Innov_emp = pd.read_csv('Innov_emp.csv', sep = ',', header = 0, index_col = 0)
#Entry rate per 5 years (by employment)
entry_rate1976 = Patent_evol.emp[(Patent_evol.firstYear >= 1976) & (
    Patent_evol.firstYear < 1981)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
entry_rate1981 = Patent_evol.emp[(Patent_evol.firstYear >= 1981) & (
    Patent_evol.firstYear < 1986)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
entry_rate1986 = Patent_evol.emp[(Patent_evol.firstYear >= 1986) & (
    Patent_evol.firstYear < 1991)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
entry_rate1991 = Patent_evol.emp[(Patent_evol.firstYear >= 1991) & (
    Patent_evol.firstYear < 1996)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
entry_rate1996 = Patent_evol.emp[(Patent_evol.firstYear >= 1996) & (
    Patent_evol.firstYear < 2001)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
entry_rate2001= Patent_evol.emp[(Patent_evol.firstYear >= 2001) & (
    Patent_evol.firstYear < 2006)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
print('Entry rates: \n1976 - 1980 = ', entry_rate1976, '\n1981 - 1985 = ', entry_rate1981,
     '\n1986 - 1990 = ', entry_rate1986, '\n1991 - 1995 = ', entry_rate1991,
     '\n1996 - 2000 = ', entry_rate1996, '\n2001 - 2005 = ', entry_rate2001)

Entry rates: 
1976 - 1980 =  0.0021245205023886736 
1981 - 1985 =  0.0023513519177680454 
1986 - 1990 =  0.002191138646583526 
1991 - 1995 =  0.001965464743272693 
1996 - 2000 =  0.0044289127179007156 
2001 - 2005 =  0.0018598440948136667


In [6]:
#Entry rate per 5 years (by nb firms)
entry_rate1976 = Patent_evol.firstYear[(Patent_evol.firstYear >= 1976) & (
    Patent_evol.firstYear < 1981)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 1976) & 
                                                          (Innov_emp.fyear < 1981)].sum()
entry_rate1981 = Patent_evol.firstYear[(Patent_evol.firstYear >= 1981) & (
    Patent_evol.firstYear < 1986)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 1981) & 
                                                          (Innov_emp.fyear < 1986)].sum()
entry_rate1986 = Patent_evol.firstYear[(Patent_evol.firstYear >= 1986) & (
    Patent_evol.firstYear < 1991)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 1986) & 
                                                          (Innov_emp.fyear < 1991)].sum()
entry_rate1991 = Patent_evol.firstYear[(Patent_evol.firstYear >= 1991) & (
    Patent_evol.firstYear < 1996)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 1991) & 
                                                          (Innov_emp.fyear < 1996)].sum()
entry_rate1996 = Patent_evol.firstYear[(Patent_evol.firstYear >= 1996) & (
    Patent_evol.firstYear < 2001)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 1996) & 
                                                          (Innov_emp.fyear < 2001)].sum()
entry_rate2001 = Patent_evol.firstYear[(Patent_evol.firstYear >= 2001) & (
    Patent_evol.firstYear < 2006)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 2001) & 
                                                          (Innov_emp.fyear < 2006)].sum()
print('Entry rates: \n1976 - 1980 = ', entry_rate1976, '\n1981 - 1985 = ', entry_rate1981,
     '\n1986 - 1990 = ', entry_rate1986, '\n1991 - 1995 = ', entry_rate1991,
     '\n1996 - 2000 = ', entry_rate1996, '\n2001 - 2005 = ', entry_rate2001)
entry_rate = np.mean(np.array([entry_rate1976, entry_rate1981, entry_rate1986, entry_rate1991
                             , entry_rate1996, entry_rate2001]))
print('Mean entry rate = ', entry_rate)
lambda_e = entry_rate

Entry rates: 
1976 - 1980 =  0.0704656355967 
1981 - 1985 =  0.0591484464902 
1986 - 1990 =  0.0625711845103 
1991 - 1995 =  0.067709919016 
1996 - 2000 =  0.0710200455328 
2001 - 2005 =  0.0266078444889
Mean entry rate =  0.0595871792725
