## Creating Histograms

In the cells below I import packages, import the summary citation dataset, and create indicator variables for external and abrupt patents. Then histograms and summary statistics are created for the following categories: internal incremental, internal abrupt, and external abrupt.

In [1]:
# Import packages
import pandas as pd
import numpy as np
import numpy.linalg as lin
import scipy.stats as sts
import scipy.integrate as intgr
import scipy.optimize as opt
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
#Setting plot colors
cmap1 = matplotlib.cm.get_cmap('summer')
%matplotlib notebook

In [2]:
#Loading data as a pandas dataset
data2 = pd.read_csv('cit_external_comn_share.csv', sep = ',', header = 0, index_col = 0)
#Deleting patents before 1976
data2 = data2[data2.appyear > 1975]

  mask |= (ar1 == a)


In [3]:
data2['external_share'] = data2.external_share.replace(np.nan, -1)
level = np.rint(np.size(data2['tot_cit_correct'].dropna())
                                                   - 0.1 * np.size(data2['tot_cit_correct'].
                                                                   dropna()))
cutoff = np.sort(data2['tot_cit_correct'].dropna())[int(level)]
cutoff

10.6616

In [4]:
#This function will be used to determine whether a patent is abrupt or incremental.
#A patent at the top 10% of expected citations is set to be aprupt

def abrupt(cites, cutoff):
    #The cutoff value that defines an abrupt innovation can be adjusted
    if cites >= cutoff:
        return 0
    else:
        return 1
    
#Creating a new column for the indicator variable 'incremental'
data2['Increment'] = data2['tot_cit_correct'].apply(abrupt, cutoff = cutoff)
data2['tot_abr_inc_cit_correct'] = data2['tot_cit_correct'] - data2['tot_external_correct']
data2['tot_abr_class_cit_correct'] = data2['tot_cit_correct'] - data2['tot_external_class_correct']

#Defining External Patents
#The definition will depend on the share of citations that are external.
#That variable has many missing values for patents with no citations. This line replaces
#those missing values with -1. 
data2['external_share'] = data2.external_share.replace(np.nan, -1)
#Now I define a function to determine whether a patent is external
def external(cites):
    cutoff = 0.5
    if cites >= cutoff:
        return 1
    else:
        return 0
data2['External'] = data2['external_share'].apply(external)
print(np.mean(data2['Increment']))

0.899991542938089


In [9]:
print(np.size(data2[data2.External == 1]))
print(np.size(data2[data2.External == 0]))

8709180
29129020


In [349]:
#data3 = data2[data2.icl_class == 'G01N']
#data3.hist(column = 'appyear')

#Plotting internal incremental innovation
#Eliminated observations that are not internal incremental
data3 = data2[data2.Increment == 1]
data3.hist(column = 'tot_cit_correct', bins = 20, edgecolor = 'black')
plt.title('Citation Distribution: Internal, Incremental Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
#Printing out summary statistics
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_cit_correct),',',
      np.std(data3.tot_cit_correct))

#External Abrupt innovation
data3 = data2[(data2.Increment == 0) & (data2.External == 1)]
data3.hist(column = 'tot_cit_correct', bins = 50, edgecolor = 'black')
plt.title('Citation Distribution: External, Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_cit_correct),',',
      np.std(data3.tot_cit_correct))

#Internal abrupt innovation
data3 = data2[(data2.Increment == 0) & (data2.External == 0)]
data3.hist(column = 'tot_cit_correct', bins = 50, edgecolor = 'black')
plt.title('Citation Distribution: Internal, Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_cit_correct),',',
      np.std(data3.tot_cit_correct))

#Total abrupt innovation
data3 = data2[(data2.Increment == 0)]
data3.hist(column = 'tot_cit_correct', bins = 50, edgecolor = 'black')
plt.title('Citation Distribution: All Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_cit_correct),',',
      np.std(data3.tot_cit_correct))

#Total incremental citations to abrupt patents
data3 = data2[(data2.Increment == 0)]
data3.hist(column = 'tot_abr_inc_cit_correct', bins = 50, edgecolor = 'black')
plt.title('Citation Distribution: Incremental Citations to Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_abr_inc_cit_correct),',',
      np.std(data3.tot_abr_inc_cit_correct))

#Total same class citations to abrupt patents
data3 = data2[(data2.Increment == 0)]
data3.hist(column = 'tot_abr_class_cit_correct', bins = 50, edgecolor = 'black')
plt.title('Citation Distribution: Same-class Citations to Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(
    data3.tot_abr_class_cit_correct),',', np.std(data3.tot_abr_class_cit_correct))

#Printing the amount of observations that would be "external incremental"
data3 = data2[(data2.Increment == 1) & (data2.External == 1)]
print('Summary Statistics for "External, incremental patents: ')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_cit_correct),',',
      np.std(data3.tot_cit_correct))

<IPython.core.display.Javascript object>

Total, mean, standard deviation: 1702703 , 1.0452714309813296 , 2.293187572379963


<IPython.core.display.Javascript object>

Total, mean, standard deviation: 139227 , 28.339301601706214 , 27.29738437627662


<IPython.core.display.Javascript object>

Total, mean, standard deviation: 49980 , 28.811253965590097 , 28.92134667414797


<IPython.core.display.Javascript object>

Total, mean, standard deviation: 189207 , 28.463970240531847 , 27.736385607322156


<IPython.core.display.Javascript object>

Total, mean, standard deviation: 189207 , 8.850634295773176 , 16.309264325805366


<IPython.core.display.Javascript object>

Total, mean, standard deviation: 189207 , 10.06398987669824 , 18.42557679906264
Summary Statistics for "External, incremental patents: 
Total, mean, standard deviation: 296232 , 4.47239161219741 , 2.6815898976775547


## Testing how varying the Parameters affects the distribution (can be used to choose moments)

In [67]:
def pdf_eta2(lambda_inc_0, alpha, tau_lambda_int_abr, ndim):
    seq = np.arange(ndim)
    lambda_inc_k = np.array(lambda_inc_0 * alpha**seq)
    eta_0 = (tau_lambda_int_abr)/(lambda_inc_k[0] + tau_lambda_int_abr)
    eta_k_mod = [None] * ndim
    eta_k_mod[0] = eta_0
    for i in range(1, np.size(lambda_inc_k)):
        eta_k_mod[i] = eta_0 * ((lambda_inc_k[i-1])/(lambda_inc_k[i] + tau_lambda_int_abr))**i
    
    return eta_k_mod
ndim = 750
np.sum(pdf_eta2(.12, .99, 9.39, ndim))

dist_pts = np.arange(ndim)
plt.plot(dist_pts, pdf_eta2(5.5, .6, .2, 750),
         linewidth=2, color='r')
plt.xlim([0, 30])

(0, 30)

## Estimation (in progress)

In [35]:
def pdf_eta(lambda_inc_k, tau_lambda_int_abr):
    
    eta_0 = (tau_lambda_int_abr)/(lambda_inc_k[0] + tau_lambda_int_abr)
    eta_k_mod = [None] * len(lambda_inc_k)
    eta_k_mod[0] = eta_0
    for i in range(1, np.size(lambda_inc_k)):
        eta_k_mod[i] = eta_0 * ((lambda_inc_k[i-1])/(lambda_inc_k[i] + tau_lambda_int_abr))**i
    
    return eta_k_mod

In [38]:
def model_moments(lambda_inc_k, tau_lambda_int_abr):
    
    eta_k_mod = pdf_eta(lambda_inc_k, tau_lambda_int_abr)
#    lambda_inc_mod = np.dot(eta_k_mod, lambda_inc_k)
    eta_k_mean_mod = np.dot(eta_k_mod, np.arange(len(lambda_inc_k)))
    eta_k_var_mod = np.dot(np.multiply(np.arange(len(lambda_inc_k)) - eta_k_mean_mod,
                                     np.arange(len(lambda_inc_k)) - eta_k_mean_mod), eta_k_mod)
    bpct_4_mod = np.dot(eta_k_mod[0:20], np.linspace(0, 19, 20))
#    eta_k_ske_mod = np.dot(np.multiply(np.multiply(((np.linspace(0, 15490, 15491) - eta_k_mean_mod)/eta_k_var_mod),
#                                    ((np.linspace(0, 15490, 15491) - eta_k_mean_mod)/eta_k_var_mod)),
#                                     ((np.linspace(0, 15490, 15491) - eta_k_mean_mod)/eta_k_var_mod))
#                         , eta_k_mod)
    bpct_5_mod = np.dot(eta_k_mod[20:40], np.linspace(20, 39, 20))
    
    return eta_k_mean_mod, eta_k_var_mod, bpct_4_mod, bpct_5_mod

def data_moments(xvals):
    
    unique, counts = np.unique(xvals, return_counts=True)
    eta_k_data = counts / np.size(np.array(xvals))
#    seq = np.linspace(0, 15490, 15491)
#    lambda_inc_k_data = np.array(0.8 * 0.95**seq)
#    lambda_inc_data = np.dot(eta_k_data, lambda_inc_k_data)
    bpct_4_data = np.dot(eta_k_data[0:20], np.linspace(0, 19, 20))
    bpct_5_data = np.dot(eta_k_data[20:40], np.linspace(20, 39, 20))
    
    return np.mean(xvals), np.var(xvals), bpct_4_data, bpct_5_data

def err_vec(xvals, lambda_inc_k, tau_lambda_int_abr, simple):
    
    eta_k_mean_mod, eta_k_var_mod, bpct_4_mod, bpct_5_mod = model_moments(
        lambda_inc_k, tau_lambda_int_abr)
    moms_mod = np.array([[eta_k_mean_mod], [eta_k_var_mod], [bpct_4_mod], [bpct_5_mod]])
    eta_k_mean_data, eta_k_var_data, bpct_4_data, bpct_5_data = data_moments(xvals)
    moms_data = np.array([[eta_k_mean_data], [eta_k_var_data], [bpct_4_data], [bpct_5_data]])
    if simple:
        err_vec = moms_mod - moms_data
    else:
        err_vec = (moms_mod - moms_data) / moms_data
    
    return err_vec

def crit_abr_distr(params, *args):
    
    tau_lambda_int_abr, lambda_inc_0, alpha = params
    xvals, W, ndim = args
    seq = np.arange(ndim)
    lambda_inc_k = np.array(lambda_inc_0 * alpha**seq)
    err = err_vec(xvals, lambda_inc_k, tau_lambda_int_abr, simple=False)
    crit_val = np.dot(np.dot(err.T, W), err) 
    
    return crit_val

In [26]:
cit_abr = data2[(data2.Increment == 0)]
abr_dist_data = np.array(cit_abr.tot_abr_class_cit_correct.dropna().value_counts()) / np.size(
    np.array(cit_abr.tot_abr_class_cit_correct.dropna()))
np.size(abr_dist_data)
np.max(cit_abr.tot_cit_correct)

739.26779999999997

In [39]:
tau_lambda_int_abr_init = 0.5
lambda_inc_0_init = 2
alpha_init = 0.95
W_hat = np.eye(4)
#W_hat = np.diag((1000000,100,1))
params_init = np.array([tau_lambda_int_abr_init, lambda_inc_0_init, alpha_init])
bnds = ((1e-5, None),(1e-5, None),(0.5, 1-1e-5))
ndim = 750 #This is the number of dimensions that we will sum over. I use 750 because 741 is the maximum in the data.
gmm_args = (np.array(cit_abr.tot_abr_class_cit_correct.dropna()), W_hat, ndim)
results_abr_distr = opt.minimize(crit_abr_distr, params_init, args=(gmm_args),
                                 method='L-BFGS-B', bounds = bnds)
tau_lambda_int_abr_GMM, lambda_inc_0_GMM, alpha_GMM = results_abr_distr.x
print('tau_lambda_int_abr_GMM=', tau_lambda_int_abr_GMM, 'lambda_inc_0_GMM=', lambda_inc_0_GMM,
      'alpha_GMM=', alpha_GMM)

tau_lambda_int_abr_GMM= 9.38522746151 lambda_inc_0_GMM= 0.121329751573 alpha_GMM= 0.991596864946


In [338]:
results_abr_distr

      fun: array([[ 2.99735804]])
 hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>
      jac: array([  4.44089210e-08,   3.99680289e-07,  -3.86357613e-06])
  message: b'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL'
     nfev: 68
      nit: 16
   status: 0
  success: True
        x: array([ 9.38522746,  0.12132975,  0.99159686])

In [40]:
seq = np.arange(ndim)
lambda_inc_k_GMM = np.array(lambda_inc_0_GMM * alpha_GMM**seq)
eta_k_mean_data, eta_k_var_data, bpct_4_data, bpct_5_data = data_moments(np.array(
    cit_abr.tot_abr_class_cit_correct.dropna()))
eta_k_mean_mod, eta_k_var_mod, bpct_4_mod, bpct_5_mod = model_moments(lambda_inc_k_GMM,
                                                              tau_lambda_int_abr_GMM)

err1 = err_vec(np.array(cit_abr.tot_abr_class_cit_correct.dropna())
               , lambda_inc_k_GMM, tau_lambda_int_abr_GMM, False).reshape(4,)
print('eta_k_mean_mod = ', eta_k_mean_mod, 'eta_k_var_mod = ', eta_k_var_mod,
      'bpct_4_mod = ', bpct_4_mod, 'bpct_5_mod = ', bpct_5_mod)
print('eta_k_mean_data = ', eta_k_mean_data, 'eta_k_var_data = ', eta_k_var_data,
      'bpct_4_data = ', bpct_4_data, 'bpct_5_data = ', bpct_5_data)
print('Error vector=', err1)

eta_k_mean_mod =  0.0129235324495 eta_k_var_mod =  0.0130848923498 bpct_4_mod =  0.0129235324495 bpct_5_mod =  1.10435466919e-38
eta_k_mean_data =  10.0639898767 eta_k_var_data =  339.501880378 bpct_4_data =  0.012906499231 bpct_5_data =  0.053496963643
Error vector= [-0.99871586 -0.99996146  0.00131974 -1.        ]


In [340]:
sum_eta = np.sum(pdf_eta(lambda_inc_k_GMM, tau_lambda_int_abr_GMM))
lambda_inc_GMM = np.dot(pdf_eta(lambda_inc_k_GMM, tau_lambda_int_abr_GMM), lambda_inc_k_GMM)
print(sum_eta, lambda_inc_GMM, tau_lambda_int_abr_GMM)

0.999998625776 0.121316410054 9.38522746151


In [42]:
# Plot the histogram of the data
#Total same class citations to abrupt patents
data3 = data2[(data2.Increment == 0)]
data3.hist(column = 'tot_abr_class_cit_correct', normed=True, bins = 400, edgecolor = 'black')
plt.title('Citation Distribution: Same-class Citations to Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
plt.xlim([0, 30])

# Plot the estimated GMM
dist_pts = np.arange(ndim)
plt.plot(dist_pts, pdf_eta(lambda_inc_k_GMM, tau_lambda_int_abr_GMM),
         linewidth=2, color='r')
plt.xlim([0, 30])

<IPython.core.display.Javascript object>

(0, 30)

In [330]:
err1 = err_vec(np.array(cit_abr.tot_abr_class_cit_correct.dropna())
               , lambda_inc_k_GMM, tau_lambda_int_abr_GMM, False)
VCV2 = np.dot(err1, err1.T) / np.size(np.array(cit_abr.tot_abr_class_cit_correct.dropna()))
print(VCV2)
W_hat2 = lin.pinv(VCV2)  # Use the pseudo-inverse calculated by SVD because VCV2 is ill-conditioned
print(W_hat2)

[[  5.28412863e-06   5.28465652e-06  -5.55973449e-10   5.28467265e-06]
 [  5.28465652e-06   5.28518446e-06  -5.56028991e-10   5.28520059e-06]
 [ -5.55973449e-10  -5.56028991e-10   5.84971520e-14  -5.56030688e-10]
 [  5.28467265e-06   5.28520059e-06  -5.56030688e-10   5.28521672e-06]]
[[  2.10216424e+04   2.10237425e+04  -2.21180744e+00   2.10238067e+04]
 [  2.10237425e+04   2.10258428e+04  -2.21202840e+00   2.10259069e+04]
 [ -2.21180744e+00  -2.21202840e+00   2.32716933e-04  -2.21203515e+00]
 [  2.10238067e+04   2.10259069e+04  -2.21203515e+00   2.10259711e+04]]


In [342]:
params_init = np.array([tau_lambda_int_abr_GMM, lambda_inc_0_GMM, alpha_GMM])
bnds = ((1e-5, None),(1e-5, None),(1e-5, 1-1e-5))
ndim = 750
gmm_args = (np.array(cit_abr.tot_abr_class_cit_correct.dropna()), W_hat2, ndim)
results2_abr_distr = opt.minimize(crit_abr_distr, params_init, args=(gmm_args),
                                 method='L-BFGS-B', bounds = bnds)
tau_lambda_int_abr_GMM2, lambda_inc_0_GMM2, alpha_GMM2 = results2_abr_distr.x
print('tau_lambda_int_abr_GMM2=', tau_lambda_int_abr_GMM2, 'lambda_inc_0_GMM2=', lambda_inc_0_GMM2,
      'alpha_GMM2=', alpha_GMM2)

tau_lambda_int_abr_GMM2= 9.1056327884 lambda_inc_0_GMM2= 37.8097137599 alpha_GMM2= 0.99184402189


In [343]:
results2_abr_distr

      fun: array([[  1.14114707e-09]])
 hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>
      jac: array([  1.44259597e-02,  -2.85133141e-03,   1.31420576e+01])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 188
      nit: 5
   status: 0
  success: True
        x: array([  9.10563279,  37.80971376,   0.99184402])

In [344]:
seq = np.arange(ndim)
lambda_inc_k_GMM2 = np.array(lambda_inc_0_GMM2 * alpha_GMM2**seq)
eta_k_mean_data2, eta_k_var_data2, bpct_4_data2, bpct_5_data2 = data_moments(np.array(
    cit_abr.tot_abr_class_cit_correct.dropna()))
eta_k_mean_mod2, eta_k_var_mod2, bpct_4_mod2, bpct_5_mod2 = model_moments(lambda_inc_k_GMM2,
                                                                 tau_lambda_int_abr_GMM2)

err2 = err_vec(np.array(cit_abr.tot_abr_class_cit_correct.dropna())
               , lambda_inc_k_GMM2, tau_lambda_int_abr_GMM2, False).reshape(4,)
print('eta_k_mean_mod2 = ', eta_k_mean_mod2, 'eta_k_var_mod2 = ', eta_k_var_mod2,
      'bpct_4_mod2 = ', bpct_4_mod2, 'bpct_5_mod2 = ', bpct_5_mod2)
print('eta_k_mean_data2 = ', eta_k_mean_data2, 'eta_k_var_data2 = ', eta_k_var_data2,
      'bpct_4_data2 = ', bpct_4_data2, 'bpct_5_data2 = ', bpct_5_data2)
print('Error vector=', err2)

eta_k_mean_mod2 =  3.71214985664 eta_k_var_mod2 =  16.0495785278 bpct_4_mod2 =  3.57201595963 bpct_5_mod2 =  0.139777653382
eta_k_mean_data2 =  10.0639898767 eta_k_var_data2 =  339.501880378 bpct_4_data2 =  0.012906499231 bpct_5_data2 =  0.053496963643
Error vector= [  -0.63114531   -0.9527261   275.76102526    1.61281471]


In [345]:
# Plot the histogram of the data
#Total same class citations to abrupt patents
data3 = data2[(data2.Increment == 0)]
data3.hist(column = 'tot_abr_class_cit_correct', normed=True, bins = 400, edgecolor = 'black')
plt.title('Citation Distribution: Same-class Citations to Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
plt.xlim([0, 30])

# Plot the estimated GMM
dist_pts = np.arange(ndim)
plt.plot(dist_pts, pdf_eta(lambda_inc_k_GMM2, tau_lambda_int_abr_GMM2),
         linewidth=2, color='g')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x26069470>]

In [296]:
sum_eta = np.sum(pdf_eta(lambda_inc_k_GMM2, tau_lambda_int_abr_GMM2))
lambda_inc_GMM2 = np.dot(pdf_eta(lambda_inc_k_GMM2, tau_lambda_int_abr_GMM2), lambda_inc_k_GMM2)
print(sum_eta, lambda_inc_GMM2, tau_lambda_int_abr_GMM2)

0.999972504547 31.4379334063 8.91473091643


In [72]:
def log_lik(xvals, lambda_inc_k, alpha, tau_lambda_int_abr):
    lik_array = np.zeros(len(xvals))
    pmf = pdf_eta(lambda_inc_k, tau_lambda_int_abr)
    rounded = np.round(xvals)
    for n in range(0, len(xvals)):
        lik_array[n] = pmf[np.int(rounded[n])]
    loglik_array = np.log(lik_array)
    
    return np.sum(loglik_array)

def crit_log_lik(params, *args):
    tau_lambda_int_abr, lambda_inc_0, alpha = params
    xvals, W, ndim = args
    seq = np.arange(ndim)
    lambda_inc_k = np.array(lambda_inc_0 * alpha**seq)
    loglik = log_lik(xvals, lambda_inc_k, alpha, tau_lambda_int_abr)
    crit_val = -loglik
    print(params, crit_val)
    
    return crit_val

In [73]:
params_init = np.array([0.159, .517, .9999])
bnds = ((1e-5, None),(1e-5, None),(1e-5, 1-1e-5))
ndim = 750
W = np.eye(3)
MLE_data = np.array(cit_abr.tot_abr_class_cit_correct.dropna())
gmm_args = (MLE_data, W, ndim)
results_MLE = opt.minimize(crit_log_lik, params_init, args=(gmm_args),
                                 method='L-BFGS-B', bounds = bnds)
'''
results_MLE = opt.minimize(crit_log_lik, params_init, args=(gmm_args),
                                 method='Nelder-Mead')
'''
tau_lambda_int_abr_MLE, lambda_inc_0_MLE, alpha_MLE = results_MLE.x
print('tau_lambda_int_abr_MLE=', tau_lambda_int_abr_MLE, 'lambda_inc_0_MLE=', lambda_inc_0_MLE,
      'alpha_MLE=', alpha_MLE)

[ 0.159   0.517   0.9999] 785605.168672
[ 0.15900001  0.517       0.9999    ] 785605.1878
[ 0.159       0.51700001  0.9999    ] 785605.162789
[ 0.159       0.517       0.99990001] 785604.989459
[ 0.15899973  1.517       0.9999    ] 635807.043183
[ 0.15899974  1.517       0.9999    ] 635807.043805
[ 0.15899973  1.51700001  0.9999    ] 635807.043118
[ 0.15899973  1.517       0.99990001] 635806.982096
[ 0.1588314   1.52584424  0.99999   ] 635201.919835
[ 0.15883141  1.52584424  0.99999   ] 635201.92034
[ 0.1588314   1.52584425  0.99999   ] 635201.919783
[ 0.1588314   1.52584424  0.99999001] 635201.860113
[ 0.15813777  1.57191255  0.99999   ] 635031.599136
[ 0.15813778  1.57191255  0.99999   ] 635031.599263
[ 0.15813777  1.57191256  0.99999   ] 635031.599124
[ 0.15813777  1.57191255  0.99999001] 635031.541814
[ 0.15791279  1.58671557  0.99999   ] 635020.101513
[ 0.1579128   1.58671557  0.99999   ] 635020.101521
[ 0.15791279  1.58671558  0.99999   ] 635020.101512
[ 0.15791279  1.58671557  0

In [74]:
print(results_MLE)
print(crit_log_lik((tau_lambda_int_abr_MLE, lambda_inc_0_MLE, 1.0), MLE_data, W, ndim))

      fun: 635020.05046736216
 hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>
      jac: array([  0.00000000e+00,   1.16415322e-02,  -5.65243737e+06])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 28
      nit: 6
   status: 0
  success: True
        x: array([ 0.15789664,  1.58777568,  0.99999   ])
(0.15789663790006958, 1.5877756818483664, 1.0) 634963.572062
634963.572062


In [75]:
# Plot the histogram of the data
#Total same class citations to abrupt patents
data3 = data2[(data2.Increment == 0)]
data3.hist(column = 'tot_abr_class_cit_correct', normed=True, bins = 400, edgecolor = 'black')
plt.title('Citation Distribution: Same-class Citations to Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
plt.xlim([0, 30])

# Plot the estimated GMM
dist_pts = np.arange(ndim)
plt.plot(dist_pts, pdf_eta2(lambda_inc_0_MLE, alpha_MLE, tau_lambda_int_abr_MLE, ndim),
         linewidth=2, color='r')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1f083f57c88>]

### Robustness

In [357]:
#other moments
#share of incremental patents
cit_inc = data2[(data2.Increment == 1)]
share_inc_data = np.size(np.array(cit_inc.patent)) / (np.size(np.array(cit_inc.patent)) +
                                     np.size(np.array(cit_abr.patent)))
share_inc_mod = lambda_inc_GMM2 / (lambda_inc_GMM2 + tau_lambda_int_abr_GMM2)

print('share_inc_data = ', share_inc_data, 'share_inc_mod = ', share_inc_mod)



share_inc_data =  0.899991542938089 share_inc_mod =  0.775411152915
inc_ss_data =  0.051 inc_ss_mod =  0.946494074711


## Entry Rate Calculations

In [5]:
Patent_evol = pd.read_csv('evol_pat_year_count_df.csv', sep = ',', header = 0, index_col = 0)
Innov_emp = pd.read_csv('Innov_emp.csv', sep = ',', header = 0, index_col = 0)
#Entry rate per 5 years (by employment)
entry_rate1976 = Patent_evol.emp[(Patent_evol.firstYear >= 1976) & (
    Patent_evol.firstYear < 1981)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
entry_rate1981 = Patent_evol.emp[(Patent_evol.firstYear >= 1981) & (
    Patent_evol.firstYear < 1986)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
entry_rate1986 = Patent_evol.emp[(Patent_evol.firstYear >= 1986) & (
    Patent_evol.firstYear < 1991)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
entry_rate1991 = Patent_evol.emp[(Patent_evol.firstYear >= 1991) & (
    Patent_evol.firstYear < 1996)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
entry_rate1996 = Patent_evol.emp[(Patent_evol.firstYear >= 1996) & (
    Patent_evol.firstYear < 2001)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
entry_rate2001= Patent_evol.emp[(Patent_evol.firstYear >= 2001) & (
    Patent_evol.firstYear < 2006)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
print('Entry rates: \n1976 - 1980 = ', entry_rate1976, '\n1981 - 1985 = ', entry_rate1981,
     '\n1986 - 1990 = ', entry_rate1986, '\n1991 - 1995 = ', entry_rate1991,
     '\n1996 - 2000 = ', entry_rate1996, '\n2001 - 2005 = ', entry_rate2001)

Entry rates: 
1976 - 1980 =  0.0021245205023886736 
1981 - 1985 =  0.0023513519177680454 
1986 - 1990 =  0.002191138646583526 
1991 - 1995 =  0.001965464743272693 
1996 - 2000 =  0.0044289127179007156 
2001 - 2005 =  0.0018598440948136667


In [6]:
#Entry rate per 5 years (by nb firms)
entry_rate1976 = Patent_evol.firstYear[(Patent_evol.firstYear >= 1976) & (
    Patent_evol.firstYear < 1981)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 1976) & 
                                                          (Innov_emp.fyear < 1981)].sum()
entry_rate1981 = Patent_evol.firstYear[(Patent_evol.firstYear >= 1981) & (
    Patent_evol.firstYear < 1986)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 1981) & 
                                                          (Innov_emp.fyear < 1986)].sum()
entry_rate1986 = Patent_evol.firstYear[(Patent_evol.firstYear >= 1986) & (
    Patent_evol.firstYear < 1991)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 1986) & 
                                                          (Innov_emp.fyear < 1991)].sum()
entry_rate1991 = Patent_evol.firstYear[(Patent_evol.firstYear >= 1991) & (
    Patent_evol.firstYear < 1996)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 1991) & 
                                                          (Innov_emp.fyear < 1996)].sum()
entry_rate1996 = Patent_evol.firstYear[(Patent_evol.firstYear >= 1996) & (
    Patent_evol.firstYear < 2001)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 1996) & 
                                                          (Innov_emp.fyear < 2001)].sum()
entry_rate2001 = Patent_evol.firstYear[(Patent_evol.firstYear >= 2001) & (
    Patent_evol.firstYear < 2006)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 2001) & 
                                                          (Innov_emp.fyear < 2006)].sum()
print('Entry rates: \n1976 - 1980 = ', entry_rate1976, '\n1981 - 1985 = ', entry_rate1981,
     '\n1986 - 1990 = ', entry_rate1986, '\n1991 - 1995 = ', entry_rate1991,
     '\n1996 - 2000 = ', entry_rate1996, '\n2001 - 2005 = ', entry_rate2001)
entry_rate = np.mean(np.array([entry_rate1976, entry_rate1981, entry_rate1986, entry_rate1991
                             , entry_rate1996, entry_rate2001]))
print('Mean entry rate = ', entry_rate)
lambda_e = entry_rate

Entry rates: 
1976 - 1980 =  0.0704656355967 
1981 - 1985 =  0.0591484464902 
1986 - 1990 =  0.0625711845103 
1991 - 1995 =  0.067709919016 
1996 - 2000 =  0.0710200455328 
2001 - 2005 =  0.0266078444889
Mean entry rate =  0.0595871792725
