## Creating Histograms

In the cells below I import packages, import the summary citation dataset, and create indicator variables for external and abrupt patents. Then histograms and summary statistics are created for the following categories: internal incremental, internal abrupt, and external abrupt.

In [1]:
# Import packages
import pandas as pd
import numpy as np
import numpy.linalg as lin
import scipy.stats as sts
import scipy.integrate as intgr
import scipy.optimize as opt
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
#Setting plot colors
cmap1 = matplotlib.cm.get_cmap('summer')
%matplotlib notebook

In [2]:
#Loading data as a pandas dataset
data2 = pd.read_csv('cit_external_comn_share.csv', sep = ',', header = 0, index_col = 0)
#Deleting patents before 1976
data2 = data2[data2.appyear > 1975]

  mask |= (ar1 == a)


In [3]:
data2['external_share'] = data2.external_share.replace(np.nan, -1)
level = np.rint(np.size(data2['tot_cit_correct'].dropna())
                                                   - 0.1 * np.size(data2['tot_cit_correct'].
                                                                   dropna()))
cutoff = np.sort(data2['tot_cit_correct'].dropna())[int(level)]
cutoff

10.6616

In [13]:
#This function will be used to determine whether a patent is abrupt or incremental.
#A patent at the top 10% of expected citations is set to be aprupt

def abrupt(cites, cutoff):
    #The cutoff value that defines an abrupt innovation can be adjusted
    if cites >= cutoff:
        return 0
    else:
        return 1
    
#Creating a new column for the indicator variable 'incremental'
data2['Increment'] = data2['tot_cit_correct'].apply(abrupt, cutoff = cutoff)
data2['tot_abr_inc_cit_correct'] = data2['tot_cit_correct'] - data2['tot_external_correct']
data2['tot_abr_class_cit_correct'] = data2['tot_cit_correct'] - data2['tot_external_class_correct']

#Defining External Patents
#The definition will depend on the share of citations that are external.
#That variable has many missing values for patents with no citations. This line replaces
#those missing values with -1. 
data2['external_share'] = data2.external_share.replace(np.nan, -1)
#Now I define a function to determine whether a patent is external
def external(cites):
    cutoff = 0.5
    if (cites >= cutoff):
        return 1
    else:
        return 0
data2['External'] = data2['external_share'].apply(external)
print(np.mean(data2['Increment']))

0.899991542938089


In [7]:
print(np.size(data2[data2.External == 1]))
print(np.size(data2[data2.External == 0]))

8709180
29129020


In [7]:
#data3 = data2[data2.icl_class == 'G01N']
#data3.hist(column = 'appyear')

#Plotting internal incremental innovation
#Eliminated observations that are not internal incremental
data3 = data2[data2.Increment == 1]
data3.hist(column = 'tot_cit_correct', bins = 20, edgecolor = 'black')
plt.title('Citation Distribution: Internal, Incremental Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
#Printing out summary statistics
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_cit_correct),',',
      np.std(data3.tot_cit_correct))

#External Abrupt innovation
data3 = data2[(data2.Increment == 0) & (data2.External == 1)]
data3.hist(column = 'tot_cit_correct', bins = 50, edgecolor = 'black')
plt.title('Citation Distribution: External, Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_cit_correct),',',
      np.std(data3.tot_cit_correct))

#Internal abrupt innovation
data3 = data2[(data2.Increment == 0) & (data2.External == 0)]
data3.hist(column = 'tot_cit_correct', bins = 50, edgecolor = 'black')
plt.title('Citation Distribution: Internal, Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_cit_correct),',',
      np.std(data3.tot_cit_correct))

#Total abrupt innovation
data3 = data2[(data2.Increment == 0)]
data3.hist(column = 'tot_cit_correct', bins = 50, edgecolor = 'black')
plt.title('Citation Distribution: All Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_cit_correct),',',
      np.std(data3.tot_cit_correct))

#Total incremental citations to abrupt patents
data3 = data2[(data2.Increment == 0)]
data3.hist(column = 'tot_abr_inc_cit_correct', bins = 50, edgecolor = 'black')
plt.title('Citation Distribution: Incremental Citations to Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_abr_inc_cit_correct),',',
      np.std(data3.tot_abr_inc_cit_correct))

#Total same class citations to abrupt patents
data3 = data2[(data2.Increment == 0)]
data3.hist(column = 'tot_abr_class_cit_correct', bins = 50, edgecolor = 'black')
plt.title('Citation Distribution: Same-class Citations to Abrupt Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(
    data3.tot_abr_class_cit_correct),',', np.std(data3.tot_abr_class_cit_correct))

#Printing the amount of observations that would be "external incremental"
data3 = data2[(data2.Increment == 1) & (data2.External == 1)]
print('Summary Statistics for "External, incremental patents: ')
print('Total, mean, standard deviation:', len(data3), ',', np.mean(data3.tot_cit_correct),',',
      np.std(data3.tot_cit_correct))

<IPython.core.display.Javascript object>

Total, mean, standard deviation: 1702703 , 1.0452714309813296 , 2.293187572379963


<IPython.core.display.Javascript object>

Total, mean, standard deviation: 139227 , 28.339301601706214 , 27.29738437627662


<IPython.core.display.Javascript object>

Total, mean, standard deviation: 49980 , 28.811253965590097 , 28.92134667414797


<IPython.core.display.Javascript object>

Total, mean, standard deviation: 189207 , 28.463970240531847 , 27.736385607322156


<IPython.core.display.Javascript object>

Total, mean, standard deviation: 189207 , 8.850634295773176 , 16.309264325805366


<IPython.core.display.Javascript object>

Total, mean, standard deviation: 189207 , 10.06398987669824 , 18.42557679906264
Summary Statistics for "External, incremental patents: 
Total, mean, standard deviation: 296232 , 4.47239161219741 , 2.6815898976775547


## Estimation (in progress)

In [8]:
Patent_evol = pd.read_csv('evol_pat_year_count_df.csv', sep = ',', header = 0, index_col = 0)
Innov_emp = pd.read_csv('Innov_emp.csv', sep = ',', header = 0, index_col = 0)
#Entry rate per 5 years (by employment)
entry_rate1976 = Patent_evol.emp[(Patent_evol.firstYear >= 1976) & (
    Patent_evol.firstYear < 1981)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
entry_rate1981 = Patent_evol.emp[(Patent_evol.firstYear >= 1981) & (
    Patent_evol.firstYear < 1986)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
entry_rate1986 = Patent_evol.emp[(Patent_evol.firstYear >= 1986) & (
    Patent_evol.firstYear < 1991)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
entry_rate1991 = Patent_evol.emp[(Patent_evol.firstYear >= 1991) & (
    Patent_evol.firstYear < 1996)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
entry_rate1996 = Patent_evol.emp[(Patent_evol.firstYear >= 1996) & (
    Patent_evol.firstYear < 2001)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
entry_rate2001= Patent_evol.emp[(Patent_evol.firstYear >= 2001) & (
    Patent_evol.firstYear < 2006)].sum() / Innov_emp.emp[(Innov_emp.fyear >= 1976) &
                                                                   (Innov_emp.fyear < 2006)].sum()
print('Entry rates: \n1976 - 1980 = ', entry_rate1976, '\n1981 - 1985 = ', entry_rate1981,
     '\n1986 - 1990 = ', entry_rate1986, '\n1991 - 1995 = ', entry_rate1991,
     '\n1996 - 2000 = ', entry_rate1996, '\n2001 - 2005 = ', entry_rate2001)

Entry rates: 
1976 - 1980 =  0.0021245205023886736 
1981 - 1985 =  0.0023513519177680454 
1986 - 1990 =  0.002191138646583526 
1991 - 1995 =  0.001965464743272693 
1996 - 2000 =  0.0044289127179007156 
2001 - 2005 =  0.0018598440948136667


In [9]:
#Entry rate per 5 years (by nb firms)
entry_rate1976 = Patent_evol.firstYear[(Patent_evol.firstYear >= 1976) & (
    Patent_evol.firstYear < 1981)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 1976) & 
                                                          (Innov_emp.fyear < 1981)].sum()
entry_rate1981 = Patent_evol.firstYear[(Patent_evol.firstYear >= 1981) & (
    Patent_evol.firstYear < 1986)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 1981) & 
                                                          (Innov_emp.fyear < 1986)].sum()
entry_rate1986 = Patent_evol.firstYear[(Patent_evol.firstYear >= 1986) & (
    Patent_evol.firstYear < 1991)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 1986) & 
                                                          (Innov_emp.fyear < 1991)].sum()
entry_rate1991 = Patent_evol.firstYear[(Patent_evol.firstYear >= 1991) & (
    Patent_evol.firstYear < 1996)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 1991) & 
                                                          (Innov_emp.fyear < 1996)].sum()
entry_rate1996 = Patent_evol.firstYear[(Patent_evol.firstYear >= 1996) & (
    Patent_evol.firstYear < 2001)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 1996) & 
                                                          (Innov_emp.fyear < 2001)].sum()
entry_rate2001 = Patent_evol.firstYear[(Patent_evol.firstYear >= 2001) & (
    Patent_evol.firstYear < 2006)].count()/Innov_emp.nbfirms[(Innov_emp.fyear >= 2001) & 
                                                          (Innov_emp.fyear < 2006)].sum()
print('Entry rates: \n1976 - 1980 = ', entry_rate1976, '\n1981 - 1985 = ', entry_rate1981,
     '\n1986 - 1990 = ', entry_rate1986, '\n1991 - 1995 = ', entry_rate1991,
     '\n1996 - 2000 = ', entry_rate1996, '\n2001 - 2005 = ', entry_rate2001)
entry_rate = np.mean(np.array([entry_rate1976, entry_rate1981, entry_rate1986, entry_rate1991
                             , entry_rate1996, entry_rate2001]))
print('Mean entry rate = ', entry_rate)
lambda_e = entry_rate

Entry rates: 
1976 - 1980 =  0.0704656355967 
1981 - 1985 =  0.0591484464902 
1986 - 1990 =  0.0625711845103 
1991 - 1995 =  0.067709919016 
1996 - 2000 =  0.0710200455328 
2001 - 2005 =  0.0266078444889
Mean entry rate =  0.0595871792725


In [9]:
def psi(theta):
    
    psi_k = [None] * 100
    for i in range(0, 100):
        psi_k[i] = theta * (1 - theta) ** i
    
    return psi_k

def omega(gamma_eta, alpha, theta, k):
    
    omega_k = ((gamma_eta * alpha ** k) * (1 - theta)) / (theta + (gamma_eta * alpha ** k
                                                                  ) * (1 - theta))
    return omega_k

def gamma_0(gamma_eta, alpha, theta, k):
    
    gamma_k_0 = (theta ** 2)*((1 - theta) ** k)/(theta + (gamma_eta * alpha ** k) * (1 - theta))
    
    return gamma_k_0

def gamma_n(gamma_eta, alpha, theta, n):
    
    gamma_k_n = [None] * 100
    for i in range(0, 100):
        gamma_k_n[i] = gamma_0(gamma_eta, alpha, theta, i) * (omega(gamma_eta, alpha, theta, i
                                                                       )) ** n
    return gamma_k_n

In [52]:
def model_moments(gamma_eta, alpha, theta):
    
    F_n_mod = [None] * 740
    for n in range(0, 740):
        F_n_mod[n] = np.dot(gamma_n(gamma_eta, alpha, theta, n), psi(theta))
    
    F_n_mean_mod = np.dot(F_n_mod, np.linspace(0, 739, 740))
    F_n_var_mod = np.dot(np.multiply(np.linspace(0, 739, 740) - F_n_mean_mod,
                                     np.linspace(0, 739, 740) - F_n_mean_mod), F_n_mod)
    F_n_kur_mod = np.dot(np.multiply(np.multiply(((np.linspace(0, 739, 740) - F_n_mean_mod)/F_n_var_mod),
                                    ((np.linspace(0, 739, 740) - F_n_mean_mod)/F_n_var_mod)),
                                    np.multiply(((np.linspace(0, 739, 740) - F_n_mean_mod)/F_n_var_mod),
                                    ((np.linspace(0, 739, 740) - F_n_mean_mod)/F_n_var_mod)))
                         , F_n_mod)
    
    return F_n_mean_mod, np.var(F_n_mod), sts.kurtosis(F_n_mod)

def data_moments(xvals):
    
#    unique, counts = np.unique(xvals, return_counts=True)
#    F_n_data = counts / np.size(np.array(xvals))
    
    return np.mean(xvals), np.var(xvals), sts.kurtosis(xvals)

def err_vec(xvals, gamma_eta, alpha, theta, simple):
    
    F_n_mean_mod, F_n_var_mod, F_n_kur_mod = model_moments(gamma_eta, alpha, theta)
    moms_mod = np.array([[F_n_mean_mod], [F_n_var_mod], [F_n_kur_mod]])
    F_n_mean_data, F_n_var_data, F_n_kur_data = data_moments(xvals)
    moms_data = np.array([[F_n_mean_data], [F_n_var_data], [F_n_kur_data]])
    if simple:
        err_vec = moms_mod - moms_data
    else:
        err_vec = (moms_mod - moms_data) / moms_data
    
    return err_vec

def crit_ext_distr(params, *args):
    
    gamma_eta, alpha, theta = params
    xvals, W = args
    err = err_vec(xvals, gamma_eta, alpha, theta, simple=False)
    crit_val = np.dot(np.dot(err.T, W), err) 
    
    return crit_val

In [53]:
cit_ext = data2[(data2.External == 1)]
ext_dist_data = np.array(cit_ext.tot_cit_correct.dropna().value_counts()) / np.size(
    np.array(cit_ext.tot_cit_correct.dropna()))
np.max(np.array(cit_ext.tot_cit_correct.dropna()))

739.26779999999997

In [None]:
gamma_eta_init = 0.5
alpha_init = 0.8
theta_init = 0.2
W_hat = np.eye(3)
params_init = np.array([gamma_eta_init, alpha_init, theta_init])
bnds = ((1e-5, None),(1e-5, 1-1e-5), (1e-5, 1-1e-5))
gmm_args = (np.array(cit_ext.tot_cit_correct.dropna()), W_hat)
results_ext_distr = opt.minimize(crit_ext_distr, params_init, args=(gmm_args),
                                 method='L-BFGS-B', bounds = bnds)
gamma_eta_GMM, alpha_GMM, theta_GMM = results_ext_distr.x
print('gamma_eta_GMM=', gamma_eta_GMM, 'alpha_GMM=', alpha_GMM, 'theta_GMM=', theta_GMM)

In [48]:
results_ext_distr

      fun: array([[ 1.52639745]])
 hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>
      jac: array([ -2.70081513e-02,  -9.34467836e-02,  -8.21565038e-07])
  message: b'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL'
     nfev: 72
      nit: 4
   status: 0
  success: True
        x: array([ 10.        ,   0.99999   ,   0.49299031])

In [37]:
F_n_mean_mod, F_n_var_mod, F_n_kur_mod = model_moments(gamma_eta_GMM, alpha_GMM, theta_GMM)
F_n_mean_data, F_n_var_data, F_n_kur_data = data_moments(np.array(cit_ext.tot_cit_correct.dropna()))

err1 = err_vec(np.array(cit_ext.tot_cit_correct.dropna()), gamma_eta_GMM, alpha_GMM,
              theta_GMM, simple=False).reshape(3,)
print('F_n_mean_data =', F_n_mean_data, ', F_n_var_data =', F_n_var_data, 'F_n_kur_data =', F_n_kur_data)
print('F_n_mean_mod =', F_n_mean_mod, ', F_n_var_mod =', F_n_var_mod, 'F_n_kur_mod =', F_n_kur_mod)
print('Error vector=', err1)

F_n_mean_data = 12.1041436904 , F_n_var_data = 367.064652813 F_n_kur_data = 94.96028812639021
F_n_mean_mod = 10.7291586545 , F_n_var_mod = 4.29995375627e-05 F_n_kur_mod = 84.22476749925767
Error vector= [-0.11359623 -0.99999988 -0.11305274]


In [46]:
# Plot the histogram of the data
#Total same class citations to abrupt patents
data3 = data2[(data2.External == 1)]
data3.hist(column = 'tot_cit_correct', normed=True, bins = 500, edgecolor = 'black')
plt.title('Citation Distribution: Citations to External Patents', fontsize=15)
plt.xlabel('Citations')
plt.ylabel('Number of Patents')
plt.xlim([0, 30])

# Plot the estimated GMM
dist_pts = np.linspace(0, 739, 740)
F_n_GMM = [None] * 740
for n in range(0, 740):
    F_n_GMM[n] = np.dot(gamma_n(gamma_eta_GMM, alpha_GMM, theta_GMM, n), psi(theta_GMM))

plt.plot(dist_pts, F_n_GMM, linewidth=2, color='r')
plt.xlim([0, 100])

<IPython.core.display.Javascript object>

(0, 100)

In [41]:
err1 = err_vec(np.array(cit_ext.tot_cit_correct.dropna()), gamma_eta_GMM, alpha_GMM,
              theta_GMM, simple=False)
VCV2 = np.dot(err1, err1.T) / np.size(np.array(cit_ext.tot_cit_correct.dropna()))
print(VCV2)
W_hat2 = lin.pinv(VCV2)  # Use the pseudo-inverse calculated by SVD because VCV2 is ill-conditioned
print(W_hat2)

[[  2.96368741e-08   2.60896614e-07   2.94950802e-08]
 [  2.60896614e-07   2.29670117e-06   2.59648387e-07]
 [  2.94950802e-08   2.59648387e-07   2.93539647e-08]]
[[   5340.66550769   47014.45721156    5315.11376231]
 [  47014.45721156  413873.36160891   46789.52242804]
 [   5315.11376231   46789.52242804    5289.68426606]]


In [44]:
params_init = np.array([gamma_eta_GMM, alpha_GMM, theta_GMM - 0.5])
bnds = ((1e-5, None),(1e-5, 1-1e-5), (1e-5, 1-1e-5))
gmm_args = (np.array(cit_ext.tot_cit_correct.dropna()), W_hat2)
results2_ext_distr = opt.minimize(crit_ext_distr, params_init, args=(gmm_args),
                                 method='L-BFGS-B', bounds = bnds)
gamma_eta_GMM2, alpha_GMM2, theta_GMM2 = results2_ext_distr.x
print('gamma_eta_GMM2=', gamma_eta_GMM2, 'alpha_GMM2=', alpha_GMM2, 'theta_GMM2=', theta_GMM2)

gamma_eta_GMM2= 5233.21287537 alpha_GMM2= 0.448602034573 theta_GMM2= 0.764349116431


In [178]:
results2_ext_distr

      fun: array([[ 435406.99980417]])
 hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>
      jac: array([  -2.66591087,  135.92652977,  131.82288967])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 20
      nit: 1
   status: 0
  success: True
        x: array([ 1.83683073,  0.99998891,  0.10701463])

In [40]:
plt.figure(8)
F_n_mean_mod2, F_n_var_mod2, F_n_kur_mod2 = model_moments(gamma_eta_GMM2, alpha_GMM2, theta_GMM2)
F_n_mean_data, F_n_var_data, F_n_kur_data = data_moments(np.array(cit_ext.tot_cit_correct.dropna()))

err2 = err_vec(np.array(cit_ext.tot_cit_correct.dropna()), gamma_eta_GMM2, alpha_GMM2,
              theta_GMM2, simple=False).reshape(3,)
print('F_n_mean_data =', F_n_mean_data, ', F_n_var_data =', F_n_var_data, 'F_n_kur_data =', F_n_kur_data)
print('F_n_mean_mod2 =', F_n_mean_mod2, ', F_n_var_mod2 =', F_n_var_mod2,
      'F_n_kur_mod2 =', F_n_kur_mod2)
print('Error vector=', err2)

F_n_mean_data = 12.1041436904 , F_n_var_data = 367.064652813 F_n_kur_data = 94.96028812639021
F_n_mean_mod2 = 9.99990000087e-11 , F_n_var_mod2 = 0.000998960040599 F_n_kur_mod2 = 995.0010010010003
Error vector= [-1.         -0.99999728  9.47807479]


In [180]:
plt.figure(8)
dist_pts = np.linspace(0, 739, 740)
F_n_GMM2 = [None] * 740
for n in range(0, 740):
    F_n_GMM2[n] = np.dot(gamma_n(gamma_eta_GMM2, alpha_GMM2, theta_GMM2, n), psi(theta_GMM2))

plt.plot(dist_pts, F_n_GMM2, linewidth=2, color='g')
plt.xlim([0, 100])

(0, 100)

## Junk Code

The cells below do the following things, which might eventually be of use to us:
1. This lists the classifications in order of number of patents.
2. This is an attempt to determine for each patent how many years was the last abrupt patent within its classification. The code works, but it takes to long to run on my computer. It's also incomplete.
3. This cell imports the patent evolution dataset.

In [68]:
#This line finds the most common classifications
data2['icl_class'].value_counts()

In [19]:
#This cell was meant to create a column for how what step each patent was at in its product line's innovation path.
#The code works, but it takes too long to run on my computer.


#Sorting the values by class, then by year, then by the variable "Increment". The loop below needs 
  #abrupt patents from the same class and year to be listed above incremental ones.
data2 = data2.sort_values(['icl_class', 'appyear', 'Increment'])
#Resetting the index of the dataframe so the first row is labeled "0"
data2 = data2.reset_index(drop = True)
#Creating a new column for the year of the latest abrupt patent within a patent's classification. For now this
  #variable is set equal to negative one. The loop below fills it with the correct values.
data2['year_latest_abrupt'] = (data2['patent']*0) - 1

#This moves through every row of the dataset and sets the 'year_latest_abrupt' variable
for i in range(1, int(len(data2)/10)):
    #If the patent is abrupt, then 'year_latest_abrupt" is just equal to that patent's application year
    if data2.loc[i, 'Increment'] == 0:
        data2.loc[i, 'year_latest_abrupt'] = data2.loc[i, 'appyear']
    #If the patent is incremental, I set 'year_latest_abrupt to be equal to the previous row's 'year_latest_abrupt',
        #but only if the previous observation is within the same classification.
    elif data2.loc[i, 'icl_class'] == data2.loc[i-1, 'icl_class']:
        data2.loc[i, 'year_latest_abrupt'] = data2.loc[i-1, 'year_latest_abrupt']
#By the end of this loop, the variable 'year_latest_abrupt' should be correct. For patent's that we don't know the
  #year of the latest abrupt patent in its classification, 'year_latest_abrupt should be equal to negative one.

KeyboardInterrupt: 

In [26]:
#Loading data as a pandas dataset
cols = np.append((0, 1, 2), np.arange(30, 61))
data = pd.read_csv('aux_cit_evolution_year_count_df.csv', sep = ',', header = 0, index_col = 0,
                   usecols = cols)
data = data[data.appyear > 1975]
data = data.replace(np.nan, 0)