# Patent Success Prediction

## This notebook takes in data from pending applications and predicts possibility of acceptance based on supporting information and similarity to previously acccpted patents.


### The following 5 cells flatten the data from pending applications into a usable form

In [1]:
import pandas as pd
from IPython.display import IFrame
import urllib.request
import json
headers=['applicantid','patentId','patentNumber','supportingMaterial','hasRepresentative','natureOfPatent','hasPublications', 'creator', 'busisnessSize', 'businessName', 'isValid', 'track1NonProvisionalApp','provisional','description','usNationalStageNumber','acceleratedExam','nonProvisionalExam','reexam','reissue','isDesignApplication', 'country','info','isForeignPriority','isInternationalDesignApp','receivingOffice']


In [None]:
applicant = open('applicant')
appId=[]
creator=[]
busSize=[]
busName=[]
for line in applicant:
    line=line.split(',')
    if len(line)<6:
        continue
    x=5
    bussize=line[x]
    while bussize not in ['Small', 'Medium', 'Large', 'Independent'] and x<len(line)-2:
        x+=1
    else:
        busSize.append(line[x])
        busName.append(line[x+1].strip('\n'))
    appId.append(line[0])
    creator.append(line[2])
df=pd.DataFrame({'applicantId':appId, 'creator':creator, 'businessSize':busSize, 'businessName':busName })
df=df[1:]


In [None]:
applicant= open('application')
for line in applicant:
        line=line.split(',')
        apid=line[5]
        valid=line[7]
        patid=line[6]
        for row in range(1,len(df)):
            if apid==df.at[row,'applicantId']:
                df.loc[row,'isValid']=valid.strip('\n')
                df.loc[row, 'patentId']=patid
                break


        

In [None]:
applicant = open('patent')
for line in applicant:
    try:
        line=line.split(',')
        patid=line[0]
        patnum=line[1]
        product=line[2]
        rep=line[4]
        x=5
        while line[x] not in ['Chemical', 'Material', 'Structural'] and x<len(line)-2:
            x+=1
        nature=line[x]
        pub=line[x+1]
    except IndexError:
        continue
    for row in range(1, len(df)):
        if patid == df.loc[row, 'patentId']:
            df.loc[row, 'patentNumber'] = patnum
            df.loc[row, 'productName'] = product
            df.loc[row, 'hasRepresentative'] = rep
            df.loc[row, 'patentNature'] = nature
            df.loc[row, 'hasPublication'] = pub
df=df[df['creator']!= 'NULL']
df=df[df['businessSize']!='']


In [None]:
df=df.reset_index()
df.drop('index', axis=1, inplace=True)
applicant=open('DomesticForeignInfluence')
for line in applicant:
    line=line.split(',')
    patnum=line[4]
    country=line[1]
    info=line[2]
    isforeigninf=line[3]
    for row in range(1, len(df)):
        if str(patnum).strip() == str(df.loc[row, 'patentNumber']).strip():
            df.loc[row, 'country'] = country
            df.loc[row, 'info'] = info
            df.loc[row, 'isForeignInfluence'] = isforeigninf


df


NameError: name 'df' is not defined

### The following three cells count up the amount of supporting information provided with the patent such as publications, exams, and representatives.

In [None]:

for row in range(0, len(df)):
    exams=0
    applicant= open('supplementalExam')
    for line in applicant:
        line=line.split(',')
        if line[1] == df.loc[row, 'patentNumber']:
            df.loc[row, 'hasExam'] = 1
            exams+=1
    if exams==0:
        df.loc[row, 'hasExam'] = 0
    else:
        df.loc[row, 'numExams'] = exams
    applicant.close()
    


In [None]:

for row in range(0, len(df)):
    reps=0
    applicant= open('representative')
    for line in applicant:
        line=line.split(',')
        if len(line)<8:
            continue
        if str(line[7]).strip() == str(df.loc[row, 'patentNumber']).strip():
            reps+=1
    applicant.close()
    
    df.loc[row, 'numReps'] = reps

In [None]:

for row in range(0, len(df)):
    pubs=0
    applicant= open('Publication')
    for line in applicant:
        line=line.split(',')
        if len(line)<8:
            continue
        if str(line[7]).strip() == str(df.loc[row, 'patentNumber']).strip():
            pubs+=1
    applicant.close()
    df.loc[row, 'numPubs'] = pubs
df.fillna(0, inplace=True)


## We now send data about the application to an API that supplies information about accepted patents. We are counting up how many similar patents have been accepted.

In [None]:

for row in range(0,len(df)):
    datalist=[]
    s = df.loc[row, 'productName']
    numSimPats=0
    for x in range(1,20):
        df.loc[row, "similarPatents"]=numSimPats
        print('.', end='')
        url = """https://www.patentsview.org/api/patents/query?q="""
        try:
            ss= s.split()
        except AttributeError:
            continue
        if len(ss) == 1:
            query="""{"_text_all":{"patent_title":"%s"}}&o={"page":%d,"per_page":50}"""%(s,x)
        else:
            query="""{"_text_all":{"patent_title":%s}}&o={"page":%d,"per_page":50}"""%(str(s).replace(' ', '%20'),x)
        try:
            uh = urllib.request.urlopen(url+query)
        except:
            print('!', end='')
            continue
        data = uh.read().decode()
        if json.loads(data)['total_patent_count']==0:
            continue
        datalist.append(data)
        numSimPats+=json.loads(data)['count']
    

## The cell below runs an algorithm that rates the usefulness of supporting information A.K.A. the 'complexity' of the application, compares that to the number of similar patents that have been accepted, and assigns an approval rating.

In [113]:
df['Complexity']=df['numExams']+df['numPubs']+df['numReps']
for row in range(len(df)):
    if df.loc[row,'businessSize'] =='Large':
        x=2
    if df.loc[row,'businessSize'] =='Small':
        x=1.5
    if df.loc[row,'businessSize'] =='Medium':
        x=1.2
    if df.loc[row,'businessSize'] =='Independent':
        x=1
    df.loc[row,'Complexity']/=x
    if df.loc[row,'Complexity']==0 and df.loc[row, 'similarPatents']==0:
        continue
    if df.loc[row,'Complexity']==0:
        df.loc[row,'Complexity']=.1
    if df.loc[row, 'similarPatents']==0:
        df.loc[row, 'similarPatents']=.1

df['Approval'] = (df['similarPatents']/50)*(df['Complexity']*50)

df['Approval'].describe()

count    82.000000
mean      0.820802
std       1.812991
min       0.000010
25%       0.000771
50%       0.006250
75%       0.476250
max       9.000000
Name: Approval, dtype: float64

In [111]:
size=df[['businessSize','Approval','businessName']]
likely=df[['Approval','similarPatents','Complexity','businessName']]
size.to_csv('finalsize.csv')
likely.to_csv('finallikely.csv')
df.to_csv('finalfull.csv')

# Approval Likelihood by Similar Patents and Supporting Info
 This visualization represents the approval likelihood with the x-axis being the amount of supporting information and the y-axis being the number of similar patents that have been approved. Lighter green patents are very likely to be approved, while darker purple are much more unlikely. Hover over the dots to see the name of the product and the exact percentage of approval likelihood.

In [65]:
display(IFrame('https://public.flourish.studio/visualisation/1233063/',width=900, height=800, frameborder=0, scrolling="no"))

In [2]:
df=pd.read_csv('finalfull.csv')

# Approval Likelihood by Business Size
The following graph takes the likelihood of approval and organizes it by company size. From our test data, you can see patents from independents have a greater ratio of rejected patents to accepted patents, whereas medium sized companies have a more even spread. 

In [18]:
display(IFrame('https://public.flourish.studio/visualisation/1233200/',width=900, height=800, frameborder=0, scrolling="no"))

Flattening and reorganizing data for more graphs

In [6]:
network=df[['businessSize', 'productName']]
points=df[['productName','patentNature']]
points

Unnamed: 0,productName,patentNature
0,"""Beeswax Lip Balm""",Structural
1,"""Paint Markers""",Structural
2,Boat,Material
3,"""Insulated Waterbottle""",Structural
4,"""Transforming Earphones""",Material
...,...,...
78,"""Disappearing Ink""",Material
79,"""Flying Car""",Material
80,"""Phone Case Design""",Structural
81,"""Extending Table Legs""",Structural


In [11]:
points.insert(2,'size',1)

In [12]:
df.businessSize.value_counts()
points

Unnamed: 0,productName,patentNature,size
0,"""Beeswax Lip Balm""",Structural,1
1,"""Paint Markers""",Structural,1
2,Boat,Material,1
3,"""Insulated Waterbottle""",Structural,1
4,"""Transforming Earphones""",Material,1
...,...,...,...
78,"""Disappearing Ink""",Material,1
79,"""Flying Car""",Material,1
80,"""Phone Case Design""",Structural,1
81,"""Extending Table Legs""",Structural,1


In [13]:
points=points.append({'productName':'Small','patentNature':'Size','size':len(df[df['businessSize']=='Small'])}, ignore_index=True)
points=points.append({'productName':'Medium','patentNature':'Size','size':len(df[df['businessSize']=='Medium'])}, ignore_index=True)
points=points.append({'productName':'Large','patentNature':'Size','size':len(df[df['businessSize']=='Large'])}, ignore_index=True)
points=points.append({'productName':'Independent','patentNature':'Size','size':len(df[df['businessSize']=='Independent'])}, ignore_index=True)

In [14]:
points.tail(15)

Unnamed: 0,productName,patentNature,size
72,Flashlight,Material,1
73,"""Flaming Cars""",Material,1
74,"""Corn Plant""",Chemical,1
75,"""Indigo Food Coloing""",Chemical,1
76,"""Bluetooth Mouse""",Structural,1
77,"""Corn Harvester""",Structural,1
78,"""Disappearing Ink""",Material,1
79,"""Flying Car""",Material,1
80,"""Phone Case Design""",Structural,1
81,"""Extending Table Legs""",Structural,1


In [15]:
points.to_csv('points.csv')
network.to_csv('links.csv')

In [36]:
df=df[df['patentNature']!='0']
B=df[df['businessSize']=='Small'].patentNature.value_counts()
C=df[df['businessSize']=='Medium'].patentNature.value_counts()
D=df[df['businessSize']=='Large'].patentNature.value_counts()
E=df[df['businessSize']=='Independent'].patentNature.value_counts()
A=['Structural','Material','Chemical']

fd=pd.DataFrame({'Small':B,'Medium':C,'Large':D,'Independent':E})

In [37]:
fd

Unnamed: 0,Small,Medium,Large,Independent
Chemical,5,6,5,2
Material,6,9,15,6
Structural,6,3,18,1


In [38]:
fd.to_csv('pie.csv')

# Patent Nature by Company Size

In [39]:

display(IFrame('https://public.flourish.studio/visualisation/1237847/',width=900, height=800, frameborder=0, scrolling="no"))

In [11]:
mat=0
st=0
chem=0
datalist=['Utility', 'design', 'Plant']
for s in datalist:
    for x in range(1,20):
        url = """https://www.patentsview.org/api/patents/query?q="""
        query="""{"patent_type":"%s"}&o={"page":%d,"per_page":50}"""%(s,x)
        uh = urllib.request.urlopen(url+query)
        
        data = uh.read().decode()
        print(data)
        if json.loads(data)['total_patent_count']==0:
            continue
        if s =='Utility':
            mat+=json.loads(data)['count']
        if s == 'design':
            chem+=json.loads(data)['count']
        if s == 'Plant':
            st+=json.loads(data)['count']
    

{"patents":[{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},

{"patents":[{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},

{"patents":[{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},{"patent_type":"utility"},

{"patents":[{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type

{"patents":[{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type

{"patents":[{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type":"design"},{"patent_type

{"patents":[{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"pa

{"patents":[{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"pa

{"patents":[{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"patent_type":"plant"},{"pa