[View in Colaboratory](https://colab.research.google.com/github/subhamghimire/Random-Stuffs/blob/master/Cython.ipynb)

In [0]:
pip install cython


SyntaxError: ignored

In [0]:
!pip install cython
!pip install spacy

Collecting cython
[?25l  Downloading https://files.pythonhosted.org/packages/19/8e/32b280abb0947a96cdbb8329fb2014851a21fc1d099009f946ea8a8202c3/Cython-0.28.5-cp36-cp36m-manylinux1_x86_64.whl (3.4MB)
[K    100% |████████████████████████████████| 3.4MB 6.9MB/s 
[?25hInstalling collected packages: cython
Successfully installed cython-0.28.5
Collecting spacy
[?25l  Downloading https://files.pythonhosted.org/packages/24/de/ac14cd453c98656d6738a5669f96a4ac7f668493d5e6b78227ac933c5fd4/spacy-2.0.12.tar.gz (22.0MB)
[K    100% |████████████████████████████████| 22.0MB 1.8MB/s 
Collecting murmurhash<0.29,>=0.28 (from spacy)
  Downloading https://files.pythonhosted.org/packages/5e/31/c8c1ecafa44db30579c8c457ac7a0f819e8b1dbc3e58308394fff5ff9ba7/murmurhash-0.28.0.tar.gz
Collecting cymem<1.32,>=1.30 (from spacy)
  Downloading https://files.pythonhosted.org/packages/f8/9e/273fbea507de99166c11cd0cb3fde1ac01b5bc724d9a407a2f927ede91a1/cymem-1.31.2.tar.gz
Collecting preshed<2.0.0,>=1.0.0 (from spacy)

In [0]:
#pseudo-random number generator, used to draw the rectangle dimensions
from random import random

#OOP 0:) 
class Rectangle:
    """Rectangle described by its width ``w`` and height ``h``."""

    def __init__(self, w, h):
        #keep both dimensions on the instance
        self.w, self.h = w, h

    def area(self):
        """Return the area of the rectangle (width times height)."""
        width, height = self.w, self.h
        return width * height

#count the rectangles whose area exceeds our threshold
def check_rectangles_py(rectangles, threshold):
    """Count the rectangles whose area is strictly greater than ``threshold``.

    Args:
        rectangles: iterable of objects exposing an ``area()`` method.
        threshold: value each area is compared against.

    Returns:
        The number of rectangles with ``area() > threshold`` (0 for an empty iterable).
    """
    #each rectangle above the threshold contributes 1 to the total
    return sum(1 for shape in rectangles if shape.area() > threshold)

def main_rectangles_slow(n_rectangles=10000000, threshold=0.25):
    """Pure-Python benchmark: build random rectangles and print how many exceed ``threshold``.

    Args:
        n_rectangles: number of Rectangle objects to create. Defaults to
            10000000, the value that used to be hard-coded.
        threshold: area cut-off handed to check_rectangles_py. Defaults to 0.25.

    Prints the resulting count; returns None.
    """
    #build the rectangle list; width and height each come from random()
    rectangles = [Rectangle(random(), random()) for _ in range(n_rectangles)]
    #perform check
    n_out = check_rectangles_py(rectangles, threshold=threshold)
    print(n_out)

In [0]:
%%time
# Time the pure-Python baseline: it builds the Rectangle list, counts the
# rectangles above the threshold and prints that count.
main_rectangles_slow()

4036634
CPU times: user 16.1 s, sys: 1.62 s, total: 17.8 s
Wall time: 17.8 s


In [0]:
# Load the Cython IPython extension so that the %%cython cell magic is available.
%load_ext Cython

The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython


In [0]:
%%cython
#memory management helper for Cython
from cymem.cymem cimport Pool
#good ol python
from random import random

#The cdef statement is used to declare C variables,types, and functions
cdef struct Rectangle:
    #C struct fields (single-precision floats): width and height of one rectangle
    float w
    float h

#"Rectangle*" declares a pointer: it holds the address of the first Rectangle in a C array
#passing the address saves memory and runs faster, since we don't have to duplicate the data
cdef int check_rectangles_cy(Rectangle* rectangles, int n_rectangles, float threshold):
    #Count how many of the first n_rectangles structs have w * h strictly above threshold.
    cdef int n_out = 0
    cdef int i
    # C arrays contain no size information => the length is passed in explicitly
    for i in range(n_rectangles):
        if rectangles[i].w * rectangles[i].h > threshold:
            n_out += 1
    return n_out

#Python uses garbage collection instead of manual memory management,
#which means developers can freely create objects
#and Python's memory manager will periodically look for any
#objects that are no longer referenced by the program.
#This overhead makes demands on the runtime environment (slower),
#so managing the memory manually is faster here.
def main_rectangles_fast():
    """Fill a C array with random rectangles and print how many have w * h above 0.25."""
    cdef int n_rectangles = 10000000
    cdef float threshold = 0.25
    cdef int i
    #The Pool object keeps track of the memory addresses it hands out
    #and frees them when the Pool itself is garbage collected
    cdef Pool pool = Pool()
    cdef Rectangle* rects = <Rectangle*>pool.alloc(n_rectangles, sizeof(Rectangle))
    #each field receives its own random() draw, width first and then height
    for i in range(n_rectangles):
        rects[i].w = random()
        rects[i].h = random()
    count = check_rectangles_cy(rects, n_rectangles, threshold)
    print(count)

In [0]:
%%time
# Time the Cython implementation of the same rectangle count for comparison.
main_rectangles_fast()

4035121
CPU times: user 757 ms, sys: 29.3 ms, total: 786 ms
Wall time: 783 ms


In [0]:
# Set up spaCy
# NOTE(review): the "en" shortcut name matches the spaCy 2.0.12 install recorded in
# this notebook's output; newer spaCy releases may require the full model package
# name instead - confirm before upgrading.
import spacy.cli
# Download the English model and link it under the name "en" (needs network access)
spacy.cli.download("en")
# Load the linked model; `nlp` is the pipeline the following cells call
nlp = spacy.load('en')



# Test Data
# parsedData is the parsed document that the part-of-speech cell reads
multiSentence = "what is your name?."
parsedData = nlp(multiSentence)


[93m    Linking successful[0m
    /usr/local/lib/python3.6/dist-packages/en_core_web_sm -->
    /usr/local/lib/python3.6/dist-packages/spacy/data/en

    You can now load the model via spacy.load('en')



In [0]:
# Let's look at the part of speech tags of the first sentence.
# next(iter(...), ()) takes the first sentence span directly and falls back to an
# empty sequence when the document has no sentences, so `sent` is always rebound
# here instead of staying undefined (or stale from an earlier run of this cell).
# Iterating a span yields its tokens, so no index arithmetic is needed.
first_sentence = next(iter(parsedData.sents), ())
sent = list(first_sentence)

for token in sent:
    print(token.orth_, token.pos_)

what NOUN
is VERB
your ADJ
name NOUN
? PUNCT
. PUNCT


In [0]:
# Let's look at the dependencies of this example:
example = "The boy with the spotted dog quickly ran after the firetruck."
parsedEx = nlp(example)
# One line per token: original token, dependency tag, head word,
# left dependents, right dependents
for token in parsedEx:
    left_words = [child.orth_ for child in token.lefts]
    right_words = [child.orth_ for child in token.rights]
    print(token.orth_, token.dep_, token.head.orth_, left_words, right_words)

The det boy [] []
boy nsubj ran ['The'] ['with']
with prep boy [] ['dog']
the det dog [] []
spotted amod dog [] []
dog pobj with ['the', 'spotted'] []
quickly advmod ran [] []
ran ROOT ran ['boy', 'quickly'] ['after', '.']
after prep ran [] ['firetruck']
the det firetruck [] []
firetruck pobj after ['the'] []
. punct ran [] []


In [0]:
# Let's look at the named entities of this example:
example = "Apple's stocks dropped dramatically after the death of Steve Jobs in October."
parsedEx = nlp(example)
for token in parsedEx:
    # ent_type_ is an empty string for tokens that belong to no entity
    entity_type = token.ent_type_ or "(not an entity)"
    print(token.orth_, entity_type)

print("-------------- entities only ---------------")
# If you just want the entities and nothing else, read the parsed example's "ents" property:
ents = list(parsedEx.ents)
for entity in ents:
    entity_text = ' '.join(tok.orth_ for tok in entity)
    print(entity.label, entity.label_, entity_text)

Apple ORG
's (not an entity)
stocks (not an entity)
dropped (not an entity)
dramatically (not an entity)
after (not an entity)
the (not an entity)
death (not an entity)
of (not an entity)
Steve PERSON
Jobs PERSON
in (not an entity)
October DATE
. (not an entity)
-------------- entities only ---------------
381 ORG Apple
378 PERSON Steve Jobs
388 DATE October
