[View in Colaboratory](https://colab.research.google.com/github/thayumaanavan/C-programming-for-ML/blob/master/Cython.ipynb)

In [1]:
!pip install cython
!pip install spacy

Collecting cython
[?25l  Downloading https://files.pythonhosted.org/packages/19/8e/32b280abb0947a96cdbb8329fb2014851a21fc1d099009f946ea8a8202c3/Cython-0.28.5-cp36-cp36m-manylinux1_x86_64.whl (3.4MB)
[K    100% |████████████████████████████████| 3.4MB 5.9MB/s 
[?25hInstalling collected packages: cython
Successfully installed cython-0.28.5
Collecting spacy
[?25l  Downloading https://files.pythonhosted.org/packages/24/de/ac14cd453c98656d6738a5669f96a4ac7f668493d5e6b78227ac933c5fd4/spacy-2.0.12.tar.gz (22.0MB)
[K    100% |████████████████████████████████| 22.0MB 1.5MB/s 
Collecting murmurhash<0.29,>=0.28 (from spacy)
  Downloading https://files.pythonhosted.org/packages/5e/31/c8c1ecafa44db30579c8c457ac7a0f819e8b1dbc3e58308394fff5ff9ba7/murmurhash-0.28.0.tar.gz
Collecting cymem<1.32,>=1.30 (from spacy)
  Downloading https://files.pythonhosted.org/packages/f8/9e/273fbea507de99166c11cd0cb3fde1ac01b5bc724d9a407a2f927ede91a1/cymem-1.31.2.tar.gz
Collecting preshed<2.0.0,>=1.0.0 (from spacy)

In [0]:
from random import random

class Rectangle:
  
  def __init__(self, w, h):
    self.w = w
    self.h = h
  
  def area(self):
    return self.w * self.h
  
def check_rectangle_py(rectangles, threshold):
  n_out = 0
  
  for rectangle in rectangles:
    if rectangle.area() > threshold:
      n_out +=1
  return n_out

def main_rectangles_slow():
  n_rectangles = 10000000
  rectangles = list(Rectangle(random(), random()) for i in range(n_rectangles))
  n_out = check_rectangle_py(rectangles, threshold =0.25 )

In [6]:
%%time
main_rectangles_slow()

CPU times: user 15.8 s, sys: 1.71 s, total: 17.6 s
Wall time: 17.5 s


In [0]:
%load_ext Cython

In [10]:
%%cython
from cymem.cymem cimport Pool
from random import random

cdef struct Rectangle:
  float w
  float h

cdef int check_rectangles_cy(Rectangle* rectangles, int n_rectangles, float threshold ):
  cdef int n_out =0
  
  for rectangle in rectangles[:n_rectangles]:
    if rectangle.w * rectangle.h > threshold :
      n_out +=1
  return n_out

def main_rectangles_fast():
  cdef int n_rectangles = 10000000
  cdef float threshold = 0.25
  
  cdef Pool mem = Pool()
  cdef Rectangle* rectangles = <Rectangle*>mem.alloc(n_rectangles, sizeof(rectangles))
  
  for i in range(n_rectangles):
    rectangles[i].w = random()
    rectangles[i].h = random()
  n_out = check_rectangles_cy(rectangles, n_rectangles, threshold)
  print(n_out)



In [11]:
%%time
main_rectangles_fast()

4036693
CPU times: user 773 ms, sys: 30.5 ms, total: 803 ms
Wall time: 800 ms


In [14]:
#Spacy
import spacy.cli

spacy.cli.download('en')

nlp = spacy.load('en')

multiSentence = "Build it, train it, test it , makes it, denser, deeper, faster, smarter"\
"I i got to read papers to try my models"\
"hello how are you"

parsedData = nlp(multiSentence)


[93m    Linking successful[0m
    /usr/local/lib/python3.6/dist-packages/en_core_web_sm -->
    /usr/local/lib/python3.6/dist-packages/spacy/data/en

    You can now load the model via spacy.load('en')



In [15]:
#POS tagging

for span in parsedData.sents:
  sent = [parsedData[i] for i in range(span.start, span.end)]
  break

for token in sent:
  print(token.orth_, token.pos_)

Build VERB
it PRON
, PUNCT
train VERB
it PRON
, PUNCT
test VERB
it PRON
, PUNCT
makes VERB
it PRON
, PUNCT
denser NOUN
, PUNCT
deeper ADJ
, PUNCT
faster ADV
, PUNCT
smarterI PROPN
i PRON
got VERB
to PART
read VERB
papers NOUN
to PART
try VERB
my ADJ
modelshello NOUN
how ADV
are VERB
you PRON


In [16]:
#dependency tagging
example = 'when it rain, it floods'
parsedEx = nlp(example)

for token in parsedEx:
  print(token.orth_, token.dep_, token.head.orth_, [t.orth_ for t in token.lefts], [t.orth_ for t in token.rights])
 

when advmod rain [] []
it nsubj rain [] []
rain advcl floods ['when', 'it'] []
, punct floods [] []
it nsubj floods [] []
floods ROOT floods ['rain', ',', 'it'] []
