In [3]:
# The __future__ import has to be the first statement: anywhere else it is a
# SyntaxError as soon as this code is run as a script or module.
from __future__ import print_function

import functools
import itertools
import re
from collections import Counter as c

import en_core_web_md
import numpy as np
import spacy
from gensim.models import Word2Vec as wv
from spacy import en
from spacy.en.language_data import STOP_WORDS

# spaCy English pipeline; used as the default parser of Summarizer.
nlp = en_core_web_md.load()

# Map every spaCy stop word to an empty string; only the keys carry information.
stopwords = dict.fromkeys(STOP_WORDS, '')

# NOTE(review): 'models/Insert_Model_Name' looks like a placeholder path -
# point it at a saved gensim Word2Vec model before running.
model = wv.load('models/Insert_Model_Name')

In [229]:
class Summarizer(object):
    """
    Extractive summarizer that ranks sentences by the weights of their nouns.

    Every noun / proper noun of the text gets a weight built from three parts:
    its frequency in the text, its frequency in the word2vec vocabulary and
    its summed similarity to the other nouns of the text. A sentence's score
    is the sum of the weights of its tokens, and the summary consists of the
    highest scoring sentences.
    """

    def __init__(self,
                 model=model,
                 parser=nlp
                ):
        """
        Store the models used for summarization.

        1. model (input): word2vec model. It is accessed through
                          ``model.wv.vocab[word].count``, ``model.wv[word]``
                          and ``model.wv.similarity(w1, w2)``.
                          Defaults to the module-level ``model``.

        2. parser (input): Callable that turns a text into a parsed document
                           exposing ``.sents``; its tokens must expose
                           ``.pos_`` and ``.text``.
                           Defaults to the module-level ``nlp``.

        Note: both defaults are bound once, when the class is defined.
        """
        self.model = model
        self.parser = parser

    def tokenizer(self, parsed):
        """
        Collects the nouns of a parsed text together with their frequencies.

        1. parsed (input): SpaCy parsed text
            * format -> <class 'spacy.tokens.doc.Doc'>

        2. words (output): Unique, lower-cased nouns and proper nouns
                           (in arbitrary order, as they come out of a set)
            * format: list

        3. word_to_localcount (output): Mapping of nouns to their frequency
                                        in the input text
            * format: collections.Counter

        4. word_to_globalcount (output): Mapping of nouns to their frequency
                                         in the word2vec model's vocab
                                         (0 for out-of-vocabulary nouns)
            * format: dict
        """
        nouns = [
            token.text.lower()
            for sent in parsed.sents
            for token in sent
            if token.pos_ in ('NOUN', 'PROPN')
        ]

        word_to_localcount = c(nouns)
        words = list(set(nouns))

        word_to_globalcount = {}
        for word in words:
            try:
                word_to_globalcount[word] = self.model.wv.vocab[word].count
            except KeyError:
                # The noun is unknown to the word2vec model.
                word_to_globalcount[word] = 0

        return words, word_to_localcount, word_to_globalcount

    def semantic_centroids(self, nouns):
        """
        Scores every noun by its summed similarity to all the other nouns.

        1. nouns (input): List of nouns
            * format -> list

        2. noun_to_score (output): Mapping of nouns to their relevance weight.
                                   A noun missing from the model's vocab gets
                                   the fixed score 1; otherwise the score is
                                   the sum of its similarities to every other
                                   noun that the model knows.
            * format: dict
        """
        # Use the model handed to the constructor, not the module-level one,
        # so that a custom model is honoured here as well.
        vectors = self.model.wv

        noun_to_score = {}
        for noun in nouns:
            try:
                # Lookup only serves as a vocabulary membership probe.
                vectors[noun]
            except KeyError:
                noun_to_score[noun] = 1
                continue

            score = 0
            for other in nouns:
                if other == noun:
                    continue
                try:
                    score += vectors.similarity(noun, other)
                except KeyError:
                    # `other` is out of vocabulary: it contributes nothing.
                    pass
            noun_to_score[noun] = score

        return noun_to_score

    def get_wordweight(self, parsed):
        """
        Combines frequency and semantic information into one weight per noun.

        weight = log(local + 1) / log((global + 2) ** 2) * centroid

        where `local` / `global` are the counts returned by `tokenizer` and
        `centroid` is the score returned by `semantic_centroids`.

        1. parsed (input): SpaCy parsed text
            * format -> <class 'spacy.tokens.doc.Doc'>

        2. word_to_weight (output): Mapping of nouns to their respective
                                    frequency and semantics based weight
            * format: dict
        """
        words, local_counts, global_counts = self.tokenizer(parsed)
        centroids = self.semantic_centroids(words)

        word_to_weight = {}
        for word in words:
            local_term = np.log(local_counts[word] + 1)
            # The +2 keeps the denominator at log(4) or more, even for
            # out-of-vocabulary nouns whose global count is 0.
            global_term = np.log((global_counts[word] + 2) ** 2)
            word_to_weight[word] = local_term / global_term * centroids[word]

        return word_to_weight

    def get_summary(self, text, num_sentences=5):
        """
        Builds an extractive summary out of the best scoring sentences.

        1. text (input): Input text to be summarized
            * format -> Unicode

        2. num_sentences (input): Maximum number of sentences in the summary
            * format -> int (default: 5)

        3. summary (output): The selected sentences joined by blank lines,
                             ordered from highest to lowest score (ties are
                             broken by the sentence text, not by the position
                             in the document)
            * format: Unicode
        """
        parsed = self.parser(text)
        word_to_weight = self.get_wordweight(parsed)

        scored_sents = []
        for sent in parsed.sents:
            # Tokens without a weight (everything but nouns) count as 0.
            score = sum(word_to_weight.get(token.text.lower(), 0)
                        for token in sent)
            scored_sents.append((score, sent.text))

        best = sorted(scored_sents, reverse=True)[:num_sentences]
        return "\n\n".join([sent_text for _, sent_text in best])

In [223]:
# Build a summarizer with the default word2vec model and spaCy parser.
summ = Summarizer()

In [224]:
# NOTE(review): `text` is not defined in the visible cells - presumably it is
# assigned in an earlier cell; confirm before re-running.
summary = summ.get_summary(text)

In [225]:
# Print the extractive summary (the selected sentences, separated by blank lines).
print (summary)

The lack of type information in function signatures combined with support for operator overloading and just-in-time loading of modules at runtime means that the most common type inference algorithms have nothing to work with until the point in the program's execution when the types are known anyway.

Really finally, there is a version of Python called Jython that compiles to JVM byte-codes, allowing very simple integration of Python with Java, and which gives Python programmers access to Java's deep and powerful libraries.

Java's automatic type conversions are extremely limited, and the compiler insists that objects passed through interfaces be of a type convertible to the target type, either by inheritance or automatic type promotion.

In Python, names have no strong binding to their type, and thanks to duck typing, function arguments can be used to pass in any object whose interface supports the operations required by the function.

In Python this isn't possible, but there is a supe