In [None]:
# importing packages
import nltk
import re
from nltk.tokenize import sent_tokenize, word_tokenize
nltk.download('punkt')
!pip install scikit-fuzzy
!pip install sumy
import pandas as pd

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
Collecting scikit-fuzzy
[?25l  Downloading https://files.pythonhosted.org/packages/6c/f0/5eb5dbe0fd8dfe7d4651a8f4e591a196623a22b9e5339101e559695b4f6c/scikit-fuzzy-0.4.2.tar.gz (993kB)
[K     |████████████████████████████████| 1.0MB 5.2MB/s 
Building wheels for collected packages: scikit-fuzzy
  Building wheel for scikit-fuzzy (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-fuzzy: filename=scikit_fuzzy-0.4.2-cp36-none-any.whl size=894070 sha256=10ac57f2405664f851434f07cdf34c456970d15e2736ac39cad276ac67ba60b3
  Stored in directory: /root/.cache/pip/wheels/b9/4e/77/da79b16f64ef1738d95486e2731eea09d73e90a72465096600
Successfully built scikit-fuzzy
Installing collected packages: scikit-fuzzy
Successfully installed scikit-fuzzy-0.4.2
Collecting sumy
[?25l  Downloading https://files.pythonhosted.org/packages/61/20/8abf92617ec80a2ebaec8dc1646a790fc9656a4a4377ddb9f0cc90bc9326/

In [None]:
# applying text cleaning
def text_cleaner(text):
  """Normalise raw text before it is tokenised.

  Steps, in order:
    1. convert the input to ``str`` and lower-case it;
    2. replace every character that is not an English letter, a digit,
       a space, a period or a question mark with a single space;
    3. collapse each run of spaces into one space;
    4. collapse each run of two or more periods into one period.

  Args:
    text: the text to clean. Non-string values are converted with ``str()``.

  Returns:
    The cleaned, lower-cased string.
  """
  # Lower-case the string that is actually carried forward; the result of
  # lower() must feed the substitutions below, otherwise it is lost.
  new_text = str(text).lower()
  new_text = re.sub("[^a-zA-Z0-9 .?]", " ", new_text)    # removing anything other than english letters, digits, period symbols and question marks
  # " +" means "one or more spaces"; "[ +]" would be a character class that
  # matches a single space or a literal plus sign and collapses nothing.
  new_text = re.sub(" +", " ", new_text) # removing unwanted whitespace
  new_text = re.sub("[.]{2,}",".", new_text) # removing unwanted period symbols
  return new_text

In [None]:
# removing stopwords
nltk.download('stopwords')
nltk.download('punkt')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
def stopwords_remover(text):
  """Remove English stop words from ``text``.

  The text is split with ``word_tokenize`` and every token whose lower-cased
  form appears in NLTK's English stop-word list is dropped. The comparison is
  case-insensitive because the stop-word list is lower-case, so capitalised
  stop words such as "The" are removed as well.

  Args:
    text: the text to filter.

  Returns:
    The surviving tokens in their original casing, each followed by a single
    space (so a non-empty result ends with a trailing space).
  """
  stop_words = set(stopwords.words('english'))
  kept_tokens = [
    token for token in word_tokenize(text)
    if token.lower() not in stop_words
  ]
  # Each token keeps its trailing space to preserve the established output format.
  return "".join(token + " " for token in kept_tokens)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
# applying lemmatisation (not used in program)
nltk.download('wordnet')
def lemmatiser(text):
  """Lemmatise every word of ``text`` with NLTK's WordNet lemmatiser.

  ``WordNetLemmatizer.lemmatize`` works on one word at a time, so the text is
  tokenised first and each token is lemmatised separately. A single-word
  input gives the same result as calling the lemmatiser on that word directly.

  Args:
    text: a word or a longer piece of text.

  Returns:
    The lemmatised tokens joined by single spaces.
  """
  from nltk.stem import WordNetLemmatizer
  lemmatizer = WordNetLemmatizer()
  return " ".join(lemmatizer.lemmatize(token) for token in word_tokenize(text))

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


In [None]:
# applying stemming
nltk.download('wordnet')
def stemmer(text):
  """Stem every word of ``text`` with NLTK's Porter stemmer.

  The text is split into sentences, each sentence into word tokens, and each
  token is replaced by its stem.

  Args:
    text: the text to stem.

  Returns:
    All stems in their original order, each followed by a single space.
  """
  from nltk.stem import PorterStemmer
  porter = PorterStemmer()
  pieces = []
  for sentence in sent_tokenize(text):
    pieces.extend(porter.stem(token) + " " for token in word_tokenize(sentence))
  return "".join(pieces)

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [None]:
import math
def tf_isf_score(text):
  """Score each sentence of ``text`` by TF-ISF (term frequency x inverse sentence frequency).

  For a sentence ``s`` the score is the sum, over the distinct words ``w`` of
  ``s``, of ``tf(w, s) * log(N / sf(w))`` where

    * ``tf(w, s)`` is how many times ``w`` occurs in ``s``,
    * ``sf(w)`` is the number of sentences that contain ``w`` at least once,
    * ``N`` is the number of sentences.

  Because ``sf(w) <= N``, every term is non-negative.

  Args:
    text: the text to score.

  Returns:
    A list with one score per sentence, in sentence order. Empty text gives
    an empty list.
  """
  sentences = sent_tokenize(text)
  sent_count = len(sentences)
  # Tokenise each sentence exactly once and reuse the result in both passes.
  tokenised = [word_tokenize(sentence) for sentence in sentences]

  # Sentence frequency: each sentence contributes at most 1 per word,
  # hence the set() around the sentence's tokens.
  sf = dict()
  for tokens in tokenised:
    for word in set(tokens):
      sf[word] = sf.get(word, 0) + 1

  scores = []
  for tokens in tokenised:
    # Term frequency is local to this sentence (tracked per position, not per
    # sentence text), so two identical sentences cannot share counters.
    tf = dict()
    for word in tokens:
      tf[word] = tf.get(word, 0) + 1
    # Sum over distinct words so a repeated word is weighted by its count
    # once rather than once per occurrence.
    scores.append(sum(count * math.log(sent_count / sf[word]) for word, count in tf.items()))
  return scores

In [None]:
# returns position score
from nltk.tokenize import sent_tokenize
def position_score(text):
  """Give each sentence a weight that depends on where it sits in the text.

  The relative position of the i-th sentence (1-based) is ``i / sentence_count``.
  That value selects one of ten bands of width 0.1; the band's weight divided
  by the sentence count is the sentence's score.

  Args:
    text: the text to score.

  Returns:
    A list with one score per sentence, in sentence order.
  """
  # (inclusive upper bound of the band, weight of the band), ascending.
  weight_bands = (
    (0.1, 0.17),
    (0.2, 0.23),
    (0.3, 0.14),
    (0.4, 0.08),
    (0.5, 0.05),
    (0.6, 0.04),
    (0.7, 0.06),
    (0.8, 0.04),
    (0.9, 0.04),
    (1.0, 0.15),
  )
  sentence_list = sent_tokenize(text)
  total = len(sentence_list)
  weights = []
  for rank in range(1, total + 1):
    relative = rank / total
    # The first band whose upper bound is not exceeded is the matching one.
    for upper, weight in weight_bands:
      if relative <= upper:
        weights.append(weight / total)
        break
  return weights

In [None]:
# returns part-of-speech tags and named-entity chunks
from nltk import word_tokenize, pos_tag, ne_chunk
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
def pos_tags(text):
  """Tokenise ``text`` into words and return the result of NLTK's ``pos_tag`` on those tokens."""
  tokens = word_tokenize(text)
  return pos_tag(tokens)
def named_entity(text):
  """Part-of-speech tag ``text`` with ``pos_tags`` and return the result of NLTK's ``ne_chunk`` on the tagged tokens."""
  return ne_chunk(pos_tags(text))

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker.zip.
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.


In [None]:
# returns each sentence's length (in characters) relative to the longest sentence
def sent_length_score(text):
  """Score each sentence by its length relative to the longest sentence.

  Length is measured in characters. The longest sentence scores 1.0.

  Args:
    text: the text to score.

  Returns:
    A list with one value in [0, 1] per sentence, in sentence order. Text
    without any sentence gives an empty list.
  """
  sentences = sent_tokenize(text)
  if not sentences:
    # max() over an empty sequence would raise ValueError.
    return []
  max_len = max(len(sentence) for sentence in sentences)
  if max_len == 0:
    # Only empty sentences: avoid dividing by zero.
    return [0.0] * len(sentences)
  return [len(sentence) / max_len for sentence in sentences]

In [None]:
# returns the fraction of numeric tokens in each sentence
def sent_numerical_score(text):
  """Return, for each sentence, the share of its tokens that are numbers.

  A token counts as a number when it consists only of digits after at most
  one period has been removed (so "42" and "3.14" both count).

  Args:
    text: the text to score.

  Returns:
    A list with one ratio (numeric tokens / all tokens) per sentence, in
    sentence order.
  """
  ratios = []
  for sentence in sent_tokenize(text):
    tokens = word_tokenize(sentence)
    numeric_tokens = sum(1 for token in tokens if token.replace('.','',1).isdigit())
    ratios.append(numeric_tokens / len(tokens))
  return ratios

In [None]:
# returns each sentence's noun ('NN' tag) count, normalised by the largest count
from collections import Counter
def noun_counter(text):
  """Score each sentence by its number of nouns, normalised to [0, 1].

  Only tokens tagged ``'NN'`` by ``pos_tags`` are counted. Each count is
  divided by the largest count found in any sentence.

  Args:
    text: the text to score.

  Returns:
    A list with one value per sentence, in sentence order. If no sentence
    contains an ``'NN'`` token every score is 0.0; empty text gives an
    empty list.
  """
  counts = []
  for sentence in sent_tokenize(text):
    tag_counts = Counter(tag for _, tag in pos_tags(sentence))
    counts.append(tag_counts['NN'])
  highest = max(counts, default=0)
  if highest == 0:
    # No nouns anywhere (or no sentences): dividing by the maximum would
    # raise ZeroDivisionError, so report zero for every sentence instead.
    return [0.0] * len(counts)
  return [count / highest for count in counts]

In [None]:
# returns sentiment polarity
def sentiment_score(text):
  """Return the absolute sentiment polarity of each sentence.

  Every sentence is analysed on its own with TextBlob, and the sign of the
  polarity is dropped so that strongly positive and strongly negative
  sentences score equally high.

  Args:
    text: the text to score.

  Returns:
    A list with one non-negative polarity magnitude per sentence, in
    sentence order.
  """
  from textblob import TextBlob
  # Analyse the individual sentence, not the whole text; otherwise every
  # sentence would receive the same document-level score.
  return [abs(TextBlob(sentence).sentiment.polarity) for sentence in sent_tokenize(text)]

In [None]:
def statistical_method(text,op_len):
  """Build an extractive summary from the highest-scoring sentences of ``text``.

  Six per-sentence scores are computed (TF-ISF, position, length, numeric
  content, noun count and sentiment magnitude) and summed. The sentences are
  ranked by that total and the best ``op_len`` of them are returned, highest
  score first.

  Args:
    text: the text to summarise.
    op_len: number of sentences wanted in the summary. ``None`` selects one
      fifth of the sentence count, but never fewer than one sentence. Values
      larger than the sentence count are capped at it; values below zero
      are treated as zero.

  Returns:
    The selected sentences, each followed by a single space. Text without
    any sentence gives an empty string.
  """
  sentences = nltk.sent_tokenize(text)
  if not sentences:
    # Nothing to rank; the scorers are not meant to handle empty input.
    return ""
  if op_len is None:
    # Short texts (< 5 sentences) would otherwise yield an empty summary.
    op_len = max(1, len(sentences)//5)
  # Keep the requested length inside [0, number of sentences].
  op_len = max(0, min(op_len, len(sentences)))

  # for each sentence these 6 scores are calculated
  feature_scores = (
    tf_isf_score(text),
    position_score(text),
    sent_length_score(text),
    sent_numerical_score(text),
    noun_counter(text),
    sentiment_score(text),
  )
  # the scores are summed, sentence by sentence
  scores = [sum(per_sentence) for per_sentence in zip(*feature_scores)]
  # Sentence indices ordered by descending score; sorted() is stable, so
  # sentences with equal scores keep their original relative order.
  ranking = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
  return "".join(sentences[index] + " " for index in ranking[:op_len])

In [None]:
text = "A car (or automobile) is a wheeled motor vehicle used for transportation. Most definitions of cars say that they run primarily on roads, seat one to eight people, have four tires, and mainly transport people rather than goods.[2][3]Cars came into global use during the 20th century, and developed economies depend on them. The year 1886 is regarded as the birth year of the modern car when German inventor Karl Benz patented his Benz Patent-Motorwagen. Cars became widely available in the early 20th century. One of the first cars accessible to the masses was the 1908 Model T, an American car manufactured by the Ford Motor Company. Cars were rapidly adopted in the US, where they replaced animal-drawn carriages and carts, but took much longer to be accepted in Western Europe and other parts of the world.[citation needed]Cars have controls for driving, parking, passenger comfort, and a variety of lights. Over the decades, additional features and controls have been added to vehicles, making them progressively more complex, but also more reliable and easier to operate.[citation needed] These include rear reversing cameras, air conditioning, navigation systems, and in-car entertainment. Most cars in use in the 2010s are propelled by an internal combustion engine, fueled by the combustion of fossil fuels. Electric cars, which were invented early in the history of the car, became commercially available in the 2000s and are predicted to cost less to buy than gasoline cars before 2025.[4][5]There are costs and benefits to car use. The costs to the individual include acquiring the vehicle, interest payments (if the car is financed), repairs and maintenance, fuel, depreciation, driving time, parking fees, taxes, and insurance.[6] The costs to society include maintaining roads, land use, road congestion, air pollution, public health, health care, and disposing of the vehicle at the end of its life. 
Traffic collisions are the largest cause of injury-related deaths worldwide.[7]The personal benefits include on-demand transportation, mobility, independence, and convenience.[8] The societal benefits include economic benefits, such as job and wealth creation from the automotive industry, transportation provision, societal well-being from leisure and travel opportunities, and revenue generation from the taxes. People's ability to move flexibly from place to place has far-reaching implications for the nature of societies.[9] There are around 1 billion cars in use worldwide. The numbers are increasing rapidly, especially in China, India and other newly industrialized countries[10]."
# Demo run on the sample text defined above.
# None lets statistical_method choose the summary length itself.
output_sentence = None
summary = statistical_method(text,output_sentence)
# Re-tokenise the summary to report how many sentences it contains.
print("No of sentences in output :",len(nltk.sent_tokenize(summary)))
print(summary)

No of sentences in output : 4
[8] The societal benefits include economic benefits, such as job and wealth creation from the automotive industry, transportation provision, societal well-being from leisure and travel opportunities, and revenue generation from the taxes. Cars were rapidly adopted in the US, where they replaced animal-drawn carriages and carts, but took much longer to be accepted in Western Europe and other parts of the world. Most definitions of cars say that they run primarily on roads, seat one to eight people, have four tires, and mainly transport people rather than goods. Electric cars, which were invented early in the history of the car, became commercially available in the 2000s and are predicted to cost less to buy than gasoline cars before 2025. 


In [None]:
# Interactive run: summarise text typed (or pasted) by the user.
input_text = input("Enter the text to be summarised :")
# None lets statistical_method choose the summary length itself.
output_sentence = None
summary = statistical_method(input_text,output_sentence)
# Re-tokenise the summary to report how many sentences it contains.
print("No of sentences in output :",len(nltk.sent_tokenize(summary)))
print(summary)

Enter the text to be summarised :A car (or automobile) is a wheeled motor vehicle used for transportation. Most definitions of cars say that they run primarily on roads, seat one to eight people, have four tires, and mainly transport people rather than goods.[2][3]Cars came into global use during the 20th century, and developed economies depend on them. The year 1886 is regarded as the birth year of the modern car when German inventor Karl Benz patented his Benz Patent-Motorwagen. Cars became widely available in the early 20th century. One of the first cars accessible to the masses was the 1908 Model T, an American car manufactured by the Ford Motor Company. Cars were rapidly adopted in the US, where they replaced animal-drawn carriages and carts, but took much longer to be accepted in Western Europe and other parts of the world.[citation needed]Cars have controls for driving, parking, passenger comfort, and a variety of lights. Over the decades, additional features and controls have b