In [21]:
from sklearn.decomposition import NMF
import numpy as np
from sklearn import feature_extraction
import sys
import os
stopwords = feature_extraction.text.ENGLISH_STOP_WORDS
from nltk import pos_tag
from nltk.stem import WordNetLemmatizer
lemmatiser = WordNetLemmatizer()
from string import punctuation
from nltk.tokenize import word_tokenize
import operator
import math
import re
import collections

from bs4 import BeautifulSoup

In [2]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
newsgroups_train = fetch_20newsgroups(remove=('headers', 'footers', 'quotes'))
newsgroups_label = newsgroups_train.target
newsgroups_data = newsgroups_train.data

In [8]:
from nltk.corpus import wordnet
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return ''

In [9]:
def removeStopwords(wordlist):
    return [w for w in wordlist if w not in stopwords]

In [10]:
def strip_punctuation(s):
    return ''.join(c for c in s if c not in punctuation)

In [11]:
def htmltagslines(wordlist):
    return [w for w in wordlist if w not in stopwords]

In [12]:
def getwords(sentence):
    sentence = strip_punctuation(sentence.lower())
    list_of_words = sentence.split();
    list_of_words = removeStopwords(list_of_words);
    return list_of_words

In [13]:
def lematized_word(postaged,word):
    try:
        postager = get_wordnet_pos(postaged[word])
        if(postager == ''):
            lmzword = lemmatiser.lemmatize(word)
        else:
            lmzword = lemmatiser.lemmatize(word,pos=postager)
    except:
        lmzword = lemmatiser.lemmatize(word)
        
    return lmzword

In [14]:
def term_freq_per_sentence(sentences):
    tf = {}
    for sentence in sentences:
        try:
            postaged = dict(pos_tag(word_tokenize(sentence)))
            list_of_words = getwords(sentence)
            for word in list_of_words:
                lmzword = lematized_word(postaged,word)
                tf[lmzword] = tf.get(lmzword, 0) + 1
        except:
            print(sentence)
    return tf

In [15]:
def joint_freq(sentence, bestsentences):
    
    bstcount = 0
    for bs in bestsentences:
        bstcount += len(getwords(bs))
       
    length = len(getwords(sentence)) + bstcount
    
    s_freq = term_freq_per_sentence([sentence])
    bestsentencetf = term_freq_per_sentence(bestsentences)
    
    for word in bestsentencetf:
        if word in s_freq:
            s_freq[word] += bestsentencetf[word]
        else:
            s_freq[word] = bestsentencetf[word]
            
    for word in s_freq:
        s_freq[word] /= float(length)

    return s_freq

In [16]:
def KL_dvg(joint_freq,tf):
    count = 0.0
    for word in joint_freq:
        count += tf[word] * math.log(tf[word] / joint_freq[word])
    
    return count

In [17]:
def cal_rating(doc):

    rate = {}
    e = r'\S*@\S*\s?'
    pattern = re.compile(e)
    doc = doc.strip()
    doc.replace("\n"," ")
    rate = {}
    doc = pattern.sub('', doc)
    bestsentences = []
    sentences = re.split("[.]",doc)
    sentences = list(filter(None, sentences))
    tf = term_freq_per_sentence(sentences)
    tempsentences = list(filter(None, sentences))

    count = len(sentences)
    if(count < 50):
    
        while(len(tempsentences) > 0):
            KL = []
            sentences = tempsentences
            for sentence in sentences:
                joint_freq_sentence = joint_freq(sentence,bestsentences)
                kld = KL_dvg(joint_freq_sentence,tf)
                KL.append(kld)
            index = KL.index(min(KL))
            bestsent = tempsentences.pop(index)
            bestsentences.append(bestsent)
            rate[count] = bestsent.strip()
            count -= 1

        return rate
    return 0

In [20]:
import elasticsearch
es = elasticsearch.Elasticsearch()
index_no = "klsum20ng"
doc20Ngtype = "20ngklasumm"
index = 0
for newgroupdoc in newsgroups_data:
    try:
        docsummary = cal_rating(newgroupdoc)
        if(docsummary != 0):
            print(index)
            od = [a[1] for a in sorted(docsummary.items(), key=lambda x: x[0], reverse=True) ]
            val = " ~~ ".join(od)
            es.index(index=index_no, doc_type=doc20Ngtype, body={
                    'doc_id': index,
                    'doc_text': val
                    })
        index += 1
        if(index == 50):
            break
        
    except Exception as e:
        print(e.message, e.args)
        print("Exception: " + str(index))
        print("docsummary: " + val)
        break
    

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


In [22]:
dirPath= "/Users/panktibhalani/Downloads/DUC2001"
document_map={}
for fname in os.listdir(dirPath):
    f_path= dirPath + "/" + fname
    if (not os.path.isdir(f_path)) and (not fname.startswith('notes')) and (not fname.startswith('annotations')) and (not fname.startswith('.')):
        with open(f_path,'r', encoding="utf8", errors='ignore') as file:
            soup = BeautifulSoup(file.read(),"xml")
            print(fname)
            volume = soup.find_all("TEXT")
            val =  volume[0].text.replace("<TEXT>","")
            val =  val.replace("</TEXT>","")
            document_map[fname.lower()]=val.strip()

AP830325-0143
AP880217-0175
AP880318-0051
AP880330-0119
AP880331-0140
AP880409-0015
AP880419-0131
AP880510-0178
AP880517-0226
AP880520-0264
AP880601-0040
AP880613-0161
AP880623-0135
AP880629-0159
AP880630-0295
AP880705-0006
AP880705-0018
AP880705-0109
AP880714-0142
AP880801-0195
AP880811-0299
AP880816-0234
AP880901-0052
AP880902-0062
AP880903-0092
AP880913-0129
AP880913-0204
AP880914-0027
AP880914-0079
AP880926-0203
AP880927-0089
AP880927-0117
AP880928-0054
AP880928-0146
AP881009-0072
AP881017-0235
AP881018-0136
AP881126-0007
AP881206-0114
AP881210-0115
AP881211-0027
AP881216-0017
AP881222-0089
AP881222-0119
AP881222-0126
AP881227-0185
AP890111-0217
AP890111-0227
AP890117-0132
AP890131-0280
AP890227-0016
AP890228-0019
AP890302-0063
AP890307-0150
AP890313-0198
AP890314-0237
AP890316-0018
AP890322-0010
AP890325-0029
AP890326-0081
AP890403-0123
AP890404-0260
AP890501-0176
AP890502-0205
AP890511-0126
AP890529-0030
AP890704-0043
AP890708-0135
AP890714-0129
AP890719-0225
AP890722-0081
AP8908

In [21]:
import elasticsearch
es = elasticsearch.Elasticsearch()
index_no = "klsumduc"
doc20Ngtype = "ducklasumm"
index = 0
for newgroupdoc in document_map:
    try:
        
        docsummary = cal_rating(document_map[newgroupdoc])
        if(docsummary != 0):
            print(index)
            od = [a[1] for a in sorted(docsummary.items(), key=lambda x: x[0], reverse=True) ]
            val = " ~~ ".join(od)
            print(val)
            es.index(index=index_no, doc_type=doc20Ngtype, body={
                    'doc_id': newgroupdoc,
                    'doc_text': val
                    })
        index += 1
        if(index == 50):
            break
        
    except Exception as e:
        print(e.message, e.args)
        print("Exception: " + str(index))
        print("docsummary: " + val)
        break
    

1
citizerns ~~ Rep ~~ Rep ~~ Tom Ridge, R-Pa ~~ Jan Meyers, R-Kan ~~ S ~~ S ~~ S ~~ </TEXT>
</DOC> ~~ District Court in Pittsburgh, spokesmen said at a
news conference here ~~ , suggested including a question on the
Census form asking whether respondents are U ~~ Census officials say they are required to count everyone by the
U ~~ That approach
was upheld by a federal court in a similar suit, brought by the
same immigration reform group, before the 1980 Census ~~ Some 40 members of the House joined the Federation for American
Immigration Reform in announcing that the suit would be filed
Thursday in U ~~ Constitution, which does not mention citizenship but only
instructs that the House apportionment be based on the ``whole
number of persons'' residing in the various states ~~ , said the Census Bureau should actually
count everyone but that it should develop a method to determine how
many people are illegally in the country, and them deduct that
number from the figures used for reapporti

4
''
   James R ~~ ''
</TEXT>
</DOC> ~~ Another misconception is about opening windows ~~ No matter how
much preparation you do there is some property damage and some loss
of life ~~ More than 60 percent of those occur between April and June,
records show ~~ A few twisters were already recorded in early March in Texas but
they did little or no damage ~~ At home, the best place to stay is a basement or underground
storm shelter ~~ If no such shelter is available, go inside a closet
in the center of the house or bathroom or lie flat under a heavy
table ~~ It was once
believed that windows needed to be open to equalize air pressure
between the storm and the inside of a house to prevent the house
from exploding ~~ But there are some things you can do to protect yourself
and property,'' said Laureen Chernow, a spokeswoman for the
governor's division of emergency management ~~ Take cover immediately and don't go outside, but if you
are in a car or mobile home you will be safer taking cover i

7
</TEXT>
</DOC> ~~ S ~~ S ~~ S ~~ S ~~ S ~~ He refused to
identify the government ~~ He produced a photocopy of the alleged document ~~ It appeared to
be part of a longer document with the word ``CONFIDENTIAL'' stamped
at the bottom ~~ He added that Arafat's interpretation of those contacts was
``entirely without foundation ~~ ''
   Arafat spoke at a news conference in his heavily guarded villa
in Baghdad, where extra security guards have been deployed ~~ ''
   Arafat said the document ``reveals the U ~~ <DOC>
<DOCNO> AP880510-0178 </DOCNO>
<FILEID>AP-NR-05-10-88 1630EDT</FILEID>
<FIRST>r i AM-PLO-US     05-10 0480</FIRST>
<SECOND>AM-PLO-US,0504</SECOND>
<HEAD>Arafat Says U ~~ It read:
   ``You may be aware of charges in several Middle Eastern and
particulary Palestinian circles that the U ~~ Wazir was slain April 16 during a raid on his house near Tunis,
Tunisia ~~ The document, which was typewritten in English, referred to
Wazir by his code name, Abu Jihad ~~ knew of and approved
Ab

10
S ~~ </NOTE>
<BYLINE>By DONALD W ~~ </TEXT>
</DOC> ~~ (AP) </DATELINE>
<TEXT>
   A study has found that the U ~~ SWINTON</BYLINE>
<BYLINE>Associated Press Writer</BYLINE>
<DATELINE>CORAL GABLES, Fla ~~ A previous GOES-East expired in 1984 and temporarily deprived
meteorologists of Atlantic atmospheric photographs ~~ The system contained
winds of up to 30 mph and was not expected to strengthen ~~ 22,
causing three deaths with wind gusts up to 110 mph ~~ It recovered
enough punch to belt Bermuda with 116 mph wind three days later ~~ Hurricane Arlene meandered through the Atlantic in mid-August
with top wind of 75 mph ~~ Hurricane Emily slammed into the Dominican Republic on Sept ~~ Hurricane Floyd hit Key West with 75 mph
wind but fizzled out over the Everglades and Miami in mid-October ~~ A typical Atlantic hurricane season, from June 1 to Nov ~~ In 1987 there were only three hurricanes and four tropical
storms in the Atlantic ~~ During a hurricane that struck Houston, ``glass was fl

13
Pick up 9th pvs,
`The other ~~ m ~~ m ~~ The condition of
those on board is unknown,'' Ms ~~ </TEXT>
</DOC> ~~ S ~~ S ~~ S ~~ S ~~ EDT)
near the village of Marxzell-Burbach ~~ ``There was one person aboard each aircraft ~~ Identities of the pilots were not
immediately released ~~ Hayes told The Associated Press in
a telephone interview from U ~~ </NOTE>
<DATELINE>BODENHEIM, West Germany (AP) </DATELINE>
<TEXT>
   Two U ~~ West German police and U ~~ West German police spokesman Hugo Lenxweiler told the AP in a
telephone interview that one of the pilots was killed in the
accident ~~ Both planes exploded on impact, he
said ~~ ``The other pilot was able to eject safely,'' Lenxweiler said ~~ Lenxweiler said he did not know if the pilot who ejected
suffered any injuries ~~ He said preliminary information indicated that one of the F-16s
rammed the other from behind ~~ West German police said one pilot was killed in the in-flight
crash ~~ He said the planes crashed within several
hundred y

17
''
   U ~~ S ~~ ``It's been real quiet out there,'' Sgt ~~ </TEXT>
</DOC> ~~ No injuries or property damage were
reported ~~ ``I think they've got
it just about out ~~ Stanislaus was
the scene of huge wildfires last summer ~~ Picks up in 4th graf, `A four-day ~~ , was reported extinguished on Monday ~~ High
winds Saturday tripled the fire's size ~~ The fire was the largest in the forest this year ~~ ``It may not be declared out
until there's a three-day rain,'' he said Monday ~~ Forest Service spokesman Dale Bluedorn agreed, but said
they will continue to keep patrolling ~~ Terry Leisening of the
Delta County Sheriff's Department said today ~~ However, on Sunday, winds ranging from 30 mph to 60 mph fanned
the flames out of control, Chief Ranger Bob Andrew said ~~ Fire information officer Dave Damron said water was also being
pumped out of bogs in the Shoshone forest to help contain the fire ~~ The fire, which began Saturday, was
declared contained this morning,
   ``We have evidence

22
Gov ~~ The U ~~ S ~~ S ~~ He put the cost at $15 million ~~ The supply of fresh fire crew members dwindles as workers enter
their third week of duty ~~ More than 1,900 people were assigned to fire lines and mop-up
duties in Washington state ~~ Both the Huck fire and the 112,500 Mink Creek fire also are growing
north toward Yellowstone ~~ Ted Schwinden appealed to the public Wednesday to avoid
outdoor recreation over the Labor Day weekend to limit the fire
risk ~~ 2
million acres ~~ The fire is
expected to grow to 59,000 acres before it reaches fire lines ~~ The Mariposa
blaze destroyed at least one home and grew to 250 acres ~~ The largest blaze, the Clover-Mist, grew by nearly
40,000 acres to 231,000 acres ~~ The fire was the most threatening of 17 major blazes
burning over more than 70,000 acres ~~ The state's largest fire was burning on the Indian reservation,
where 9,250 acres of timber and brush have burned ~~ Fires have
burned more than 23,000 acres in the state ~~ Eight fires

25
C ~~ Capt ~~ Cmdr ~~ m ~~ MDT ~~ It's a very
populated area ~~ ``I thought they were just doing tricks ~~ </TEXT>
</DOC> ~~ In the Atlantic accident, Lt ~~ ADDS 2 grafs on end with details ~~ A fishing boat picked up a
crewman, who was pronounced dead ~~ The identity of the dead aviator and his missing crewmate were
not released pending notification of relatives ~~ The crash off Hatteras, N ~~ The blaze ignited by the crash
destroyed a hangar and an attached extension, but spared a nearby
restaurant ~~ The $35 million jet crashed upside down into hangars at
Gillespie Field and exploded ~~ ''
   The jet passed within a mile of an elementary school ~~ The cause of the crash was not determined, officials said ~~ And then we saw the
parachutes,'' said Washington Moscuso, a sixth-grader at Ballantyne
Elementary School ~~ Authorities said the two crewman tried to guide the jet to the
runway at Gillespie Field before bailing out ~~ The
aircraft sank soon after impact, John said ~~ ``It was

28
 ~~  ~~ m ~~ 17 inches ~~ 13
inches at 5:58 p ~~ </TEXT>
</DOC> ~~ ``I've done this for 25 years so at one stage you're prepared to
do it or not ~~ Jorge steps in to
coordinate media interviews ~~ On days
with especially active tropical weather, Ms ~~ ``You feel
a lot more confident about what you're doing than with weaker
systems ~~ ''
   ``It does what we think it's going to do,'' he said ~~ I really don't think this
is difficult,'' he said ~~ The barometric pressure at the storm's center plummeted to 26 ~~ I don't feel a lot of pressure,'' said Sheets,
while eating lunch ~~ ``He knows his job very well and he's a calm personality,''
Vivian Jorge, the center's budget analyst, said of Sheets ~~ What's Frank, now a television forecaster for Texas station
KHOU-TV, doing now?
   ``He's getting geared up in Houston,'' said Sheets with a smile ~~ ``I've flown into 200 hurricanes ~~ ''
   His previous experience with hurricanes has served him well in
keeping off the pressure during these

31
He hasn't got the guile to do that ~~ ``Everybody knew in advance that Mr ~~ </TEXT>
</DOC> ~~ He did not knowingly do this ~~ ``I don't believe he did it on
purpose ~~ ``From time to time people would come to me in a private way _
and this happened one or two times ~~ ''
   Outside the townhouse, police were called in to control the
crowd and the traffic ~~ ``My brother is not guilty,'' a distaught Rodney told reporters
in her yard in the Toronto suburb of Rexdale ~~ Prime Minister Brian Mulroney, who had thanked the Jamaican-born
runner for the ``thrill of a lifetime'' after his record-breaking
9 ~~ Throughout Monday evening, local children
attempted to raise a chorus of ``Ben! Ben! Ben!'' only to give up
when the crowd would not respond ~~ Ben does not do drugs ~~ ``If you could cut
him into a million pieces and test him over again _ my brother is
not on drugs ~~ 79-second performance Saturday, called the drug scandal ``a
moment of great sorrow for all Canadians ~~ ''
   A disapp

33
Dr ~~ It's cheating ~~ ''
   ``We're feeling low ~~ ``There was enormous exhilaration ~~ 79 seconds ~~ ``But it is also just a foot race ~~ ''
</TEXT>
</DOC> ~~ ``We look up to the guy ~~ ``He's letting all his fans down ~~ He was stripped of the medal Tuesday ~~ Now it has forfeited
its only gold medal of the Olympics ~~ Canadian youth took the news hard ~~ ``What Johnson did was wrong ~~ Johnson, a 26-year-old Jamaican transplant, waved the Canadian
flag in triumph and dedicated his gold to his mother and all
Canadian citizens ~~ ``The
Ben Johnson episode is a tragedy of shocking proportions ~~ George Astaphan, Johnson's personal physician, and Larry
Heidebrecht, Johnson's agent, insisted the sprinter had not taken
stanzolol ~~ The euphoria was dashed when Johnson tested positive
for stanzolol, a muscle-building steroid outlawed by Olympic
officials ~~ The spirit of a nation raced with Johnson when he won the gold
medal in the 100 meters on Saturday with a world record time of
9 ~

37
</TEXT>
</DOC> ~~ It often can be controlled through diet and exercise ~~ Luke's-Roosevelt Hospital Center in New York ~~ Complications can include kidney disease, blindness, and gangrene
that requires amputations ~~ Currently, obesity is considered a major
contributor to the disease rather than a result of it ~~ Insulin normally controls the level of blood sugar ~~ The new work is ``a very important finding'' if amylin truly
blocks insulin and appears in abnormal amounts in diabetics, F ~~ ``We have a lot of evidence that this is likely to be, if not the
final cause, at least a major part of the disease process,'' said
New Zealand biochemist Garth Cooper ~~ In his presentation, Cooper said the hormone, dubbed ``amylin,''
was normally undetectable but found in high levels in the pancreases
of diabetics ~~ The research ``opens the door to the scientific study of the
disease at a level that wasn't possible before and potentially the
mechanisms that we uncover may be very wide ranging,

41
``No ~~ </TEXT>
</DOC> ~~ The panel's report will remain open for public comment for 60
days before any recommendations are adopted ~~ ``My guess is that the moratorium would be finished by the
beginning of the Western fire season, which is the middle of May,''
Charles Philpot, co-chairman of the review panel, told a news
conference ~~ The panel was assembled last September after the worst fire
season ever in drought-primed Yellowstone National Park ~~ The review team was asked to look at policies throughout the
national parks and wilderness areas, not just Yellowstone ~~ However,
it did not consider policies in other areas such as ordinary
national forests, where the Forest Service tries to protect
commercial timbering operations ~~ Some 249
fires seared 706,278 acres within the park boundaries and 40 percent
as much again in nearby national forests ~~ Residents in the fire vicinity complained bitterly that the park,
tourism and the very air they breathed were being ruined by the
f

44
S ~~ m ~~ 2 ~~ 8 in Tracy, Ariz ~~ ''
</TEXT>
</DOC> ~~ EST about four miles
south of St ~~ Ives, or about 25 miles north of Cambridge ~~ 75 million flight hours ~~ He said he had no information about damage on the ground ~~ The Pentagon said the pilot might have ejected safely ~~ ''
   ``The pilot ejected and his chute was sighted but we don't know
his status,'' Howard said ~~ A Class A mishap is one in which there is either a fatality or
damage exceeding $500,000 to an aircraft ~~ The aircraft was carrying ``dummy bombs and practice 33mm
ammunition,'' the spokesman said ~~ The aircraft has chalked up
a major accident rate of less than 4 Class A mishaps per 100,000
flying hours over its lifetime _ which now consists of more than
1 ~~ The suspension was ordered as German political parties and public
interest groups increased their demands for an end to low-level
flight training in Germany ~~ Howard said the plane ``crashed at 9:53 a ~~ It was the second crash of a Thunderbolt jet in

In [19]:
sum_map = {}
for fname in os.listdir(dirPath + "/Summaries"):
        f_path= dirPath + "/Summaries/" + fname
        with open(f_path,'r', encoding="utf8", errors='ignore') as fileopned:
            sum_map[fname.split("/")[0].split(".")[0].lower()]=fileopned.read()

In [27]:
from rouge import Rouge
rouge = Rouge()

index = 0
for newgroupdoc in document_map:
    docsummary = cal_rating(document_map[newgroupdoc])
    if(docsummary != 0):
        od = [a[1] for a in sorted(docsummary.items(), key=lambda x: x[0], reverse=True) ]
        
        print(sum_map[newgroupdoc])
        print(" ".join(od))
        
        scores = rouge.get_scores(sum_map[newgroupdoc], " ".join(od))
        print(str(index) + ":" + str(scores))
        
    index += 1
    
    if(index == 5):
        break

Abstract:
Some 40 members of Congress have joined with the Federation for American Immigration Reform in announcing that a suit will be filed in an effort to stop the Census Bureau from counting the estimated 2 million illegal aliens living in the United States. Rep. Tom Ridge, R-Pa., said everyone should be counted but the Census Bureau should develop a method to determine how many people are illegal and deduct that number from the figures used for reapportioning Congress. \011 Rep. Jan Meyers, R-Kan., suggested that a question should be put on the Census form asking whether respondents are U.S. citizens.
Introduction:
   A coalition of members of Congress announced
Wednesday that they plan to sue the Census Bureau in an effort to
force the agency to delete illegal aliens from its count in 1990.
   Some 40 members of the House joined the Federation for American
Immigration Reform in announcing that the suit would be filed
Thursday in U.S. District Court in Pittsburgh, spokesmen said a

2:[{'rouge-1': {'f': 0.9026548622653398, 'p': 0.866504854368932, 'r': 0.941952506596306}, 'rouge-2': {'f': 0.8463999950184834, 'p': 0.7978883861236803, 'r': 0.9011925042589438}, 'rouge-l': {'f': 0.2524264184115996, 'p': 0.23417721518987342, 'r': 0.2854938271604938}}]
Abstract:
Population experts say that little would change in allocating seats in Congress if two sides get their way in trying to force changes in the 1990 census. One group has filed suit to ignore the aliens while other groups want the final census count to be increased to account for people who may be overlooked or undercounted. William O'Hare, director of policy studies for the independent Population Reference Bureau, said "the only change would be a flip-flop of one seat from California to Georgia". If aliens are counted, California is estimated to gain six seats with four additional seats going to Florida and three to Texas.
Introduction:
   If the two sides trying to force changes in
the 1990 census both get their w

4:[{'rouge-1': {'f': 0.8885448866502602, 'p': 0.8516320474777448, 'r': 0.9288025889967637}, 'rouge-2': {'f': 0.8237623712602884, 'p': 0.7717996289424861, 'r': 0.8832271762208068}, 'rouge-l': {'f': 0.2816179560076765, 'p': 0.2593192868719611, 'r': 0.3258655804480652}}]
