In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

from sklearn.metrics.pairwise import cosine_similarity
import spacy
nlp = spacy.load('en')
from spacy.symbols import nsubj, dobj, VERB

In [2]:
from settings import *
from utils import *

### Loads GloVe Embeddings

In [3]:
from gloveEmbeddings import *
# Load the GloVe embeddings from the file at `gloveFile`.
# NOTE(review): `gloveFile` is not defined in this notebook; presumably it
# comes from the star import of `settings` -- confirm.
loadGloveEmbeddings(gloveFile)

### Creates Keyword Vectors

In [30]:
# Subject keywords: written sources that can make a claim.
sourcesKeywords = [
    'paper', 'report', 'study', 'analysis',
    'research', 'survey', 'release',
]
# Subject keywords: people that can make a claim.
peopleKeywords = ['expert', 'scientist']
# Predicate keywords: verbs that introduce a claim.
actionsKeywords = [
    'prove', 'demonstrate', 'reveal', 'state', 'mention', 'report',
    'say', 'show', 'announce', 'claim', 'suggest', 'argue',
]

# One embedding per keyword, in the same order as the keyword lists above.
sourcesKeywordsVec = list(map(word2vec, sourcesKeywords))
peopleKeywordsVec = list(map(word2vec, peopleKeywords))
actionsKeywordsVec = list(map(word2vec, actionsKeywords))

### Searches (in the embedding vector space) for sentences that contain a word close to a subject keyword and then a word close to a predicate keyword.

In [16]:
def _matchesAnyKeyword(wordVec, keywordVecs, threshold):
    """Return True if |cosine similarity| between `wordVec` and any vector in
    `keywordVecs` exceeds `threshold`."""
    wordRow = wordVec.reshape(1, -1)  # cosine_similarity expects 2-D inputs
    return any(
        abs(cosine_similarity(keywordVec.reshape(1, -1), wordRow)[0, 0]) > threshold
        for keywordVec in keywordVecs
    )


def keywordSearch(title, body, subjectThreshold=0.9, predicateThreshold=0.9):
    """Return the sentences of `body` that contain a subject keyword followed
    by a predicate keyword, matched by embedding similarity.

    A sentence is scanned token by token. Once a token is close to one of the
    subject vectors (`sourcesKeywordsVec` + `peopleKeywordsVec`), the same and
    all following tokens are compared with `actionsKeywordsVec`; the first
    predicate hit marks the whole sentence as a claim.

    Parameters
    ----------
    title : unused; kept for interface compatibility.
    body : text that is split into sentences with `sent_tokenize`.
    subjectThreshold : minimum |cosine similarity| for a subject match.
    predicateThreshold : minimum |cosine similarity| for a predicate match.

    Returns
    -------
    list of str
        The matching sentences, in document order.
    """
    # Loop-invariant: build the combined subject list once.
    subjectVecs = sourcesKeywordsVec + peopleKeywordsVec

    claims = []
    for sentence in sent_tokenize(body):
        subjectFound = predicateFound = False
        for word in wordpunct_tokenize(sentence):
            wVec = word2vec(word)

            if not subjectFound:
                subjectFound = _matchesAnyKeyword(wVec, subjectVecs, subjectThreshold)

            # NOTE(review): the token that matched as subject is also tested as
            # predicate (e.g. 'report' is in both keyword lists); this mirrors
            # the original behaviour.
            if subjectFound and _matchesAnyKeyword(wVec, actionsKeywordsVec, predicateThreshold):
                predicateFound = True
                break

        # Record the sentence after the scan so that a predicate found on the
        # very last token is not lost.
        if predicateFound:
            claims.append(sentence)
    return claims


In [80]:
def claimExtraction(limitDocuments=10):
    """Extract claim sentences, and who made them, from queried documents.

    Every document body is split into sentences. A sentence is reported as a
    claim when a noun chunk is attached to a verb whose lemma matches one of
    `actionsKeywords`. Each claim and its resolved claimer are printed.

    Parameters
    ----------
    limitDocuments : forwarded to `createQuery` together with 'web'
        (presumably the maximum number of documents to fetch -- confirm
        against `createQuery`).

    Returns
    -------
    int
        Number of claims found for the entry labelled 0 of the per-document
        results (`claims[0]`), or 0 when there are no results at all.

    Side effects
    ------------
    Resets the module-level counter `cc`, increments it once per detected
    claim and prints its final value.
    """
    query = createQuery(limitDocuments, 'web')
    documents = queryDB(query)

    global cc
    cc = 0

    # Lemmatise the keyword lists once instead of re-running the spaCy
    # pipeline for every candidate verb / claimer.
    actionLemmas = {nlp(keyword)[0].lemma_ for keyword in actionsKeywords}
    sourceLemmas = {nlp(keyword)[0].lemma_ for keyword in sourcesKeywords}
    peopleLemmas = {nlp(keyword)[0].lemma_ for keyword in peopleKeywords}

    def checkAction(w):
        """True if token `w` has the lemma of one of the action keywords."""
        return w is not None and w.lemma_ in actionLemmas

    def firstEntity(entities, labels):
        """Return (text, label) of the first entity whose label is in
        `labels`, or None when there is no such entity."""
        for e in entities:
            if e.label_ in labels:
                return e.text, e.label_
        return None

    def resolveClaimer(claimer, entities):
        """Map the raw claimer text to a (name, type) pair.

        Prefers a PERSON/ORG entity of the sentence; generic sources and
        people ('study', 'expert', ...) fall back to the first suitable
        entity, and finally to a placeholder with type 'unknown'.
        """
        try:
            c = next(nlp(claimer).noun_chunks)
        except Exception:
            # Best effort: no usable noun chunk in the claimer text (e.g. it
            # is empty). `Exception` rather than a bare except so that
            # KeyboardInterrupt can still stop a long run.
            return firstEntity(entities, ('PERSON', 'ORG')) or ('', 'unknown')

        # The claimer itself is a recognised person or organisation.
        for e in entities:
            if c.text == e.text and e.label_ in ['PERSON', 'ORG']:
                return c.text, e.label_

        # Generic source ("the study", "a report", ...).
        if c.root.lemma_ in sourceLemmas:
            return firstEntity(entities, ('PERSON', 'ORG')) or ('study', 'unknown')

        # Generic person ("experts", "scientists"): only an ORG is accepted.
        if c.root.lemma_ in peopleLemmas:
            return firstEntity(entities, ('ORG',)) or ('expert', 'unknown')

        return c.text, 'unknown'

    def dependencyGraphSearch(title, body):
        """Return the claim sentences of `body`; `title` is unused.

        Prints every claim with its resolved claimer and increments `cc`.
        """
        global cc

        claims = []
        for s in sent_tokenize(body):
            doc = nlp(s)

            # Positions of all tokens that head another token and are verbs,
            # in sentence order (a set alone would give an arbitrary order).
            verbIdx = sorted({tok.head.i for tok in doc if tok.head.pos == VERB})
            if not verbIdx:
                continue

            # The root is the token that is its own head. Compare positions
            # rather than object identity, which is not guaranteed for tokens.
            rootIdx = next((tok.i for tok in doc if tok.head.i == tok.i), None)

            # Check the root first and then the remaining verbs, each once.
            candidates = [] if rootIdx is None else [rootIdx]
            candidates += [i for i in verbIdx if i != rootIdx]

            claimFound = claimerFound = False
            claimer = ""
            for idx in candidates:
                verb = doc[idx]
                if not checkAction(verb):
                    continue

                for chunk in doc.noun_chunks:
                    if chunk.root.head.i != verb.i:
                        continue
                    if chunk.root.dep == nsubj:
                        claimer = chunk.text
                        claimerFound = True
                    # TODO: handle direct objects (chunk.root.dep == dobj).
                    # Any noun chunk attached to an action verb marks a claim.
                    claimFound = True

                if claimerFound:
                    break

            if claimFound:
                cc += 1
                claims.append(s)
                claimer, claimerType = resolveClaimer(claimer, doc.ents)
                print('claim: ', s)
                print('by: ', claimer, '(', claimerType, ')')
                print()

        return claims

    claims = documents.apply(lambda d: dependencyGraphSearch(d['title'], d['body']), axis=1)

    print(cc)
    # Nothing to index into when there are no per-document results.
    if len(claims) == 0:
        return 0
    return len(claims[0])

# Run the extraction with limitDocuments=100; each detected claim and its
# resolved claimer is printed, followed by the value of the counter `cc`.
claimExtraction(100)    

claim:  Three studies show the benefits of healthier school meals.
by:  study ( unknown )

claim:  From The Rudd Center, now at U. Conn: A press release announces publication of its new study in Childhood Obesity demonstrating that the rules have led to an increase in fruit consumption without increasing plate waste.
by:  The Rudd Center ( ORG )

claim:  From the Harvard School of Public Health: It also sends a press release to announce its study demonstrating that an increase in consumption of fruits and vegetables is a direct result of the new USDA standards, and that these also do not increase plate waste.
by:  the Harvard School of Public Health ( ORG )

claim:  From the Union of Concerned Scientists: UCS announces a new position paper, “Lessons from the Lunchroom: Childhood Obesity, School Lunch, and the Way to a Healthier Future,” also documenting why school meals are so important to kids’ health.
by:  UCS ( ORG )

claim:  Postscript: Dana Woldow argues that the school food scene

claim:  Bach says that is what’s happening in his area — CRP contracts are expiring and that land is going into corn and soybeans.
by:  Bach ( ORG )

claim:  As soon as a CRP contract comes up, it’s plowed up, and that’s directly related to the revenue guaranteed by crop insurance,” he says.
by:  he ( unknown )

claim:  “This has doubled the cost to taxpayers and opened the door for large payments to producers who suffer only paper losses,” says Bruce Babcock, an Iowa State University economist who studies crop insurance.
by:  Bruce Babcock ( PERSON )

claim:  In a politically savvy move, the National Corn Growers Association, American Soybean Association and other commodity groups have said publicly they would be willing to give up direct payments as a budget-cutting move in the next Farm Bill.
by:  other commodity groups ( unknown )

claim:  “This was set up to be a safety net for real farmers out there producing crops, not a tool for concentrating wealth and emptying our towns,” he 

claim:  Risk estimates for other categories of spicy food consumption are shown in appendix table 2.
by:   ( unknown )

claim:  Appendix table 1 shows the risk estimates for other categories of spicy food consumption.
by:  Appendix table ( unknown )

claim:  Horizontal lines represent 95% confidence intervals

We further performed stratified analyses according to whether the participants reported using fresh chilli pepper as their predominant spice.
by:  the participants ( unknown )

claim:  Age adjusted and multivariate adjusted analyses showed a statistically significant inverse association between spicy food consumption and total mortality.
by:  multivariate ( unknown )

claim:  Compared with participants who ate spicy foods less than once a week, those who consumed spicy foods 6 or 7 days a week showed a 14% relative risk reduction in total mortality.
by:   ( unknown )

claim:  Fresh and dried chilli peppers were the most commonly used types of spices in those who reported consumin

claim:  In Britain, this strategy is becoming more overt, with Ian Boyd, the chief scientific adviser at the Department for Environment, Food and Rural Affairs, writing recently that scientists should avoid "suggesting that policies are either right or wrong" and should express their views "by working with embedded advisers (such as myself), and by being the voice of reason, rather than dissent, in the public arena".If you want to know where this leads, check out what's happening in Canada, where I live.. Their placards said, "No Science, No Evidence, No Truth".But the truth is getting out anyway.
by:  Their placards ( unknown )

claim:  Critics say what people really need in a crisis is money to buy their own food.
by:  Critics ( unknown )

claim:  Search for evidence



Both sides claim they’re right, says economist John Hoddinott at the International Food Policy Research Institute, but neither has much evidence to back them up.
by:  economist John Hoddinott ( unknown )

claim:  “Wha

claim:  Authorities say gunman Adam Lanza killed his mother at their home on Friday and then opened fire inside the Sandy Hook Elementary School in Newtown, killing 26 people, including 20 children, before taking his own life.
by:  Authorities ( unknown )

claim:  Authorities say Lanza killed his mother at their home and then opened fire inside the Sandy Hook Elementary School in Newtown, killing 26 people, including 20 children, before taking his own life, on Friday.
by:  Authorities ( unknown )

claim:  Authorities say gunman Adam Lanza killed his mother at their home on Friday and then opened fire inside the Sandy Hook Elementary School in Newtown, killing 26 people, including 20 children, before taking his own life.
by:  Authorities ( unknown )

claim:  Authorities say gunman Adam Lanza killed his mother at their home on Friday and then opened fire inside the Sandy Hook Elementary School in Newtown, killing 26 people, including 20 children, before taking his own life.
by:  Authorit

claim:  It's an intermittent fasting approach that, as we've reported, has been popularized by books by British physician and television broadcaster Michael Mosley.
by:  we ( unknown )

claim:  The diet calls for two days per week of minifasting where the aim is to go a long stretch, say 14 to 18 hours, without eating.
by:  14 to 18 hours ( unknown )

claim:  The fascination is what researchers say may be the broader benefits.
by:  researchers ( unknown )

claim:  As Allison reports on All Things Considered, she found that she's just less hungry the day after a fast.
by:  Allison ( PERSON )

claim:  Mark Mattson, a researcher at the National Institute of Aging, says when we go without food, the body uses up its stored glucose, the basic fuel for the body, and starts burning fat.
by:  Mark Mattson ( PERSON )

claim:  During fasting, he says, fat can convert to compounds called ketones, "which have beneficial effects in making neurons more resistant to injury and disease."
by:  he ( unkn

claim:  This is a measure that will help efforts to reduce consumption of sugar-sweetened beverages, which research shows are a major contributor to increased calorie intake by both children and adults, thus potentially contributing to the nation’s obesity epidemic,” said a statement from the Society at the time.
by:  a statement ( unknown )

claim:  "I think the rising cost of crop insurance will bring even more attention to crop insurance than has been paid so far," said Craig Cox of the Environmental Working Group, which says crop insurance is skewed in favor of big farmers and needs reform.
by:  Craig Cox ( PERSON )

claim:  Indemnities will be so large, the companies will pay out all the money they collected in premiums this year, $11 billion, plus $2 billion to $3 billion more, say agricultural economists and the catastrophe modeling company AIR Worldwide.
by:  agricultural economists ( unknown )

claim:  "We don't expect it to be huge rate increases because it will be blended in

claim:  "Oh, those eggs you get in the U.S. are only yellow because they're pumped full of hormones," he said.
by:  he ( unknown )

claim:  Yours,

Ari

Dear Ari,

White egg yolks may look bizarre, but poultry scientists I spoke with say there's nothing to worry about.
by:  expert ( unknown )

claim:  " says Scott Beyer, a poultry specialist with the state of Kansas.
by:  Scott Beyer ( PERSON )

claim:  Beyer says egg yolk color is almost entirely influenced by the birds' diet.
by:  Beyer ( PERSON )

claim:  In most parts of the world, he says, diners prefer their yolks with a sunnier disposition, so commercial feeds often contain lutein as an additive, though yellow maize, soybeans, carrots and alfafa powder will also do the trick.
by:  he ( unknown )

claim:  On the other end of the rainbow, says Beyer, are the yolks in some parts of South America, where hens will peck at dark red annatto seeds.
by:  Beyer ( PERSON )

claim:  Brilliant yolks ranging from dark orange to red orange to 

claim:  "Across the important antioxidant compounds in fruits and vegetables, organic fruits and vegetables deliver between 20 and 40 percent higher antioxidant activity," says Charles Benbrook, from Washington State University's Center for Sustaining Agriculture and Natural Resources, a co-author of the study.
by:  Charles Benbrook ( PERSON )

claim:  Their effects remain somewhat murky, but scientists say they can protect cells from the effects of aging, or from the sort of damage that can lead to cancer.
by:  expert ( unknown )

claim:  Benbrook says this is a big reason why public health experts want us all to eat more fruits and vegetables: They deliver a good dose of antioxidants.
by:  Benbrook ( unknown )

claim:  And if organic produce provides more of them, he says, "we think that's a big deal."
by:  he ( unknown )

claim:  "Plants in an organic field are getting chewed on," he says.
by:  he ( unknown )

claim:  The second reason, Benbrook says, is that organic crops aren't ge

claim:  Researchers from the University of California, Los Angeles say that living in an area with outdoor fast-food advertisements could lead to weight gain.
by:  Researchers ( unknown )

claim:  HealthDay News reports that the study shows “an increased likelihood of obesity in neighborhoods with the most outdoor fast-food ads.” Such ads are more prevalent in low-income, minority neighborhoods — which also often lack access to healthy food.
by:  HealthDay News ( ORG )

claim:  The Los Angeles Times reports that nearly 70 percent of California voters think a sugary drink tax is a “good idea” if the money supports nutrition and physical activity education programs in schools.
by:  The Los Angeles Times ( ORG )

claim:  Vermont House Committee Debating SSB Tax

Legislators in Vermont are studying a plan to tax sugary drinks to help reduce obesity, Vermont Public Radio reports.
by:  Public Radio ( unknown )

claim:  Stress May Drive Obesity in Kids

Researchers at Penn State and Johns Hop

claim:  "That's an exciting change," Catlin said.
by:  Catlin ( PERSON )

claim:  The report shows how each of a state's counties compares with the others, which gives leaders a chance to see how they can create healthier environments.
by:  study ( unknown )

claim:  "The shame can lead to change," Catlin said.
by:  Catlin ( PERSON )

claim:  "We're seeing mayors and civic leaders starting to get this," Marks said.
by:  Marks ( PERSON )

claim:  But until now, nobody had really put these pieces together," said Gitanjali Singh, a postdoctoral research fellow at the Harvard School of Public Health in Boston and lead author of the study presented today at the American Heart Association's annual meeting in New Orleans.
by:  Gitanjali Singh ( PERSON )

claim:  "I think our findings should really impel policymakers to make effective policies to reduce sugary beverage consumption since it causes a significant number of deaths," said Singh, adding that she thinks "cause" is an appropriate word

claim:  A 1990 report from an RJR employee suggests that industry policy positions were promoted through BEEP seminars:

“The importance of this seminar went beyond the lectures and symposia …[the] influence that representatives from RJR have on the attendees of the seminar should be noted.
by:  RJR ( ORG )

claim:  Those teenagers are now 18–21 years old, and since about 70 percent of 18–21 year-olds and 35 percent of older smokers smoke a PM brand, this means that 700,000 of those adult quitters had been PM smokers and 420,000 of the non-smokers would have been PM smokers …[we] don’t need to have that happen again.”60 [emphasis in original]

Although this memo does not address African American smokers specifically, it suggests why the Tobacco Institute had declared in 1984 that preventing any excise tax increase was the industry’s highest priority.61

One of the industry’s key strategies for fighting tax increases was to argue that excise taxes were regressive and disproportionately 

claim:  $66 billion -- Columbia University researchers say that if current trends don't change, obesity-related annual medical costs in the U.S. could increase this amount by 2030 -- on top of current expenditures.
by:  Columbia University researchers ( unknown )

claim:  Wellness programs show the potential to reduce obesity -- and they're cost-effective.
by:  Wellness programs ( unknown )

claim:  But the high rates of heart disease in the developed world suggest that these fats may not be working alone, say a group of researchers from the Cleveland Clinic who study how microbes and bacteria in our gut influence heart disease.
by:  the high rates ( unknown )

claim:  Our gut is full of bacteria — good strains that don’t cause disease — and recent studies show that these microbes can have a significant impact on our health, affecting our propensity for obesity, asthma, inflammatory diseases and even cancer.
by:  good strains ( unknown )

claim:  Meanwhile, vegans and vegetarians have 

by:  he ( unknown )

claim:  "The most healthy thing to do is to cook at home," said researcher Lesser.
by:  researcher Lesser ( unknown )

claim:  Less juice ideally means less sugar consumed by these little kids," said Tatiana Andreyeva, the study's lead author and the director of economic initiatives at the Rudd Center for Food Policy & Obesity at Yale University in New Haven, Connecticut.
by:  Tatiana Andreyeva ( PERSON )

claim:  What's more, Andreyeva told Reuters Health that their study showed that WIC recipients didn't end up buying more juice with their own money to compensate.
by:  Andreyeva ( PERSON )

claim:  "We expected a reduction in juice, but we didn't know if we'd see a switch to other beverages," she said.
by:  she ( unknown )

claim:  The Institute of Medicine suggested bringing juice and other WIC food in line with recommendations.
by:  The Institute ( unknown )

claim:  "In terms of calories it's not much different from soda," she said.
by:  she ( unknown )

claim

claim:  Parents reported on their income and education, as well as how often children drank sugary beverages and watched TV.
by:  Parents ( unknown )

claim:  Four-year-old sugary beverage drinkers also tended to have a higher rate of obesity than non-drinkers - but that finding could have been due to chance, the researchers reported Monday in Pediatrics.
by:  the researchers ( unknown )

claim:  The researchers said kids who drink sports drinks and other beverages with added sugar may not make up for the extra calories by eating or drinking less of something else.
by:  The researchers ( unknown )

claim:  Dr. Y. Claire Wang, who studies childhood nutrition and obesity at the Columbia University Mailman School of Public Health in New York, said she wasn't surprised by the findings.
by:  Dr. Y. Claire Wang ( unknown )

claim:  DeBoer said parents should be aware of where young kids are getting extra unhealthy calories and stick with water and milk for beverage options.
by:  DeBoer ( PER

claim:  GMA: IWG proposals unworkable and unscientific

One of the GMA’s biggest concerns was the ultra-low sodium targets (140 mg/RACC for individual foods by 2021) in the IWG's proposals, which it argued would prevent members from marketing (to children) “almost every ready-to-eat breakfast cereal, most instant oatmeal products and many whole wheat and whole grain breads”.
by:  it ( unknown )

claim:  In the case of both ‘Kidvid’ and the IWG proposals, the GMA and others lobbied Congress relentlessly to ensure that neither made any headway, claimed Dr Dietz: “The comments in response to the proposed principles resembled the tobacco industry template: challenge the science, dismiss the scientists’ qualifications, and exaggerate the impact of implementation.
by:  IWG ( ORG )

claim:  “The financial and political resources of the food and advertising industries mobilized the congressional response and quashed rulemaking.”

Dr Marion Nestle: ‘It is hard to believe how thoroughly Congress

claim:  In addition, ABA members have also launched many reduced- and zero-calorie products, they said.
by:  they ( unknown )

claim:  Between March 2012 and March 2013 a network of plaintiffs’ lawyers filed 28 food labeling class actions in the U.S. District Court for the Northern District of California alone, says the ILR in a report called ‘The New Lawsuit Ecosystem’.
by:  the ILR ( unknown )

claim:  While plaintiff’s attorneys claim these lawsuits are consumer-driven complaints designed to force ‘Big Food’ to stop duping shoppers over how healthy their products are, ILR says most are frivolous cases without genuine victims that focus on highly technical infringements of food labeling legislation or exploit the lack of regulatory clarity over what is ‘natural’.
by:  ’s attorneys ( unknown )

claim:  • Plaintiff-friendly consumer protection laws that provide an opportunity to attempt to recover without showing an actual injury.
by:   ( unknown )

claim:  Meanwhile, some plaintiffs’ 

claim:  The authors argue for a need to do more to help consumers to eat sensibly and to encourage portion control, among other findings published online today ahead of print in the December issue of the American Journal of Preventive Medicine.
by:  The authors ( unknown )

claim:  “While previous studies have shown mixed impacts of menu labeling in fast food settings, this study suggests that nutrition information may be particularly useful in full-service restaurants,” said Donald F. Schwarz, MD, health commissioner for the City of Philadelphia and a co-author of the study.
by:  Donald F. Schwarz ( PERSON )

claim:  The authors looked at differences in calories and nutrients purchased between those who dined at outlets with menu labeling and those who did not, and at customers’ reported use of nutritional information when ordering.
by:   ( unknown )

claim:  “When you compare the average intake with the recommended daily intake, these consumers purchased almost all their calories, an

5