In [1]:
!pip install torch



In [2]:
!pip install transformers==4.28.0



In [3]:
from transformers import pipeline
from bs4 import BeautifulSoup # For web scraping
import requests # For sending http requests for web scraping

In [4]:
# Load summarization pipeline
summarizer = pipeline("summarization")

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


## Getting content from the web

In [13]:
# Get PDF link.
URL = "https://news.mit.edu/2023/ai-models-astrocytes-role-brain-0815"

In [14]:
r = requests.get(URL)

In [15]:
# Create instance of BeautifulSoup by passing in the text.
soup = BeautifulSoup(r.text, 'html.parser')
# Get the required text by searching for h1 and p tags.
results = soup.find_all(['h1', 'p'])

In [16]:
results

[<p class="tle-search--suggested-results--feedback"><a class="tle-search--suggested-results--feedback--link" href="http://web.mit.edu/feedback">Suggestions or feedback?</a></p>,
 <h1><span itemprop="name headline">AI models are powerful, but are they biologically plausible?</span> 
 </h1>,
 <p>
     Images for download on the MIT News office website are made available to non-commercial entities, press and the general public under a 
     <a href="http://creativecommons.org/licenses/by-nc-nd/3.0/" target="_blank">Creative Commons Attribution Non-Commercial No Derivatives license</a>.
     You may not alter the images provided, other than to crop them to size. A credit line must be used when reproducing images; if one is not provided 
     below, credit the images to "MIT." 
   </p>,
 <p class="news-article--images-gallery--nav--inner">
 <button class="news-article--images-gallery--nav--button news-article--images-gallery--nav--button--previous"><svg class="arrow--point-west--slider" dat

In [17]:
# Extracting and concatenating text from the tags into one single block.
text = [result.text for result in results]
article = ' '.join(text)

In [18]:
article

'Suggestions or feedback? AI models are powerful, but are they biologically plausible? \n \n    Images for download on the MIT News office website are made available to non-commercial entities, press and the general public under a \n    Creative Commons Attribution Non-Commercial No Derivatives license.\n    You may not alter the images provided, other than to crop them to size. A credit line must be used when reproducing images; if one is not provided \n    below, credit the images to "MIT." \n   \n\n\n\n\n\n\n\n\n\n\n\n\nPrevious image\nNext image\n\n\n\n\n\n\n\n\n\n\n\n\n\n Artificial neural networks, ubiquitous machine-learning models that can be trained to complete many tasks, are so called because their architecture is inspired by the way biological neurons process information in the human brain. About six years ago, scientists discovered a new type of more powerful neural network model known as a transformer. These models can achieve unprecedented performance, such as by generat

## Chunking data

In [19]:
# Chunking the text in order to be passed to basic transformed model. <eos> tag added to split using the tag to build array.
article = article.replace('.', '.<eos>')
article = article.replace('?', '?<eos>')
article = article.replace('!', '!<eos>')
sentences = article.split('<eos>') # Creating an array of sentences

In [20]:
sentences

['Suggestions or feedback?',
 ' AI models are powerful, but are they biologically plausible?',
 ' \n \n    Images for download on the MIT News office website are made available to non-commercial entities, press and the general public under a \n    Creative Commons Attribution Non-Commercial No Derivatives license.',
 '\n    You may not alter the images provided, other than to crop them to size.',
 ' A credit line must be used when reproducing images; if one is not provided \n    below, credit the images to "MIT.',
 '" \n   \n\n\n\n\n\n\n\n\n\n\n\n\nPrevious image\nNext image\n\n\n\n\n\n\n\n\n\n\n\n\n\n Artificial neural networks, ubiquitous machine-learning models that can be trained to complete many tasks, are so called because their architecture is inspired by the way biological neurons process information in the human brain.',
 ' About six years ago, scientists discovered a new type of more powerful neural network model known as a transformer.',
 ' These models can achieve unprecede

In [21]:
# Limit max word count of a chunk to 500
max_chunk = 500

current_chunk = 0 
chunks = []
for sentence in sentences:
    if len(chunks) == current_chunk + 1: ## Check if a chunk was already started.
        if len(chunks[current_chunk]) + len(sentence.split(' ')) <= max_chunk: ## Check if current chunk size + new sentence < max chunk size
            chunks[current_chunk].extend(sentence.split(' '))
        else:  ## If chunk size exceeds max limit, start new chunk
            current_chunk += 1
            chunks.append(sentence.split(' '))
    else:    ## If new chunk being created, add word by splitting each sentence.
        print(current_chunk)
        chunks.append(sentence.split(' '))

for chunk_id in range(len(chunks)):
    chunks[chunk_id] = ' '.join(chunks[chunk_id])

0


In [29]:
len(chunks) # Returns 3
chunks[0]

'Suggestions or feedback?  AI models are powerful, but are they biologically plausible?  \n \n    Images for download on the MIT News office website are made available to non-commercial entities, press and the general public under a \n    Creative Commons Attribution Non-Commercial No Derivatives license. \n    You may not alter the images provided, other than to crop them to size.  A credit line must be used when reproducing images; if one is not provided \n    below, credit the images to "MIT. " \n   \n\n\n\n\n\n\n\n\n\n\n\n\nPrevious image\nNext image\n\n\n\n\n\n\n\n\n\n\n\n\n\n Artificial neural networks, ubiquitous machine-learning models that can be trained to complete many tasks, are so called because their architecture is inspired by the way biological neurons process information in the human brain.  About six years ago, scientists discovered a new type of more powerful neural network model known as a transformer.  These models can achieve unprecedented performance, such as by 

## Summarizing

In [34]:
# Summarizing text
summary = summarizer(chunks, max_length=150, min_length=30, do_sample=False)

In [35]:
summary  ## Shows 3 different summary for each of the 3 chunks.

[{'summary_text': ' Researchers from MIT, the MIT-IBM Watson AI Lab, and Harvard Medical School have produced a hypothesis that may explain how a transformer could be built using biological elements in the brain . Their hypothesis provides insights that could spark future neuroscience research into how the human brain works .'},
 {'summary_text': ' Transformers operate differently than other neural network models . For self-attention to work, the transformer must keep all the words ready in some form of memory . This didn’t seem biologically possible due to the way neurons communicate . A few years ago scientists studying a machine-learning model (known as a Dense Associated Memory) realized this mechanism could occur in the brain .'},
 {'summary_text': " The next step for the researchers is to make the leap from theory to practice . “The potential of unleashing the computational power of the other half of our brain is enormous,” says Rutgers University's Konstantinos Michmizos ."}]

In [40]:
# Joining the summary texts into one single text
text = ' '.join([summ['summary_text'] for summ in summary])

In [41]:
text

" Researchers from MIT, the MIT-IBM Watson AI Lab, and Harvard Medical School have produced a hypothesis that may explain how a transformer could be built using biological elements in the brain . Their hypothesis provides insights that could spark future neuroscience research into how the human brain works .  Transformers operate differently than other neural network models . For self-attention to work, the transformer must keep all the words ready in some form of memory . This didn’t seem biologically possible due to the way neurons communicate . A few years ago scientists studying a machine-learning model (known as a Dense Associated Memory) realized this mechanism could occur in the brain .  The next step for the researchers is to make the leap from theory to practice . “The potential of unleashing the computational power of the other half of our brain is enormous,” says Rutgers University's Konstantinos Michmizos ."

## Writing summary to text file

In [42]:
# Output summary to a text file
with open('blogsummary.txt', 'w') as f:
    f.write(text)