In [1]:
# Sample passage about AI used as the input document; later cells lowercase it,
# strip it down to plain words, and build an extractive summary from it.
actual_text = "Artificial Intelligence (AI) is transforming numerous industries by enhancing efficiencies and creating new opportunities. From healthcare to transportation, AI applications are helping businesses optimize their processes and deliver better services. For instance, AI-driven algorithms can analyze vast amounts of data to uncover insights that humans might miss. In healthcare, AI is being used for accurate diagnostics and personalized medicine. Moreover, autonomous vehicles, powered by AI, promise to revolutionize the way we commute and transport goods. However, as AI technology advances, it also poses ethical challenges, including concerns about privacy and job displacement. Society must navigate these challenges thoughtfully to leverage AI's full potential while minimizing risks. Overall, the impact of AI on the modern world is profound and multifaceted, making it a pivotal part of future innovations."

In [2]:
import re  # Regular expressions: pattern-based matching, searching, splitting, and substitution on strings.
import nltk  # Natural Language Toolkit (NLTK): NLP utilities such as tokenization, POS tagging, named entity recognition, and parsing.

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [3]:
# Lowercase the whole passage; the word-frequency table built later is keyed on lowercase words.
# NOTE(review): this overwrites the original-cased text, so every later cell that reads
# actual_text (sentence splitting, the printed summary, the readability score) sees lowercase input.
actual_text = actual_text.lower()
actual_text  # last expression: echo the lowercased text as the cell output

"artificial intelligence (ai) is transforming numerous industries by enhancing efficiencies and creating new opportunities. from healthcare to transportation, ai applications are helping businesses optimize their processes and deliver better services. for instance, ai-driven algorithms can analyze vast amounts of data to uncover insights that humans might miss. in healthcare, ai is being used for accurate diagnostics and personalized medicine. moreover, autonomous vehicles, powered by ai, promise to revolutionize the way we commute and transport goods. however, as ai technology advances, it also poses ethical challenges, including concerns about privacy and job displacement. society must navigate these challenges thoughtfully to leverage ai's full potential while minimizing risks. overall, the impact of ai on the modern world is profound and multifaceted, making it a pivotal part of future innovations."

In [4]:
# Step 1: replace every character that is not an ASCII letter (digits, punctuation,
# whitespace, ...) with a space, so only alphabetic words remain.
clean_text = re.sub(r'[^a-zA-Z]', ' ', actual_text)
# Step 2: step 1 can leave runs of consecutive spaces; collapse each run into a single space.
# The pattern is a raw string because '\s' inside a plain literal is an invalid escape
# sequence, which newer Python versions warn about.
clean_text = re.sub(r'\s+', ' ', clean_text)
clean_text  # last expression: echo the cleaned text as the cell output

'artificial intelligence ai is transforming numerous industries by enhancing efficiencies and creating new opportunities from healthcare to transportation ai applications are helping businesses optimize their processes and deliver better services for instance ai driven algorithms can analyze vast amounts of data to uncover insights that humans might miss in healthcare ai is being used for accurate diagnostics and personalized medicine moreover autonomous vehicles powered by ai promise to revolutionize the way we commute and transport goods however as ai technology advances it also poses ethical challenges including concerns about privacy and job displacement society must navigate these challenges thoughtfully to leverage ai s full potential while minimizing risks overall the impact of ai on the modern world is profound and multifaceted making it a pivotal part of future innovations '

In [5]:
# Split the text into individual sentences with NLTK's sentence tokenizer, which uses a
# pre-trained model to decide where sentence boundaries fall.
sentence_list = nltk.sent_tokenize(actual_text)
sentence_list  # last expression: echo the sentences as the cell output

NameError: name 'nltk' is not defined

In [6]:
# Fetch NLTK's stop-word lists. Stop words are common, low-information words such as
# "and", "the", "or", "this".
# NOTE(review): only the 'stopwords' resource is downloaded in this notebook; the tokenizers
# used in other cells presumably need their own NLTK data package too — confirm on a fresh environment.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [7]:
# English stop words as shipped with NLTK (kept as the original list under the same name).
stopwords = nltk.corpus.stopwords.words('english')
# A list is scanned front to back on every `in` test; a set answers the same question
# in constant time, which matters because the test runs once per token.
stopword_set = set(stopwords)

# Maps each non-stop-word token of the cleaned text to its raw occurrence count.
word_frequencies = {}
for word in nltk.word_tokenize(clean_text):  # split the cleaned text into word tokens
    if word in stopword_set:
        continue
    # dict.get supplies 0 the first time a word is seen, so no separate "new key" branch is needed.
    word_frequencies[word] = word_frequencies.get(word, 0) + 1

In [8]:
# Normalize the raw counts: divide every count by the count of the most frequent word,
# so that word scores exactly 1.0 and every other word falls between 0 and 1.
# `default=1` keeps this cell from raising ValueError when the frequency table is empty
# (for example, when the text contained only stop words); the loop below then has nothing to do.
maximum_frequency = max(word_frequencies.values(), default=1)

for word in word_frequencies:
    word_frequencies[word] = word_frequencies[word] / maximum_frequency

In [9]:
# Score each sentence by summing the normalized frequencies of the significant words it contains.
# Sentences with 30 or more space-separated words are skipped entirely, and a sentence that
# contains no significant word gets no entry at all.
sentence_scores = {}

for sentence in sentence_list:
    # The length limit depends only on the sentence, so check it once here instead of
    # re-splitting the sentence for every token.
    if len(sentence.split(' ')) >= 30:
        continue
    for word in nltk.word_tokenize(sentence):
        # The frequency table is keyed on lowercase words, so look tokens up case-insensitively.
        token = word.lower()
        if token in word_frequencies:
            # dict.get starts the running total at 0 the first time a sentence scores.
            sentence_scores[sentence] = sentence_scores.get(sentence, 0) + word_frequencies[token]
# NOTE(review): tokens here come from the uncleaned sentence, while the frequency table was built
# from text whose non-letters were replaced by spaces. Hyphenated tokens such as "ai-driven"
# therefore appear not to match any key and contribute nothing to the score — confirm whether intended.

In [10]:
# Inspect the normalized word-frequency table (the most frequent word has score 1.0).
word_frequencies

{'artificial': 0.125,
 'intelligence': 0.125,
 'ai': 1.0,
 'transforming': 0.125,
 'numerous': 0.125,
 'industries': 0.125,
 'enhancing': 0.125,
 'efficiencies': 0.125,
 'creating': 0.125,
 'new': 0.125,
 'opportunities': 0.125,
 'healthcare': 0.25,
 'transportation': 0.125,
 'applications': 0.125,
 'helping': 0.125,
 'businesses': 0.125,
 'optimize': 0.125,
 'processes': 0.125,
 'deliver': 0.125,
 'better': 0.125,
 'services': 0.125,
 'instance': 0.125,
 'driven': 0.125,
 'algorithms': 0.125,
 'analyze': 0.125,
 'vast': 0.125,
 'amounts': 0.125,
 'data': 0.125,
 'uncover': 0.125,
 'insights': 0.125,
 'humans': 0.125,
 'might': 0.125,
 'miss': 0.125,
 'used': 0.125,
 'accurate': 0.125,
 'diagnostics': 0.125,
 'personalized': 0.125,
 'medicine': 0.125,
 'moreover': 0.125,
 'autonomous': 0.125,
 'vehicles': 0.125,
 'powered': 0.125,
 'promise': 0.125,
 'revolutionize': 0.125,
 'way': 0.125,
 'commute': 0.125,
 'transport': 0.125,
 'goods': 0.125,
 'however': 0.125,
 'technology': 0.125,


In [11]:
# Inspect the score assigned to each sentence (keys are the sentence strings themselves).
sentence_scores

{'artificial intelligence (ai) is transforming numerous industries by enhancing efficiencies and creating new opportunities.': 2.25,
 'from healthcare to transportation, ai applications are helping businesses optimize their processes and deliver better services.': 2.375,
 'for instance, ai-driven algorithms can analyze vast amounts of data to uncover insights that humans might miss.': 1.375,
 'in healthcare, ai is being used for accurate diagnostics and personalized medicine.': 1.875,
 'moreover, autonomous vehicles, powered by ai, promise to revolutionize the way we commute and transport goods.': 2.25,
 'however, as ai technology advances, it also poses ethical challenges, including concerns about privacy and job displacement.': 2.625,
 "society must navigate these challenges thoughtfully to leverage ai's full potential while minimizing risks.": 2.375,
 'overall, the impact of ai on the modern world is profound and multifaceted, making it a pivotal part of future innovations.': 2.375}

In [12]:
import heapq  # provides nlargest for picking the top-n items by a key

# Choose the five best-scoring sentences, highest score first. The candidates are the
# dictionary's keys (the sentences); each one is ranked by its stored score.
summary = heapq.nlargest(5, sentence_scores.keys(), key=lambda sentence: sentence_scores[sentence])

# Emit the chosen sentences on one line, separated by single spaces.
print(*summary, sep=" ")

however, as ai technology advances, it also poses ethical challenges, including concerns about privacy and job displacement. from healthcare to transportation, ai applications are helping businesses optimize their processes and deliver better services. society must navigate these challenges thoughtfully to leverage ai's full potential while minimizing risks. overall, the impact of ai on the modern world is profound and multifaceted, making it a pivotal part of future innovations. artificial intelligence (ai) is transforming numerous industries by enhancing efficiencies and creating new opportunities.


In [34]:
pip install textstat

Collecting textstat
  Downloading textstat-0.7.5-py3-none-any.whl.metadata (15 kB)
Collecting pyphen (from textstat)
  Downloading pyphen-0.17.2-py3-none-any.whl.metadata (3.2 kB)
Collecting cmudict (from textstat)
  Downloading cmudict-1.0.32-py3-none-any.whl.metadata (3.6 kB)
Collecting importlib-resources>=5 (from cmudict->textstat)
  Downloading importlib_resources-6.5.2-py3-none-any.whl.metadata (3.9 kB)
Downloading textstat-0.7.5-py3-none-any.whl (105 kB)
   ---------------------------------------- 0.0/105.3 kB ? eta -:--:--
   ----------------------- ---------------- 61.4/105.3 kB 1.1 MB/s eta 0:00:01
   ---------------------------------------- 105.3/105.3 kB 1.0 MB/s eta 0:00:00
Downloading cmudict-1.0.32-py3-none-any.whl (939 kB)
   ---------------------------------------- 0.0/939.4 kB ? eta -:--:--
   ------ --------------------------------- 153.6/939.4 kB 2.3 MB/s eta 0:00:01
   -------- ------------------------------- 204.8/939.4 kB 2.1 MB/s eta 0:00:01
   ----------- -----

In [13]:
import textstat

# Flesch-Kincaid Grade Level: a US school-grade readability measure that textstat
# derives from the text using its word, sentence, and syllable counts.
readability_score = textstat.flesch_kincaid_grade(actual_text)
print(f"Flesch-Kincaid Grade Level: {readability_score}")

# Map the grade to a rough audience: below 6, from 6 up to (but not including) 12,
# and everything else. The second test needs no lower bound because scores below 6
# were already handled by the first.
print(
    "This text is suitable for a lower reading level."
    if readability_score < 6
    else "This text is suitable for middle school to high school students."
    if readability_score < 12
    else "This text is better for college-level readers."
)

Flesch-Kincaid Grade Level: 15.4
This text is better for college-level readers.


In [3]:
#spaCy is a popular open-source library for advanced natural language processing (NLP) in Python.
#1. For high speed data access and manipulation
#2. Pre-trained models
#3. Easy Integration.
#Load the model: Load the English language model en_core_web_sm

In [None]:
#Named Entity Recognition (NER) is a subtask of Natural Language Processing (NLP) that involves locating and classifying named entities in text into predefined categories, 
#such as persons, organizations, locations, dates, and more. Works by tokenization, POS tagging and then entity recognition.
#Entity: Apple Inc., Label: ORG
#Entity: New York City, Label: GPE
#Entity: Tim Cook, Label: PERSON
#Entity: Apple, Label: ORG
#Entity: September 25, 2023, Label: DATE