In [54]:
#!/usr/bin/env python
# coding: utf-8

from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy
import networkx as nx
 
def read_article(file_name):
    file = open(file_name, "r")
    filedata = file.readlines()
    article = filedata[0].split("\n")
    sentences = []

    for sentence in article:
        print(sentence)
        sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))
    sentences.pop() 
    
    return sentences

def sentence_similarity(sent1, sent2, stopwords=None):
    if stopwords is None:
        stopwords = []
 
    sent1 = [w.lower() for w in sent1]
    sent2 = [w.lower() for w in sent2]
 
    all_words = list(set(sent1 + sent2))
 
    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)
 
    # build the vector for the first sentence
    for w in sent1:
        if w in stopwords:
            continue
        vector1[all_words.index(w)] += 1
 
    # build the vector for the second sentence
    for w in sent2:
        if w in stopwords:
            continue
        vector2[all_words.index(w)] += 1
 
    return 1 - cosine_distance(vector1, vector2)
 
def build_similarity_matrix(sentences, stop_words):
    # Create an empty similarity matrix
    similarity_matrix = np.zeros((len(sentences), len(sentences)))
 
    for idx1 in range(len(sentences)):
        for idx2 in range(len(sentences)):
            if idx1 == idx2: #ignore if both are same sentences
                continue 
            similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2], stop_words)

    return similarity_matrix


def generate_summary(file_name, top_n=5):
    stop_words = stopwords.words('english')
    summarize_text = []

    # Step 1 - Read text anc split it
    sentences =  read_article(file_name)

    # Step 2 - Generate Similary Martix across sentences
    sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)

    # Step 3 - Rank sentences in similarity martix
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
    scores = nx.pagerank(sentence_similarity_graph)

    # Step 4 - Sort the rank and pick top sentences
    ranked_sentence = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)    
    print("Indexes of top ranked_sentence order are ", ranked_sentence)    

    for i in range(top_n):
      summarize_text.append(" ".join(ranked_sentence[i][1]))

    # Step 5 - Offcourse, output the summarize texr
    print("Summarize Text: \n", ". ".join(summarize_text))

# let's begin


In [2]:
from craiyon import Craiyon
from PIL import Image # pip install pillow
from io import BytesIO
import base64

generator = Craiyon() # Instantiates the api wrapper
result = generator.generate("Photorealistic happy duck")
result.save_images() # Saves the generated images to 'current working directory/generated', you can also provide a custom pa

In [51]:
import numpy 

In [52]:
import lyricsgenius
api_key='ORBJc-e5kH3LA_H5KohP8Grgp_YCbaBZgqz536TachvT1_iX7mPPwU3WrADiJ8Nw'
genius= lyricsgenius.Genius(api_key)
title="Imagine Dragons"
artist="Thunder"
song = genius.search_song(title=title, artist=artist)
full_song = song.lyrics
with open('song.txt', 'w') as f:
    f.write(full_song)
print(song.lyrics)

Searching for "Imagine Dragons" by Thunder...
Done.
TranslationsSlovenčinaPortuguêsΕλληνικάThunder Lyrics[Verse 1]
Just a young gun with a quick fuse
I was uptight, wanna let loose
I was dreaming of bigger things
And wanna leave my own life behind
Not a "Yes, sir," not a follower
Fit the box, fit the mold
Have a seat in the foyer, take a number
I was lightning before the thunder

[Pre-Chorus]
Thunder, thunder
Thunder, thun-
Thunder, th-th-thunder, thunder
Thunder, thunder, thun-
Thunder, th-th-thunder, thunder
[Chorus]
Thunder, feel the thunder (Thunder, thunder, thun-)
Lightning then the thunder (Thunder, th-th-thunder, thunder)
Thunder, feel the thunder (Thunder, thunder, thun-, thun-)
Lightning then the thunder, thunder, thunder (Th-th-thunder)
Thunder (Thun-, thun-)

[Verse 2]
Kids were laughing in my classes
While I was scheming for the masses
Who do you think you are?
Dreaming 'bout being a big star
They say, "You're basic," they say, "You're easy"
You're always riding in the bac

In [55]:
generate_summary( "song.txt", 2)

TranslationsSlovenčinaPortuguêsΕλληνικάThunder Lyrics[Verse 1]

Indexes of top ranked_sentence order are  [(1.0, ['TranslationsSlovenčinaPortuguêsΕλληνικάThunder', 'Lyrics[Verse', '1]'])]


IndexError: list index out of range

In [71]:
import contractions
def expand_contractions(text):
    expanded_words = []
    for word in text.split():
        expanded_words.append(contractions.fix(word)) 
    return ' '.join(expanded_words)

def clean_sentence(sentence):
    sentence = sentence.replace("[^a-zA-Z]+", " ")
    sentence = expand_contractions(sentence)
    return sentence.lower().split(" ")
    

In [72]:
def read_article(file_name):
    file = open(file_name, "r")
    article = file.readlines()
    sentences = []
    
    for sentence in article:
        sentences.append(clean_sentence(sentence))
    sentences.pop() 
    
    return sentences

read_article("song.txt")

[['translationsslovenčinaportuguêsελληνικάthunder', 'lyrics[verse', '1]'],
 ['just', 'a', 'young', 'gun', 'with', 'a', 'quick', 'fuse'],
 ['i', 'was', 'uptight,', 'want', 'to', 'let', 'loose'],
 ['i', 'was', 'dreaming', 'of', 'bigger', 'things'],
 ['and', 'want', 'to', 'leave', 'my', 'own', 'life', 'behind'],
 ['not', 'a', '"yes,', 'sir,"', 'not', 'a', 'follower'],
 ['fit', 'the', 'box,', 'fit', 'the', 'mold'],
 ['have', 'a', 'seat', 'in', 'the', 'foyer,', 'take', 'a', 'number'],
 ['i', 'was', 'lightning', 'before', 'the', 'thunder'],
 [''],
 ['[pre-chorus]'],
 ['thunder,', 'thunder'],
 ['thunder,', 'thun-'],
 ['thunder,', 'th-th-thunder,', 'thunder'],
 ['thunder,', 'thunder,', 'thun-'],
 ['thunder,', 'th-th-thunder,', 'thunder'],
 ['[chorus]'],
 ['thunder,', 'feel', 'the', 'thunder', '(thunder,', 'thunder,', 'thun-)'],
 ['lightning',
  'then',
  'the',
  'thunder',
  '(thunder,',
  'th-th-thunder,',
  'thunder)'],
 ['thunder,',
  'feel',
  'the',
  'thunder',
  '(thunder,',
  'thunder

In [73]:
def sentence_similarity(sent1, sent2, stopwords=None):
    if stopwords is None:
        stopwords = []
 
    sent1 = [w.lower() for w in sent1]
    sent2 = [w.lower() for w in sent2]
 
    all_words = list(set(sent1 + sent2))
 
    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)
 
    # build the vector for the first sentence
    for w in sent1:
        if w in stopwords:
            continue
        vector1[all_words.index(w)] += 1
 
    # build the vector for the second sentence
    for w in sent2:
        if w in stopwords:
            continue
        vector2[all_words.index(w)] += 1
 
    return 1 - cosine_distance(vector1, vector2)
 
def build_similarity_matrix(sentences, stop_words):
    # Create an empty similarity matrix
    similarity_matrix = np.zeros((len(sentences), len(sentences)))
 
    for idx1 in range(len(sentences)):
        for idx2 in range(len(sentences)):
            if idx1 == idx2: #ignore if both are same sentences
                continue 
            similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2], stop_words)

    return similarity_matrix


def generate_summary(file_name, top_n=5):
    stop_words = stopwords.words('english')
    summarize_text = []

    # Step 1 - Read text anc split it
    sentences =  read_article(file_name)

    # Step 2 - Generate Similary Martix across sentences
    sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)

    # Step 3 - Rank sentences in similarity martix
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
    scores = nx.pagerank(sentence_similarity_graph)

    # Step 4 - Sort the rank and pick top sentences
    ranked_sentence = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)    
    print("Indexes of top ranked_sentence order are ", ranked_sentence)    

    for i in range(top_n):
      summarize_text.append(" ".join(ranked_sentence[i][1]))

    # Step 5 - Offcourse, output the summarize texr
    print("Summarize Text: \n", ". ".join(summarize_text))


In [82]:
generate_summary("song.txt", 2)

NameError: name 'numpy' is not defined