# Importing Modules

In [1]:
from nltk.corpus import stopwords #you can remove stop words for speed
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx

# French Language

# Reading the text file and splitting the paragraph into sentences

In [2]:
file = open("C:/Users/seerl/Downloads/FrenchText.txt", "r")
#This file contains one paragraph of multiple sentences
filedata = file.readlines()
article = filedata[0].split(". ") #Just do the first paragraph
sentences = []
for sentence in article:
    print(sentence)
    sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))

Je mâ€™appelle Jessica
Je suis une fille, je suis franÃ§aise et jâ€™ai treize ans
Je vais Ã  lâ€™Ã©cole Ã  Nice, mais jâ€™habite Ã  Cagnes-Sur-Mer
Jâ€™ai deux frÃ¨res
Le premier sâ€™appelle Thomas, il a quatorze ans
Le second sâ€™appelle Yann et il a neuf ans
Mon papa est italien et il est fleuriste
Ma mÃ¨re est allemande et est avocate
Mes frÃ¨res et moi parlons franÃ§ais, italien et allemand Ã  la maison
Nous avons une grande maison avec un chien, un poisson et deux chats.


# Printing Sentences

In [3]:
print("Sentences are ", sentences)

Sentences are  [['Je', 'mâ€™appelle', 'Jessica'], ['Je', 'suis', 'une', 'fille,', 'je', 'suis', 'franÃ§aise', 'et', 'jâ€™ai', 'treize', 'ans'], ['Je', 'vais', 'Ã\xa0', 'lâ€™Ã©cole', 'Ã\xa0', 'Nice,', 'mais', 'jâ€™habite', 'Ã\xa0', 'Cagnes-Sur-Mer'], ['Jâ€™ai', 'deux', 'frÃ¨res'], ['Le', 'premier', 'sâ€™appelle', 'Thomas,', 'il', 'a', 'quatorze', 'ans'], ['Le', 'second', 'sâ€™appelle', 'Yann', 'et', 'il', 'a', 'neuf', 'ans'], ['Mon', 'papa', 'est', 'italien', 'et', 'il', 'est', 'fleuriste'], ['Ma', 'mÃ¨re', 'est', 'allemande', 'et', 'est', 'avocate'], ['Mes', 'frÃ¨res', 'et', 'moi', 'parlons', 'franÃ§ais,', 'italien', 'et', 'allemand', 'Ã\xa0', 'la', 'maison'], ['Nous', 'avons', 'une', 'grande', 'maison', 'avec', 'un', 'chien,', 'un', 'poisson', 'et', 'deux', 'chats.']]


# Function to calculate similarity

In [4]:
def sentence_similarity(sent1, sent2 ):
    sent1 = [w.lower() for w in sent1]
    sent2 = [w.lower() for w in sent2]
    all_words = list(set(sent1 + sent2))
    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)
    for w in sent1:
        vector1[all_words.index(w)] += 1
        for w in sent2:
            vector2[all_words.index(w)] += 1
            return 1 - cosine_distance(vector1, vector2)

# Creating Similarity Matrix

In [5]:
similarity_matrix = np.zeros((len(sentences), len(sentences)))

for idx1 in range(len(sentences)):
    for idx2 in range(len(sentences)):
        if idx1 == idx2: #ignore if both are same sentences
            continue
        similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2])

print("Similarity matrix:\n", similarity_matrix)

Similarity matrix:
 [[0. 1. 1. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [1. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


# Ranking sentences in similarity matrix

In [6]:
# Step 3 - Rank sentences in similarity martix
sentence_similarity_graph = nx.from_numpy_array(similarity_matrix)
scores = nx.pagerank(sentence_similarity_graph)
print("scores", scores)

scores {0: 0.1739125798540512, 1: 0.1739125798540512, 2: 0.1739125798540512, 3: 0.02608742014594887, 4: 0.1739125798540512, 5: 0.1739125798540512, 6: 0.02608742014594887, 7: 0.02608742014594887, 8: 0.02608742014594887, 9: 0.02608742014594887}


# Sorting the ranks

In [7]:
# Step 4 - Sort the rank and pick top sentences
ranked_sentence = sorted(((scores[i],s) for i,s in
enumerate(sentences)), reverse=True)
print("Indexes of top ranked_sentence order are \n\n",
ranked_sentence)

Indexes of top ranked_sentence order are 

 [(0.1739125798540512, ['Le', 'second', 'sâ€™appelle', 'Yann', 'et', 'il', 'a', 'neuf', 'ans']), (0.1739125798540512, ['Le', 'premier', 'sâ€™appelle', 'Thomas,', 'il', 'a', 'quatorze', 'ans']), (0.1739125798540512, ['Je', 'vais', 'Ã\xa0', 'lâ€™Ã©cole', 'Ã\xa0', 'Nice,', 'mais', 'jâ€™habite', 'Ã\xa0', 'Cagnes-Sur-Mer']), (0.1739125798540512, ['Je', 'suis', 'une', 'fille,', 'je', 'suis', 'franÃ§aise', 'et', 'jâ€™ai', 'treize', 'ans']), (0.1739125798540512, ['Je', 'mâ€™appelle', 'Jessica']), (0.02608742014594887, ['Nous', 'avons', 'une', 'grande', 'maison', 'avec', 'un', 'chien,', 'un', 'poisson', 'et', 'deux', 'chats.']), (0.02608742014594887, ['Mon', 'papa', 'est', 'italien', 'et', 'il', 'est', 'fleuriste']), (0.02608742014594887, ['Mes', 'frÃ¨res', 'et', 'moi', 'parlons', 'franÃ§ais,', 'italien', 'et', 'allemand', 'Ã\xa0', 'la', 'maison']), (0.02608742014594887, ['Ma', 'mÃ¨re', 'est', 'allemande', 'et', 'est', 'avocate']), (0.02608742014594887

# Selecting number of sentences to pick for summary

In [8]:
#Step 5 - How many sentences to pick
n = int(input("How many sentences do you want in the summary? "))
#n=2
summarize_text = []
for i in range(n):
    summarize_text.append(" ".join(ranked_sentence[i][1]))

How many sentences do you want in the summary? 3


# Printing the Summary

In [9]:
# Step 6 - Offcourse, output the summarize text
print("Summarize Text: \n", ". ".join(summarize_text))

Summarize Text: 
 Le second sâ€™appelle Yann et il a neuf ans. Le premier sâ€™appelle Thomas, il a quatorze ans. Je vais Ã  lâ€™Ã©cole Ã  Nice, mais jâ€™habite Ã  Cagnes-Sur-Mer


# Spanish Language

# Reading the text file and splitting the paragraph into sentences

In [10]:
file = open("C:/Users/seerl/Downloads/SpanishText.txt", "r")
#This file contains one paragraph of multiple sentences
filedata = file.readlines()
article = filedata[0].split(". ") #Just do the first paragraph
sentences = []
for sentence in article:
    print(sentence)
    sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))

Yo vivo en Granada, una ciudad pequeÃ±a que tiene monumentos muy importantes como la Alhambra
AquÃ­ la comida es deliciosa y son famosos el gazpacho, el rebujito y el salmorejo
Mi nueva casa estÃ¡ en una calle ancha que tiene muchos Ã¡rboles
El piso de arriba de mi casa tiene tres dormitorios y un despacho para trabajar
El piso de abajo tiene una cocina muy grande, un comedor con una mesa y seis sillas, un salÃ³n con dos sofÃ¡s verdes, una televisiÃ³n y cortinas
AdemÃ¡s, tiene una pequeÃ±a terraza con piscina donde puedo tomar el sol en verano.


# Printing Sentences

In [11]:
print("Sentences are ", sentences)

Sentences are  [['Yo', 'vivo', 'en', 'Granada,', 'una', 'ciudad', 'pequeÃ±a', 'que', 'tiene', 'monumentos', 'muy', 'importantes', 'como', 'la', 'Alhambra'], ['AquÃ\xad', 'la', 'comida', 'es', 'deliciosa', 'y', 'son', 'famosos', 'el', 'gazpacho,', 'el', 'rebujito', 'y', 'el', 'salmorejo'], ['Mi', 'nueva', 'casa', 'estÃ¡', 'en', 'una', 'calle', 'ancha', 'que', 'tiene', 'muchos', 'Ã¡rboles'], ['El', 'piso', 'de', 'arriba', 'de', 'mi', 'casa', 'tiene', 'tres', 'dormitorios', 'y', 'un', 'despacho', 'para', 'trabajar'], ['El', 'piso', 'de', 'abajo', 'tiene', 'una', 'cocina', 'muy', 'grande,', 'un', 'comedor', 'con', 'una', 'mesa', 'y', 'seis', 'sillas,', 'un', 'salÃ³n', 'con', 'dos', 'sofÃ¡s', 'verdes,', 'una', 'televisiÃ³n', 'y', 'cortinas'], ['AdemÃ¡s,', 'tiene', 'una', 'pequeÃ±a', 'terraza', 'con', 'piscina', 'donde', 'puedo', 'tomar', 'el', 'sol', 'en', 'verano.']]


# Creating Similarity Matrix

In [12]:
similarity_matrix = np.zeros((len(sentences), len(sentences)))

for idx1 in range(len(sentences)):
    for idx2 in range(len(sentences)):
        if idx1 == idx2: #ignore if both are same sentences
            continue
        similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2])

print("Similarity matrix:\n", similarity_matrix)

Similarity matrix:
 [[0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]


# Ranking sentences in similarity matrix

In [13]:
# Step 3 - Rank sentences in similarity martix
sentence_similarity_graph = nx.from_numpy_array(similarity_matrix)
scores = nx.pagerank(sentence_similarity_graph)
print("scores", scores)

scores {0: 0.05769302748150241, 1: 0.05769302748150241, 2: 0.05769302748150241, 3: 0.38461394503699525, 4: 0.38461394503699525, 5: 0.05769302748150241}


# Sorting the Ranks

In [14]:
# Step 4 - Sort the rank and pick top sentences
ranked_sentence = sorted(((scores[i],s) for i,s in
enumerate(sentences)), reverse=True)
print("Indexes of top ranked_sentence order are \n\n",
ranked_sentence)

Indexes of top ranked_sentence order are 

 [(0.38461394503699525, ['El', 'piso', 'de', 'arriba', 'de', 'mi', 'casa', 'tiene', 'tres', 'dormitorios', 'y', 'un', 'despacho', 'para', 'trabajar']), (0.38461394503699525, ['El', 'piso', 'de', 'abajo', 'tiene', 'una', 'cocina', 'muy', 'grande,', 'un', 'comedor', 'con', 'una', 'mesa', 'y', 'seis', 'sillas,', 'un', 'salÃ³n', 'con', 'dos', 'sofÃ¡s', 'verdes,', 'una', 'televisiÃ³n', 'y', 'cortinas']), (0.05769302748150241, ['Yo', 'vivo', 'en', 'Granada,', 'una', 'ciudad', 'pequeÃ±a', 'que', 'tiene', 'monumentos', 'muy', 'importantes', 'como', 'la', 'Alhambra']), (0.05769302748150241, ['Mi', 'nueva', 'casa', 'estÃ¡', 'en', 'una', 'calle', 'ancha', 'que', 'tiene', 'muchos', 'Ã¡rboles']), (0.05769302748150241, ['AquÃ\xad', 'la', 'comida', 'es', 'deliciosa', 'y', 'son', 'famosos', 'el', 'gazpacho,', 'el', 'rebujito', 'y', 'el', 'salmorejo']), (0.05769302748150241, ['AdemÃ¡s,', 'tiene', 'una', 'pequeÃ±a', 'terraza', 'con', 'piscina', 'donde', 'puedo'

# Selecting the number of sentences to pick for summary

In [15]:
#Step 5 - How many sentences to pick
n = int(input("How many sentences do you want in the summary? "))
#n=2
summarize_text = []
for i in range(n):
    summarize_text.append(" ".join(ranked_sentence[i][1]))

How many sentences do you want in the summary? 4


# Printing the Summary

In [16]:
# Step 6 - Offcourse, output the summarize text
print("Summarize Text: \n", ". ".join(summarize_text))

Summarize Text: 
 El piso de arriba de mi casa tiene tres dormitorios y un despacho para trabajar. El piso de abajo tiene una cocina muy grande, un comedor con una mesa y seis sillas, un salÃ³n con dos sofÃ¡s verdes, una televisiÃ³n y cortinas. Yo vivo en Granada, una ciudad pequeÃ±a que tiene monumentos muy importantes como la Alhambra. Mi nueva casa estÃ¡ en una calle ancha que tiene muchos Ã¡rboles


# Portuguese Language

# Reading the text file and splitting the paragraph into sentences

In [17]:
file = open("C:/Users/seerl/Downloads/PortugueseText.txt", "r")
#This file contains one paragraph of multiple sentences
filedata = file.readlines()
article = filedata[0].split(". ") #Just do the first paragraph
sentences = []
for sentence in article:
    print(sentence)
    sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))

JoÃ£o e Pedro eram os nomes dos amigos de Lucas
Os trÃªs subiram na Ã¡rvore para pegar algumas frutas, mas nÃ£o conseguiram chegar atÃ© o alto da jaqueira
Um fazendeiro que morava perto do acampamento resolveu ajudar os jovens e trouxe uma escada para que alcanÃ§assem as jacas
O fazendeiro se chamava Roberto e conseguiu ajudar Lucas e seus amigos
Depois de comerem as jacas eles ficaram conversando embaixo da jaqueira
Lucas gostou tanto daquele lugar que resolveu voltar outras vezes
Sempre que vai acampar ele faz uma visita a Roberto, pois se tornaram amigos.


# Printing Sentences

In [18]:
print("Sentences are ", sentences)

Sentences are  [['JoÃ£o', 'e', 'Pedro', 'eram', 'os', 'nomes', 'dos', 'amigos', 'de', 'Lucas'], ['Os', 'trÃªs', 'subiram', 'na', 'Ã¡rvore', 'para', 'pegar', 'algumas', 'frutas,', 'mas', 'nÃ£o', 'conseguiram', 'chegar', 'atÃ©', 'o', 'alto', 'da', 'jaqueira'], ['Um', 'fazendeiro', 'que', 'morava', 'perto', 'do', 'acampamento', 'resolveu', 'ajudar', 'os', 'jovens', 'e', 'trouxe', 'uma', 'escada', 'para', 'que', 'alcanÃ§assem', 'as', 'jacas'], ['O', 'fazendeiro', 'se', 'chamava', 'Roberto', 'e', 'conseguiu', 'ajudar', 'Lucas', 'e', 'seus', 'amigos'], ['Depois', 'de', 'comerem', 'as', 'jacas', 'eles', 'ficaram', 'conversando', 'embaixo', 'da', 'jaqueira'], ['Lucas', 'gostou', 'tanto', 'daquele', 'lugar', 'que', 'resolveu', 'voltar', 'outras', 'vezes'], ['Sempre', 'que', 'vai', 'acampar', 'ele', 'faz', 'uma', 'visita', 'a', 'Roberto,', 'pois', 'se', 'tornaram', 'amigos.']]


# Creating Similarity Matrix

In [19]:
similarity_matrix = np.zeros((len(sentences), len(sentences)))

for idx1 in range(len(sentences)):
    for idx2 in range(len(sentences)):
        if idx1 == idx2: #ignore if both are same sentences
            continue
        similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2])

print("Similarity matrix:\n", similarity_matrix)

Similarity matrix:
 [[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]]


# Ranking Sentences in Similarity Matrix

In [20]:
# Step 3 - Rank sentences in similarity martix
sentence_similarity_graph = nx.from_numpy_array(similarity_matrix)
scores = nx.pagerank(sentence_similarity_graph)
print("scores", scores)

scores {0: 0.14285714285714282, 1: 0.14285714285714282, 2: 0.14285714285714282, 3: 0.14285714285714282, 4: 0.14285714285714282, 5: 0.14285714285714282, 6: 0.14285714285714282}


# Sorting the Ranks

In [21]:
# Step 4 - Sort the rank and pick top sentences
ranked_sentence = sorted(((scores[i],s) for i,s in
enumerate(sentences)), reverse=True)
print("Indexes of top ranked_sentence order are \n\n",
ranked_sentence)

Indexes of top ranked_sentence order are 

 [(0.14285714285714282, ['Um', 'fazendeiro', 'que', 'morava', 'perto', 'do', 'acampamento', 'resolveu', 'ajudar', 'os', 'jovens', 'e', 'trouxe', 'uma', 'escada', 'para', 'que', 'alcanÃ§assem', 'as', 'jacas']), (0.14285714285714282, ['Sempre', 'que', 'vai', 'acampar', 'ele', 'faz', 'uma', 'visita', 'a', 'Roberto,', 'pois', 'se', 'tornaram', 'amigos.']), (0.14285714285714282, ['Os', 'trÃªs', 'subiram', 'na', 'Ã¡rvore', 'para', 'pegar', 'algumas', 'frutas,', 'mas', 'nÃ£o', 'conseguiram', 'chegar', 'atÃ©', 'o', 'alto', 'da', 'jaqueira']), (0.14285714285714282, ['O', 'fazendeiro', 'se', 'chamava', 'Roberto', 'e', 'conseguiu', 'ajudar', 'Lucas', 'e', 'seus', 'amigos']), (0.14285714285714282, ['Lucas', 'gostou', 'tanto', 'daquele', 'lugar', 'que', 'resolveu', 'voltar', 'outras', 'vezes']), (0.14285714285714282, ['JoÃ£o', 'e', 'Pedro', 'eram', 'os', 'nomes', 'dos', 'amigos', 'de', 'Lucas']), (0.14285714285714282, ['Depois', 'de', 'comerem', 'as', 'jac

# Selecting the number of sentences to pick for summary

In [22]:
#Step 5 - How many sentences to pick
n = int(input("How many sentences do you want in the summary? "))
#n=2
summarize_text = []
for i in range(n):
    summarize_text.append(" ".join(ranked_sentence[i][1]))

How many sentences do you want in the summary? 2


# Printing the Summary

In [23]:
# Step 6 - Offcourse, output the summarize text
print("Summarize Text: \n", ". ".join(summarize_text))

Summarize Text: 
 Um fazendeiro que morava perto do acampamento resolveu ajudar os jovens e trouxe uma escada para que alcanÃ§assem as jacas. Sempre que vai acampar ele faz uma visita a Roberto, pois se tornaram amigos.
