In [1]:
from bs4 import BeautifulSoup
from urllib.request import urlopen
from nltk.tokenize import sent_tokenize
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx

In [3]:
def get_only_text(url, verbose=True):
    """
    Download a web page and return its title and paragraph text.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    verbose : bool, optional
        When True (the default) the extracted text is printed between two
        separator lines.

    Returns
    -------
    tuple of (str, str)
        The text of the page's <title> element (an empty string when the
        page has none) and the text of every <p> element joined with single
        spaces.
    """
    # The context manager closes the HTTP response even if parsing fails.
    with urlopen(url) as page:
        soup = BeautifulSoup(page, "lxml")

    text = ' '.join(p.text for p in soup.find_all('p'))

    if verbose:
        print ("=====================")
        print (text)
        print ("=====================")

    # soup.title is None when the document has no <title> element.
    title = soup.title.text if soup.title is not None else ""
    return title, text


In [4]:
# Article to summarise.
url = "https://1000wordphilosophy.com/2019/05/01/camus-on-the-absurd-the-myth-of-sisyphus/"
# NOTE: despite its name, `text` holds the (title, body) tuple returned by get_only_text.
text = get_only_text(url)

1000-Word Philosophy: An Introductory Anthology Philosophy, One Thousand Words at a Time Author: Erik Van Aken
Category: Phenomenology and Existentialism, Ethics
Word Count: 1000 “There is only one truly serious philosophical problem, and that is suicide. Judging whether life is or is not worth living amounts to answering the fundamental question of philosophy.” –  Albert Camus It might seem flippant to remark that the essential question in philosophy is “Should I kill myself?” But the question of suicide rests on what Camus considered the essential human problem: the sense in which our lives are entirely absurd. This essay will outline the origin and consequences of Camus’s notion of the absurd from his 1942 The Myth of Sisyphus.[1] There are many things we might naturally call absurd: a rude joke, an outrageous statement, or the price of a pair of designer jeans. This though is not what Camus means by “absurd.” For Camus, the absurd originates from a combination of two things: the wa

In [5]:
# Sentence-tokenise every element of `text` and gather the results in one flat list
sentences = [
    sentence
    for part in text
    for sentence in sent_tokenize(part)
]

In [14]:
# Preview the first ten sentences
sentences[:10]

['Camus on the Absurd: The Myth of Sisyphus – 1000-Word Philosophy: An Introductory Anthology',
 '1000-Word Philosophy: An Introductory Anthology Philosophy, One Thousand Words at a Time Author: Erik Van Aken\nCategory:\xa0Phenomenology and Existentialism, Ethics\nWord Count: 1000 “There is only one truly serious philosophical problem, and that is suicide.',
 'Judging whether life is or is not worth living amounts to answering the fundamental question of philosophy.”\xa0–\xa0 Albert Camus It might seem flippant to remark that the essential question in philosophy is “Should I kill myself?” But the question of suicide rests on what Camus considered the essential human problem: the sense in which our lives are entirely absurd.',
 'This essay will outline the origin and consequences of Camus’s notion of the absurd from his 1942 The Myth of Sisyphus.',
 '[1] There are many things we might naturally call absurd: a rude joke, an outrageous statement, or the price of a pair of designer jeans.'

In [8]:
# Create word vectors
# Every line of the embeddings file is split on whitespace: the first field is
# used as the word and the remaining fields are parsed as its float32 vector.
# NOTE(review): the path below is an absolute, machine-specific location.
word_embeddings = {}
# The context manager closes the file even if a line fails to parse.
with open(r'C:\Users\bnawa\Data\glove.6B.100d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        word_embeddings[word] = coefs

In [10]:
# Inspect the type of the file handle `f` that was used to read the embeddings
type(f)

_io.TextIOWrapper

In [16]:
# Clean the text by removing punctuation, numbers and special characters, then lowercasing
# regex=True must be explicit: pandas >= 2.0 treats the pattern as a literal
# string by default, which would silently leave the sentences unchanged.
clean_sentences = pd.Series(sentences).str.replace("[^a-zA-Z]", " ", regex=True)
clean_sentences = [s.lower() for s in clean_sentences]


# Drop English stopwords from every cleaned sentence
stop_words = stopwords.words('english')


def remove_stopwords(sen):
    """Return the items of `sen` that are not in `stop_words`, joined by single spaces."""
    kept = (token for token in sen if token not in stop_words)
    return " ".join(kept)


clean_sentences = [remove_stopwords(cleaned.split()) for cleaned in clean_sentences]

In [17]:
# Preview the first ten cleaned sentences
clean_sentences[:10]

['camus absurd myth sisyphus word philosophy introductory anthology',
 'word philosophy introductory anthology philosophy one thousand words time author erik van aken category phenomenology existentialism ethics word count one truly serious philosophical problem suicide',
 'judging whether life worth living amounts answering fundamental question philosophy albert camus might seem flippant remark essential question philosophy kill question suicide rests camus considered essential human problem sense lives entirely absurd',
 'essay outline origin consequences camus notion absurd myth sisyphus',
 'many things might naturally call absurd rude joke outrageous statement price pair designer jeans',
 'though camus means absurd camus absurd originates combination two things way want world way world actually',
 'want world seems part human nature sense justice fairness want world fair want evil punished virtue rewarded',
 'also want understand bad things happen good people good things happen bad

In [18]:
# Create sentence vectors
# Each sentence becomes the sum of its word vectors divided by
# (number of words + 0.001).
sentence_vectors = []
for i in clean_sentences:
    words = i.split()  # tokenise once instead of twice
    if words:
        # Words missing from word_embeddings contribute a zero vector.
        v = sum(word_embeddings.get(w, np.zeros((100,))) for w in words)/(len(words)+0.001)
    else:
        # No tokens (empty or whitespace-only sentence): use a zero vector so
        # every entry has shape (100,) rather than collapsing to a scalar.
        v = np.zeros((100,))
    sentence_vectors.append(v)

In [20]:
# Inspect the first sentence vector
sentence_vectors[:1]

[array([ 0.03586977,  0.19389476,  0.2726899 ,  0.37198073,  0.11881065,
         0.32537743,  0.17772092, -0.51747024,  0.03076473, -0.06257893,
        -0.19501448,  0.2774534 , -0.33692163,  0.22008559,  0.14922786,
         0.08476252, -0.01160093,  0.14273278,  0.00314447,  0.20297237,
         0.15674804, -0.00537395, -0.23808467, -0.11559442,  0.32124484,
        -0.15754716, -0.00678927, -0.13180065, -0.1589435 , -0.10319959,
        -0.34406924,  0.09840905, -0.563272  , -0.19713762, -0.53251594,
         0.355869  , -0.17206374,  0.20702286,  0.06178641, -0.43907386,
        -0.22145805,  0.38474977, -0.27716532, -0.04895672, -0.1651226 ,
        -0.16315897,  0.30688888,  0.04512184, -0.09164353,  0.05690426,
         0.22659555,  0.26833156,  0.45293084,  0.3919105 , -0.52248544,
        -0.7511448 ,  0.41199848,  0.2888591 ,  0.20851444,  0.21023372,
         0.37528554,  0.41801775, -0.18884613, -0.4889876 ,  0.66536176,
        -0.28472266,  0.25105888,  0.15371326, -0.0

In [21]:
# Allocate a square matrix of zeros with one row and one column per sentence
n_sentences = len(sentences)
sim_mat = np.zeros((n_sentences, n_sentences))

In [22]:
# Fill the similarity matrix with pairwise cosine similarities
# (the normalized dot product of each pair of sentence vectors).
if sentence_vectors:
    # One call on the stacked vectors computes the whole matrix, instead of
    # one cosine_similarity call per (i, j) pair on single-row inputs.
    sim_mat = np.asarray(cosine_similarity(np.vstack(sentence_vectors)), dtype=np.float64)
    # Zero the diagonal: a sentence's similarity with itself is excluded.
    np.fill_diagonal(sim_mat, 0.0)
else:
    # No sentences: keep an empty square matrix.
    sim_mat = np.zeros((0, 0))

In [24]:
# Use pagerank to create rank
nx_graph = nx.from_numpy_array(sim_mat)
scores = nx.pagerank(nx_graph)
# Pair every sentence with its score and order from highest to lowest score.
ranked_sentences = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)

# Print up to ten top-ranked sentences. Slicing (rather than indexing 0..9)
# avoids an IndexError when there are fewer than ten sentences.
for score, sentence in ranked_sentences[:10]:
    print(sentence)

Judging whether life is or is not worth living amounts to answering the fundamental question of philosophy.” –  Albert Camus It might seem flippant to remark that the essential question in philosophy is “Should I kill myself?” But the question of suicide rests on what Camus considered the essential human problem: the sense in which our lives are entirely absurd.
This though is not what Camus means by “absurd.” For Camus, the absurd originates from a combination of two things: the way we want the world to be and the way the world actually is.
Camus insists that we must persist in the face of absurdity and not give ourselves over to false hope; he ultimately suggests that life will be lived all the better if it has no meaning.
For example, Kierkegaard sees life as profoundly absurd, due to its central lack of meaning.
He thereby proposes that we take “a leap of faith,” essentially arguing that belief in God will ultimately provide one’s life with meaning.
It is up to us to live our lives