### Read input data from Google Drive

In [None]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [None]:
# Authenticate the Colab user and create the PyDrive client.
# The resulting `drive` client is what later cells use to fetch files
# from Google Drive by file ID.
auth.authenticate_user()
gauth = GoogleAuth()
# Reuse the application-default credentials for PyDrive.
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [None]:
# Fetch the raw data file from Google Drive into the working directory.
RAW_DATA_FILE_ID = "154ZPP7J54KPmq8TuFfX8kgAHb2ah0neo"
RAW_DATA_FILENAME = 'design_thinking_data.csv'

downloaded = drive.CreateFile({'id': RAW_DATA_FILE_ID})
downloaded.GetContentFile(RAW_DATA_FILENAME)

In [None]:
# Read the downloaded CSV into a pandas DataFrame.
# Later cells read its 'article_text' column.
import pandas as pd

raw_data = pd.read_csv('design_thinking_data.csv')

### Clean up text

In [None]:
# Split each article into lines on the '\n' character.
import os  # no longer used by this cell; kept so cells relying on `os` still run

# The separator is a property of the data, not of the machine running the
# notebook, so it is spelled out instead of taken from os.linesep (which is
# '\r\n' on Windows and would leave '\n'-separated articles unsplit).
LINE_SEP = '\n'

# Missing articles (NaN) are treated as empty text; str(NaN) would otherwise
# yield the literal "nan", which would later be tokenised as a sentence.
raw_data['clean_text'] = (
    raw_data['article_text']
    .fillna('')
    .apply(lambda text: str(text).split(LINE_SEP))
)

In [None]:
# Download the NLTK resources used by later cells:
#   'punkt'     - tokenizer data (presumably what sent_tokenize relies on)
#   'stopwords' - word lists read through nltk.corpus.stopwords
import nltk
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
# Split the text of every article into sentences.
from nltk.tokenize import sent_tokenize

# One inner list of sentences per non-empty paragraph; empty paragraphs are
# skipped. The nested result is flattened in a later cell.
sentences = [
    sent_tokenize(paragraph)
    for article in raw_data['clean_text']
    for paragraph in article
    if paragraph
]

In [None]:
# Flatten the per-paragraph lists into a single flat list of sentences.
# Entries that are already plain strings are passed through unchanged, so
# re-running this cell does not split sentences into single characters.
sentences = [
    sentence
    for group in sentences
    for sentence in ([group] if isinstance(group, str) else group)
]

In [None]:
# Replace everything that is not an ASCII letter (punctuation, digits,
# special characters) with a space, then lowercase.
# regex=True is required: without it newer pandas versions treat the pattern
# as a literal string and nothing is replaced.
# dtype="object" keeps the .str accessor usable when `sentences` is empty.
clean_sentences = (
    pd.Series(sentences, dtype="object")
    .str.replace("[^a-zA-Z]", " ", regex=True)
    .str.lower()
    .tolist()
)

In [None]:
# Strip English stop words from every cleaned sentence.
from nltk.corpus import stopwords

stop_words = stopwords.words('english')


def remove_stopwords(sen):
    """Return the tokens of `sen` that are not stop words, joined by spaces.

    Args:
        sen: iterable of word tokens; each is compared as-is against
            the module-level `stop_words`.

    Returns:
        A single string of the surviving tokens separated by one space
        ('' when every token was a stop word).
    """
    kept_tokens = []
    for token in sen:
        if token in stop_words:
            continue
        kept_tokens.append(token)
    return " ".join(kept_tokens)


clean_sentences = [remove_stopwords(sentence.split()) for sentence in clean_sentences]

In [None]:
# Drop sentences that became empty after cleaning.
# The same positions are removed from `sentences`, so that index i keeps
# referring to the same sentence in both lists. The ranking step looks up
# scores[i] (built from clean_sentences) against sentences[i]; filtering only
# one of the two lists would attach scores to the wrong sentences.
assert len(sentences) == len(clean_sentences), (
    "sentences and clean_sentences are out of sync; re-run the cleaning cells in order"
)
_kept_pairs = [
    (original, cleaned)
    for original, cleaned in zip(sentences, clean_sentences)
    if cleaned != ''
]
sentences = [original for original, _cleaned in _kept_pairs]
clean_sentences = [cleaned for _original, cleaned in _kept_pairs]

### Embeddings

#### Word vectors

In [None]:
# Fetch the GloVe vectors file from Google Drive into the working directory.
GLOVE_FILE_ID = "1XlK7waXNOsGf3mdgMWiLVrDhCgmnDhr1"
GLOVE_FILENAME = 'glove.6B.100d.txt'

downloaded = drive.CreateFile({'id': GLOVE_FILE_ID})
downloaded.GetContentFile(GLOVE_FILENAME)

In [None]:
# Load the GloVe file into a {word: vector} dictionary.
from numpy import asarray

word_embeddings = {}
with open('glove.6B.100d.txt', encoding='utf-8') as glove_file:
    for row in glove_file:
        # The first whitespace-separated field is the word; the remaining
        # fields are its coefficients.
        fields = row.split()
        word_embeddings[fields[0]] = asarray(fields[1:], dtype='float32')

#### Sentence vectors

In [None]:
# Build one 100-dimensional vector per cleaned sentence.
from numpy import zeros

sentence_vectors = []
for sentence in clean_sentences:
    if sentence:
        tokens = sentence.split()
        # Words missing from the embeddings contribute a zero vector.
        token_vectors = [word_embeddings.get(token, zeros((100,))) for token in tokens]
        # Averaged over the token count; the 0.001 offset is part of the
        # original formula and keeps the denominator non-zero.
        vector = sum(token_vectors) / (len(tokens) + 0.001)
    else:
        vector = zeros((100,))
    sentence_vectors.append(vector)

### TextRank algorithm

In [None]:
# Pairwise cosine similarity between all sentence vectors.
import time

from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity

sentence_vectors_sparse = sparse.csr_matrix(sentence_vectors)

# Only the similarity computation itself is timed.
start_time = time.time()
similarities = cosine_similarity(sentence_vectors_sparse)
elapsed_seconds = time.time() - start_time
print(f"Similarity matrix calculated in {elapsed_seconds:.2f} seconds")


Similarity matrix calculated in 0.46 seconds


In [None]:
import networkx as nx

# Build a graph from the sentence-similarity matrix and run PageRank on it.
nx_graph = nx.from_numpy_array(similarities)
# `scores` is later looked up by sentence position (scores[i]).
scores = nx.pagerank(nx_graph)

In [None]:
# Rank sentences by their PageRank score.
# Each entry is a (score, sentence) tuple.
# NOTE(review): assumes sentences[i] is the sentence behind graph node i;
# verify that no entries were dropped from only one of the two sequences.
# NOTE(review): only the first 100 sentences are considered; confirm this cap
# is intended rather than a debugging leftover.
ranked_sentences = [
    (scores[i], s)
    for i, s in enumerate(sentences[:100])
    if i in scores  # skip positions without a score instead of raising KeyError
]

# Highest score first. The key must be the score (element 0): sorting on
# element 1 orders the list by sentence text, not by rank.
ranked_sentences.sort(key=lambda pair: pair[0], reverse=True)

In [None]:
# ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)

### Results

In [None]:
# Specify number of sentences to form the summary
sn = 15

# Generate summary: print the first `sn` entries of ranked_sentences.
# Slicing (instead of indexing over range(sn)) avoids an IndexError when
# fewer than `sn` sentences were ranked.
for _score, sentence in ranked_sentences[:sn]:
    print(f"\n{sentence}")


are developed.

[9] Whereas for "tame" or "well-defined" problems the problem is clear, and the solution is available through applying rules or technical knowledge.

[8]

[7] Core features of design thinking include the abilities to:

[5][6]

[3][4] Some of these prescriptions have been criticized for oversimplifying the design process and trivializing the role of technical knowledge and skills.

[28][4] In the 2000s there was a significant growth of interest in design thinking as a catalyst for gaining competitive advantage within business,[29] but doubts around design thinking as a panacea for success have also been expressed.

[26][27] Designers approach users with the goal of understanding their wants and needs, what might make their life easier and more enjoyable and how technology can be useful for them.

[25]

[25]

[25]

[25]

[24] Projects may loop back through inspiration, ideation, and implementation more than once as the team refines its ideas and explores new directions.
