In [9]:
import re
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds
import networkx
from gensim.summarization import summarize

In [2]:
with open("/Users/ramakrishnareddych/Downloads/results.txt", encoding="utf-8") as f:
    text = f.read()
    
text

"Speaker1: Now That I'm Older it's so obvious when someone is not interested in even talking to you but they clearly have some there going through the motions to talk to you in a second why would you waste our time. The second one is going through conversation wanting to have a conversation if you die if you don't have a conversation don't because the other person is going to know it very quickly and it's going to make them feel weird I think a lot of men may be in the younger side of the more immature try to have a conversation with a woman because they think this is the barrier to entry like oh okay I have to pretend like I want to talk to her and then we'll have sex what is transactional that way especially women they are much more tuned to the seller details of life than a man ever is\nSpeaker1: General course people can tell if you have a motive you know obviously\nSpeaker2: it's because then eventually have to get my question out that I really want to talk to you about the thing\

In [5]:
sentences = nltk.sent_tokenize(text)
len(sentences)

2

In [6]:
# simce the nltk split the text into just 2 sentences, we're going with a custom text split 
sentences = text.split('\n')
len(sentences)

10

In [16]:
sentences

["Speaker1: Now That I'm Older it's so obvious when someone is not interested in even talking to you but they clearly have some there going through the motions to talk to you in a second why would you waste our time. The second one is going through conversation wanting to have a conversation if you die if you don't have a conversation don't because the other person is going to know it very quickly and it's going to make them feel weird I think a lot of men may be in the younger side of the more immature try to have a conversation with a woman because they think this is the barrier to entry like oh okay I have to pretend like I want to talk to her and then we'll have sex what is transactional that way especially women they are much more tuned to the seller details of life than a man ever is",
 'Speaker1: General course people can tell if you have a motive you know obviously',
 "Speaker2: it's because then eventually have to get my question out that I really want to talk to you about the

In [7]:
stop_words = nltk.corpus.stopwords.words('english')

In [12]:
def normalize_document(doc):
    # lower case and remove special characters\whitespaces
    doc = re.sub(r'[^a-zA-Z\s]', '', doc, re.I|re.A)
    doc = doc.lower()
    doc = doc.strip()
    # tokenize document
    tokens = nltk.word_tokenize(doc)
    # filter stopwords out of document
    filtered_tokens = [token for token in tokens if token not in stop_words]
    # re-create document from filtered tokens
    doc = ' '.join(filtered_tokens)
    return doc

normalize_text = np.vectorize(normalize_document)

In [13]:
normalize_sentences = normalize_text(sentences)

In [15]:
# Feature Engineering

t_vector = TfidfVectorizer(min_df=0., max_df=1., use_idf=True)
dt_matrix = t_vector.fit_transform(normalize_sentences)
dt_matrix = dt_matrix.toarray()

vocab = t_vector.get_feature_names()
td_matrix = dt_matrix.T
print(td_matrix.shape)
pd.DataFrame(np.round(td_matrix, 2), index=vocab).head(10)

(106, 10)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
always,0.0,0.0,0.0,0.32,0.0,0.0,0.0,0.0,0.0,0.0
anything,0.0,0.0,0.0,0.32,0.0,0.0,0.0,0.0,0.0,0.0
barrier,0.11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
bit,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0
break,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0
chance,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0
clearly,0.09,0.0,0.0,0.0,0.17,0.0,0.0,0.0,0.0,0.0
comfortable,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0
complement,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0
complementary,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.52,0.0,0.0


In [None]:
# MODEL - 1 Latent Semantic Analysis

In [17]:
def low_rank_svd(matrix, singular_count=2):
    u, s, vt = svds(matrix, k=singular_count)
    return u, s, vt

In [80]:
num_sentences = 6 # How many sentences we want our summary to be
num_topics = 2 # since we have only two speakers

u, s, vt = low_rank_svd(td_matrix, singular_count=num_topics)  
print(u.shape, s.shape, vt.shape)
term_topic_mat, singular_values, topic_document_mat = u, s, vt

(106, 2) (2,) (2, 10)


In [81]:
# removing singular values below threshold                                         
sv_threshold = 0.5
min_sigma_value = max(singular_values) * sv_threshold
singular_values[singular_values < min_sigma_value] = 0

In [82]:
sentence_scores = np.sqrt(np.dot(np.square(singular_values), np.square(topic_document_mat)))
sentence_scores

array([7.49810988e-01, 4.11306217e-01, 7.16819148e-01, 4.20653070e-01,
       5.83409587e-01, 5.01181979e-01, 1.78791120e-16, 6.72847054e-01,
       5.26884710e-01, 5.95290024e-16])

In [84]:
top_sentences = (sentence_scores).argsort()[:num_sentences]
top_sentences.sort()


In [85]:
# Summary with the help of Latent Semantic analysis

print('\n'.join(np.array(sentences)[top_sentences]))

Speaker1: General course people can tell if you have a motive you know obviously
we talked about the past as well someone will always remember how you made them feel much more than anything you've
want her conversation want to have a conversation be interesting to people so it has something to do with the first
point but it's definitely little bit
Speaker2: think you'll meet a single woman ever in your life who doesn't love a compliment but there's something to be said of a type of complement a delivery and the delivery and the Stella compliment is going to make or break time



In [None]:
# MODEL - 2 TEXT RANK

In [86]:
similarity_matrix = np.matmul(dt_matrix, dt_matrix.T)
print(similarity_matrix.shape)
np.round(similarity_matrix, 2)

(10, 10)


array([[1.  , 0.05, 0.11, 0.09, 0.21, 0.26, 0.  , 0.09, 0.17, 0.  ],
       [0.05, 1.  , 0.05, 0.  , 0.  , 0.09, 0.  , 0.07, 0.03, 0.  ],
       [0.11, 0.05, 1.  , 0.  , 0.12, 0.15, 0.  , 0.23, 0.03, 0.  ],
       [0.09, 0.  , 0.  , 1.  , 0.07, 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.21, 0.  , 0.12, 0.07, 1.  , 0.  , 0.  , 0.06, 0.12, 0.  ],
       [0.26, 0.09, 0.15, 0.  , 0.  , 1.  , 0.  , 0.  , 0.05, 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  , 0.  , 0.  , 0.  ],
       [0.09, 0.07, 0.23, 0.  , 0.06, 0.  , 0.  , 1.  , 0.04, 0.  ],
       [0.17, 0.03, 0.03, 0.  , 0.12, 0.05, 0.  , 0.04, 1.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ]])

In [90]:
similarity_graph = networkx.from_numpy_array(similarity_matrix)
TR_Sentence_scores = networkx.pagerank(similarity_graph)
sentences_rank = sorted(((score, index) for index, score in TR_Sentence_scores.items()), reverse=True)


In [104]:
TR_top_sentences = [sentences_rank[index][1] 
                        for index in range(num_sentences)]
TR_top_sentences.sort()

In [105]:
# Summary with the help of TEXT RANK

print('\n'.join(np.array(sentences)[TR_top_sentences]))

Speaker1: Now That I'm Older it's so obvious when someone is not interested in even talking to you but they clearly have some there going through the motions to talk to you in a second why would you waste our time. The second one is going through conversation wanting to have a conversation if you die if you don't have a conversation don't because the other person is going to know it very quickly and it's going to make them feel weird I think a lot of men may be in the younger side of the more immature try to have a conversation with a woman because they think this is the barrier to entry like oh okay I have to pretend like I want to talk to her and then we'll have sex what is transactional that way especially women they are much more tuned to the seller details of life than a man ever is
Speaker2: it's because then eventually have to get my question out that I really want to talk to you about the thing
ever said so if you make someone feel comfortable or like clearly you're just using 

In [None]:
# MODEL - 3 GENSIM SUMMARIZATION

In [111]:
# adjust the ratio to see what percentage of text the summary should be 

print(summarize(text, ratio=0.4, split=False))

Speaker1: Now That I'm Older it's so obvious when someone is not interested in even talking to you but they clearly have some there going through the motions to talk to you in a second why would you waste our time.
The second one is going through conversation wanting to have a conversation if you die if you don't have a conversation don't because the other person is going to know it very quickly and it's going to make them feel weird I think a lot of men may be in the younger side of the more immature try to have a conversation with a woman because they think this is the barrier to entry like oh okay I have to pretend like I want to talk to her and then we'll have sex what is transactional that way especially women they are much more tuned to the seller details of life than a man ever is
ever said so if you make someone feel comfortable or like clearly you're just using me to get to whatever ends you think that won't you usually won't get a second chance to make an impression on that p

In [116]:
# we can even modify the word count from the Gensim Summarization

print(summarize(text, word_count=150, split=False))

The second one is going through conversation wanting to have a conversation if you die if you don't have a conversation don't because the other person is going to know it very quickly and it's going to make them feel weird I think a lot of men may be in the younger side of the more immature try to have a conversation with a woman because they think this is the barrier to entry like oh okay I have to pretend like I want to talk to her and then we'll have sex what is transactional that way especially women they are much more tuned to the seller details of life than a man ever is
ever said so if you make someone feel comfortable or like clearly you're just using me to get to whatever ends you think that won't you usually won't get a second chance to make an impression on that person so you got it just don't
