# Main notebook to process indexed datasets into summaries.

## Generate sentence pairs

In [21]:
import itertools
import json
import nltk
import pandas as pd

In [22]:
# Base directory that every later cell writes its outputs into.
# Strip surrounding whitespace so an accidental space typed at the prompt
# does not end up inside the paths built from save_dir.
save_dir = input("enter path to base directory to save outputs to:\n").strip()

enter path to base directory to save outputs to:
 ../data/dummy


In [23]:
input_path = input("enter path to indexed dataset:\n").strip()
# Let pandas open the file and state the encoding explicitly: a bare open()
# uses the platform's locale encoding, which is not UTF-8 on every system.
sents_df = pd.read_csv(input_path, delimiter=',', encoding='utf-8')
sents_df.head()

enter path to indexed dataset:
 ../data/dummy/dummy1.csv


Unnamed: 0,index,sentence
0,0,"People with Virgo rising tend to be practical,..."
1,1,You have an ingenious mind.
2,2,You have an active mind.
3,3,You have an alert mind.
4,4,Gaining knowledge and putting it to good use a...


In [24]:
# Row labels of the sentence table, and every unordered pair drawn from them.
indices = sents_df.index.tolist()
indices_list = list(itertools.combinations(indices, 2))

print('no. of sentences: ', len(indices))
print('no. of sentence pairs: ', len(indices_list), '\n')

no. of sentences:  11
no. of sentence pairs:  55 



In [25]:
# Build one row per sentence pair (both indices plus both sentence texts)
# so the pairs can be saved to a csv for manual labelling.
sentence_col = sents_df['sentence']
data = [
    [first_idx, second_idx, sentence_col.iloc[first_idx], sentence_col.iloc[second_idx]]
    for first_idx, second_idx in indices_list
]

df = pd.DataFrame(data=data, columns=['first_index', 'second_index', 'first', 'second'])
display(df)

Unnamed: 0,first_index,second_index,first,second
0,0,1,"People with Virgo rising tend to be practical,...",You have an ingenious mind.
1,0,2,"People with Virgo rising tend to be practical,...",You have an active mind.
2,0,3,"People with Virgo rising tend to be practical,...",You have an alert mind.
3,0,4,"People with Virgo rising tend to be practical,...",Gaining knowledge and putting it to good use a...
4,0,5,"People with Virgo rising tend to be practical,...",You strive for perfection.
5,0,6,"People with Virgo rising tend to be practical,...",You can be quite the person to live with or to...
6,0,7,"People with Virgo rising tend to be practical,...","At times others can never be ""good"" enough to ..."
7,0,8,"People with Virgo rising tend to be practical,...",Finding fault with what's wrong with things is...
8,0,9,"People with Virgo rising tend to be practical,...","Sometimes, though, this can make relationships..."
9,0,10,"People with Virgo rising tend to be practical,...",Pessimism and being too self-critical are two ...


In [26]:
# Ask before writing the paired dataset; anything other than 'y'/'Y' skips the save.
is_save = input('save indexed paired dataset? (y/n):\n')
if is_save.lower() != 'y':
    print('not saving indexed paired dataset.')
else:
    df.to_csv(save_dir + '/' + 'paired_sentences.csv')

save indexed paired dataset? (y/n):
 y


## Infer the distance between the two sentences of each pair

In [27]:
import torch
from fairseq.data.data_utils import collate_tokens
from math import exp
import numpy as np

class RobertaMNLI:
    """Wrapper around the fairseq ``roberta.large.mnli`` hub model for scoring sentence pairs."""
    # todo: create similar wrapper classes for other NLI engines.
    # todo: create a wrapper superclass and subclass from there.

    def __init__(self, rel_path):
        """
        Load the model through torch.hub and put it in evaluation mode.

        :param rel_path: relative path to pytorch hub directory.
        """
        # class index -> label, matching the indices returned by predict_one.
        self.output_map = {
            0: 'contradiction',
            1: 'neutral',
            2: 'entailment'
        }

        torch.hub.set_dir(rel_path)
        self.roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli')  # works
        # Move to the GPU only when one is present, so the wrapper can still be
        # constructed on a CPU-only machine instead of raising in .cuda().
        if torch.cuda.is_available():
            self.roberta.cuda()
        self.roberta.eval()

    def predict_one(self, S1, S2, return_probs=False):
        """
        Score a single ordered sentence pair with the 'mnli' head.

        :param S1: first sentence of the pair.
        :param S2: second sentence of the pair.
        :param return_probs: when true, return the three class probabilities
            instead of the winning class index.
        :return: a list ``[p0, p1, p2]`` of probabilities (exponentiated head
            outputs) if ``return_probs`` is true, otherwise the argmax class
            index: 0 is contradiction, 1 is neutral, 2 is entailment.
        """
        batch = collate_tokens(
            [self.roberta.encode(S1, S2)], pad_idx=1
        )
        # Pure inference: no gradients are needed.
        with torch.no_grad():
            logprobs = self.roberta.predict('mnli', batch)

        if return_probs:
            # The head output is treated as log-probabilities; exponentiate to get probabilities.
            return [exp(logprob) for logprob in logprobs[0].tolist()]
        return logprobs.argmax(dim=1).tolist()[0]

In [28]:
# Build the NLI wrapper; rel_path is the directory handed to torch.hub.set_dir.
predictor = RobertaMNLI(rel_path="../../roberta/hub")

Using cache found in ../../roberta/hub\pytorch_fairseq_master


In [31]:
# Score every pair in both directions (first->second, then second->first).
# Each result row is the three forward probabilities followed by the three backward ones.
results = []
for s1, s2 in zip(df['first'], df['second']):
    forward_probs = predictor.predict_one(s1, s2, return_probs=True)
    backward_probs = predictor.predict_one(s2, s1, return_probs=True)
    results.append(forward_probs + backward_probs)

nli_columns = ['C1', 'N1', 'E1', 'C2', 'N2', 'E2']
scores_df = pd.DataFrame(data=results, columns=nli_columns)
scores_df.to_csv(save_dir + '/' + 'nliScores.csv')

In [32]:
def gen_dist2(row):
    '''
    Turn the two directional NLI results of a sentence pair into one distance score.

    The direction whose largest probability is higher is used (the second
    direction wins ties). With C, E, N taken from that direction:
    - C > 0.5 returns C;
    - otherwise C == E returns 0.5;
    - otherwise the result is N / (N + E) * 0.5.
    Assumes that C+E+N=1.

    :param row: mapping with keys 'C1', 'E1', 'N1', 'C2', 'E2', 'N2'.
    :return: the distance score.
    '''
    forward = (row['C1'], row['E1'], row['N1'])
    backward = (row['C2'], row['E2'], row['N2'])
    contra, entail, neutral = forward if max(forward) > max(backward) else backward

    if contra > 0.5:
        return contra
    if contra == entail:
        return 0.5
    return neutral / (neutral + entail) * 0.5

In [33]:
# Compute the distance of every pair and attach it, together with the NLI
# probabilities, to the pair table.
# Selecting the NLI columns explicitly keeps the cell re-runnable: joining the
# already-joined frame a second time would fail on overlapping column names.
nli_cols = ['C1', 'N1', 'E1', 'C2', 'N2', 'E2']
dist_ser = scores_df[nli_cols].apply(gen_dist2, axis=1)
dist_ser.name = 'dist'
scores_df = df.join(scores_df[nli_cols]).join(dist_ser)
scores_df.head()

Unnamed: 0,first_index,second_index,first,second,C1,N1,E1,C2,N2,E2,dist
0,0,1,"People with Virgo rising tend to be practical,...",You have an ingenious mind.,0.01499,0.901193,0.083817,0.168603,0.702198,0.129199,0.457454
1,0,2,"People with Virgo rising tend to be practical,...",You have an active mind.,0.021296,0.439075,0.539628,0.890627,0.093778,0.015595,0.890627
2,0,3,"People with Virgo rising tend to be practical,...",You have an alert mind.,0.002648,0.244821,0.752531,0.061536,0.715761,0.222702,0.122736
3,0,4,"People with Virgo rising tend to be practical,...",Gaining knowledge and putting it to good use a...,0.001137,0.633402,0.36546,0.029134,0.66727,0.303596,0.343647
4,0,5,"People with Virgo rising tend to be practical,...",You strive for perfection.,0.001929,0.536462,0.461609,0.041221,0.582456,0.376323,0.303749


## Clustering based on precomputed pairwise distances.

In [34]:
import numpy as np
import pandas as pd

import scipy
from scipy.cluster.hierarchy import dendrogram,linkage
from scipy.cluster.hierarchy import fcluster
from scipy.cluster.hierarchy import cophenet
from scipy.spatial.distance import pdist

import matplotlib.pyplot as plt
from pylab import rcParams
import seaborn as sb

import sklearn
# from sklearn import datasets
from sklearn.cluster import AgglomerativeClustering
import sklearn.metrics as sm
from sklearn.preprocessing import scale

# Configure the output
np.set_printoptions(precision=4,suppress=True)
%matplotlib inline
rcParams["figure.figsize"] =20,10
sb.set_style("whitegrid")

In [36]:
# convert this original dataframe to a 'square' distance matrix.
# we utilise a pivot table to do this

# first, add the redundant pairs (two of the same sentences) to the data.
# we need this for the pivot function to generate the correct indices and columns.
first = np.unique(df['first_index'].values)
second = np.unique(df['second_index'].values)
combined_arr = np.concatenate((first, second))
sentences = np.unique(combined_arr)
# one (i, i, 0) row per sentence: the distance of a sentence to itself is 0.
add_data = np.column_stack([sentences, sentences, np.zeros(len(sentences))])
# np.int was only an alias of the builtin int and has been removed from NumPy;
# the builtin gives the same dtype.
add_data = add_data.astype(int)

print(add_data)

[[ 0  0  0]
 [ 1  1  0]
 [ 2  2  0]
 [ 3  3  0]
 [ 4  4  0]
 [ 5  5  0]
 [ 6  6  0]
 [ 7  7  0]
 [ 8  8  0]
 [ 9  9  0]
 [10 10  0]]


In [None]:
add_df = pd.DataFrame(data=add_data, columns=['first_index','second_index','dist'])
# DataFrame.append no longer exists in current pandas; pd.concat is the replacement.
# Any self-pair rows already present are dropped first, so re-running this
# cell does not stack duplicate diagonal entries (pivot rejects duplicates).
is_pair = scores_df['first_index'] != scores_df['second_index']
scores_df = pd.concat([scores_df[is_pair], add_df], ignore_index=True)
scores_df.tail()

In [41]:
from scipy.spatial.distance import squareform
from sklearn.metrics import silhouette_score

# Pivot the long table into a sentence x sentence matrix. Keyword arguments are
# used because newer pandas no longer accepts them positionally.
df_piv = scores_df.pivot(index="first_index", columns="second_index", values="dist").fillna(0)
piv_arr = df_piv.values
# Missing cells were filled with 0, so adding the transpose mirrors the
# stored distances across the diagonal.
dist_mat = piv_arr + np.transpose(piv_arr)

condensed_mat = squareform(dist_mat)

# Try every cluster count from 2 up to (number of sentences - 1) and record the
# average silhouette score of each clustering; position p in sil_scores is k = p + 2.
# NOTE(review): newer scikit-learn releases rename `affinity` to `metric`;
# confirm against the installed version.
sil_scores = []  # silhouette scores
for i in range(2, len(dist_mat)):
    clusters = AgglomerativeClustering(n_clusters=i, affinity='precomputed', linkage='single')
    clusters.fit_predict(dist_mat)

    # use metric to measure/compare goodness of clusters based on internal coherence and
    # external coupling.
    # consider Dunn index. maximise the Dunn index.
    # or consider average silhouette width (maximise). available in sklearn.
    silhouette_avg = silhouette_score(dist_mat, clusters.labels_, metric='precomputed')
    sil_scores.append(silhouette_avg)
    print(i, ": ", silhouette_avg)

2 :  0.005174340807210396
3 :  0.019980323299382898
4 :  0.015499376680270722
5 :  0.053601191734335224
6 :  0.0903343067936031
7 :  0.08516242802707301
8 :  0.12056126734283829
9 :  0.20357576211133635
10 :  0.17240795209773607


In [43]:
sil_scores_arr = np.asarray(sil_scores)
print('max avg silhouette score: ', sil_scores_arr.max())
# sil_scores starts at k = 2, hence the +2; argmax returns the first maximum.
OPTIMAL_K = int(np.argmax(sil_scores_arr)) + 2
# Lower-case alias kept for cells that refer to `optimal_k`; previously that
# name was never assigned, which fails on a fresh kernel.
optimal_k = OPTIMAL_K
print('nclusters w max avg silhouette score: ', OPTIMAL_K)

max avg silhouette score:  0.20357576211133635
nclusters w max avg silhouette score:  9


In [44]:
# Re-fit with the selected number of clusters and list the members of each cluster.
clusters = AgglomerativeClustering(linkage='single', affinity='precomputed', n_clusters=OPTIMAL_K)
clusters.fit_predict(dist_mat)
for cluster_id in range(clusters.n_clusters_):
    # positions (rows of dist_mat) assigned to this cluster
    indices = np.flatnonzero(clusters.labels_ == cluster_id)

    print(f'cluster {cluster_id} (size {len(indices)}):')
    print(indices)
    for idx in indices:
        # map the matrix position back to the sentence index label
        display(df_piv.index[idx])
    print('\n')

cluster 0 (size 3):
[1 2 3]


1

2

3



cluster 1 (size 1):
[9]


9



cluster 2 (size 1):
[10]


10



cluster 3 (size 1):
[7]


7



cluster 4 (size 1):
[8]


8



cluster 5 (size 1):
[6]


6



cluster 6 (size 1):
[4]


4



cluster 7 (size 1):
[5]


5



cluster 8 (size 1):
[0]


0





In [45]:
# Collect {cluster number: [sentence indices]} and write it out as JSON.
clusters_dict = {}
for cluster_id in range(clusters.n_clusters_):
    member_positions = np.flatnonzero(clusters.labels_ == cluster_id)
    # int() turns the index labels into plain ints so json can serialise them
    clusters_dict[cluster_id] = [int(df_piv.index[pos]) for pos in member_positions]

path = save_dir + '/' + 'clusters.json'
with open(path, 'w') as fp:
    json.dump(clusters_dict, fp)

In [46]:
# Wrap the symmetric distance matrix in a DataFrame that carries the sentence
# index labels of the pivot table, then save it.
df_piv_symmetric = pd.DataFrame(dist_mat, index=df_piv.index, columns=df_piv.columns)

path = save_dir + '/' + 'pairwiseMatrix.csv'
df_piv_symmetric.to_csv(path)

In [47]:
# Compute the distance of one cluster to another cluster by the average pairwise distance
# of the elements in one cluster and another cluster.
# OPTIMAL_K is the name assigned by the silhouette cell; the lower-case
# `optimal_k` used here before was never defined.
for n_clusters in [OPTIMAL_K]:
    print('------')
    print(f'\nno. of clusters = {n_clusters}')
    clusters = AgglomerativeClustering(n_clusters=n_clusters, affinity='precomputed', linkage='single')
    clusters.fit_predict(dist_mat)

    # positions (rows/columns of dist_mat) belonging to each cluster
    n_found = clusters.n_clusters_
    members = [np.flatnonzero(clusters.labels_ == c) for c in range(n_found)]

    # find average distances between clusters and output a new distance matrix.
    # only the upper triangle (i < j) is filled; the rest stays 0.
    avg_dist_arr = np.zeros(shape=(n_found, n_found))
    for i in range(n_found):
        for j in range(i + 1, n_found):
            # mean over every (member of i, member of j) distance
            avg_dist_arr[i][j] = dist_mat[np.ix_(members[i], members[j])].mean()
    print(avg_dist_arr)

------

no. of clusters = 9
[[0.     0.4995 0.7335 0.4852 0.4677 0.4987 0.471  0.4909 0.4903]
 [0.     0.     0.4994 0.4995 0.4995 0.4996 0.4995 0.4994 0.4995]
 [0.     0.     0.     0.4954 0.9346 0.9931 0.4671 0.4107 0.4808]
 [0.     0.     0.     0.     0.4973 0.4977 0.4991 0.4988 0.499 ]
 [0.     0.     0.     0.     0.     0.4983 0.8313 0.9773 0.4282]
 [0.     0.     0.     0.     0.     0.     0.499  0.4926 0.4941]
 [0.     0.     0.     0.     0.     0.     0.     0.441  0.3436]
 [0.     0.     0.     0.     0.     0.     0.     0.     0.3037]
 [0.     0.     0.     0.     0.     0.     0.     0.     0.    ]]


In [48]:
# Mirror the upper-triangular averages into a full symmetric matrix and save it.
tmp = avg_dist_arr + avg_dist_arr.T
path = save_dir + '/' + 'clusterDistances.csv'
np.savetxt(path, tmp, delimiter=",")

In [54]:
# Show the symmetric cluster-distance matrix (avg_dist_arr plus its transpose).
print(tmp)

[[0.     0.4995 0.7335 0.4852 0.4677 0.4987 0.471  0.4909 0.4903]
 [0.4995 0.     0.4994 0.4995 0.4995 0.4996 0.4995 0.4994 0.4995]
 [0.7335 0.4994 0.     0.4954 0.9346 0.9931 0.4671 0.4107 0.4808]
 [0.4852 0.4995 0.4954 0.     0.4973 0.4977 0.4991 0.4988 0.499 ]
 [0.4677 0.4995 0.9346 0.4973 0.     0.4983 0.8313 0.9773 0.4282]
 [0.4987 0.4996 0.9931 0.4977 0.4983 0.     0.499  0.4926 0.4941]
 [0.471  0.4995 0.4671 0.4991 0.8313 0.499  0.     0.441  0.3436]
 [0.4909 0.4994 0.4107 0.4988 0.9773 0.4926 0.441  0.     0.3037]
 [0.4903 0.4995 0.4808 0.499  0.4282 0.4941 0.3436 0.3037 0.    ]]


## Visualise network of clusters

In [50]:
import networkx as nx
import string

In [53]:
# Build a weighted graph from the cluster-distance matrix: each non-zero entry
# becomes an edge whose 'weight' is the distance.
# from_numpy_matrix has been removed from NetworkX; from_numpy_array is the replacement.
G = nx.from_numpy_array(tmp)
print(G.nodes())
print(G.edges().data())
#G = nx.relabel_nodes(G, dict(zip(range(len(G.nodes())),string.ascii_uppercase)))

import pydot
from networkx.drawing.nx_pydot import write_dot

print("using package pydot")
path = save_dir + '/' + 'clusterViz.dot'
write_dot(G, path)
# you can then use the various graphviz CLI to generate the image. E.g. "neato -Tpng clusterViz.dot -o clusterViz.png"
# see https://www.graphviz.org/pdf/neatoguide.pdf for more info on modifying dot files.
# fdp or sfdp can be used instead of neato.

[0, 1, 2, 3, 4, 5, 6, 7, 8]
[(0, 1, {'weight': 0.4994605551465554}), (0, 2, {'weight': 0.7335071209512276}), (0, 3, {'weight': 0.48520478438236053}), (0, 4, {'weight': 0.46770655256857535}), (0, 5, {'weight': 0.49867255515323117}), (0, 6, {'weight': 0.47099926157662747}), (0, 7, {'weight': 0.4908618227767336}), (0, 8, {'weight': 0.49027210145478795}), (1, 2, {'weight': 0.49940064584282534}), (1, 3, {'weight': 0.49945796866543385}), (1, 4, {'weight': 0.49945678697938106}), (1, 5, {'weight': 0.4995587224847858}), (1, 6, {'weight': 0.4994986559109311}), (1, 7, {'weight': 0.49939709003843735}), (1, 8, {'weight': 0.49948659031512255}), (2, 3, {'weight': 0.4953569487060596}), (2, 4, {'weight': 0.9345504595731561}), (2, 5, {'weight': 0.9930647224858722}), (2, 6, {'weight': 0.46708509870903325}), (2, 7, {'weight': 0.41068669711139594}), (2, 8, {'weight': 0.4808462652065677}), (3, 4, {'weight': 0.49725615398226114}), (3, 5, {'weight': 0.49767272002938995}), (3, 6, {'weight': 0.49909451831466356

## Summarize

In [83]:
from nltk.tokenize import RegexpTokenizer

def choose_longest(sentences):
    """
    Return the sentence with the most word-like tokens.

    :param sentences: iterable of sentence strings.
    :return: the first sentence with the highest token count, or None when
        there are no sentences or none of them contains a token.
    """
    tokenizer = RegexpTokenizer(r'\w+')
    max_len = 0
    final_sent = None
    for sent in sentences:
        n_tokens = len(tokenizer.tokenize(sent))
        if n_tokens > max_len:
            # Keep the running maximum in tokens. It was previously overwritten
            # with the character count, so later sentences were compared
            # against the wrong unit.
            max_len = n_tokens
            final_sent = sent
    return final_sent

from lexrank import LexRank
from lexrank.mappings.stopwords import STOPWORDS
def choose_salient(sentences):
    """
    Pick the single most salient sentence of a document with LexRank.

    :param sentences: list of documents handed to LexRank; only the first
        document (``sentences[0]``, itself a list of sentence strings) is summarised.
    :return: the top-ranked sentence of the first document.
    """
    ranker = LexRank(sentences, stopwords=STOPWORDS['en'])
    first_document = sentences[0]
    ranked = ranker.get_summary(first_document, summary_size=1, threshold=.1)
    return ranked[0]
    

def summarize(clusters, intersentence, sent_df, word_limit=90, coherence_threshold=0.6, redundancy_threshold=0.3):
    """
    Build an extractive summary by taking one representative sentence per cluster.

    Clusters are visited from largest to smallest. The representative of a
    cluster is added when it fits in the word budget and its distance to every
    sentence already in the summary lies strictly between the two thresholds.
    Processing stops at the first representative that does not fit the budget.

    :param clusters: dict of clusters of the form {cluster_num: list(sentence_indices)}.
    :param intersentence: Pandas DataFrame of precomputed inter-sentence distance in pairwise matrix form,
        addressed positionally by sentence index.
    :param sent_df: Pandas DataFrame of indexed sentences with a 'sentence' column.
    :param word_limit: maximum num of words for summary.
    :param coherence_threshold: maximum distance allowed for a sentence pair within the summary. valid values between 0-1.
    :param redundancy_threshold: minimum distance allowed for a sentence pair within the summary. valid values between 0-1.
    :return: list of the selected sentence indices, in selection order.
    :raises ValueError: if coherence_threshold is not greater than redundancy_threshold.

    We need a redundancy_threshold because the clustering algorithm does not always manage to group
    redundant sentences together?
    """
    if coherence_threshold <= redundancy_threshold:
        raise ValueError(
            f"coherence_threshold ({coherence_threshold}) must be greater than "
            f"redundancy_threshold ({redundancy_threshold})"
        )

    tokenizer = RegexpTokenizer(r'\w+') # tokenize into word-like tokens.
    summary = list()
    word_count = 0

    sorted_clusters = sorted(clusters.items(), key=lambda x: len(x[1]), reverse=True)  # iterable of pairs.

    for key, clust in sorted_clusters:
        sentences = [sent_df.iloc[idx]['sentence'] for idx in clust]
        #selected = choose_longest(clust)
        selected = choose_salient([sentences])
        # Map the chosen text back to its index through the cluster itself, so
        # a sentence whose text also occurs elsewhere in sent_df still resolves
        # to a member of this cluster.
        selected_idx = clust[sentences.index(selected)]
        tokens = tokenizer.tokenize(selected)
        if word_count + len(tokens) >= word_limit:
            break

        # Distances to everything already selected; the matrix is addressed
        # positionally on both axes, like sent_df above.
        dists = [intersentence.iloc[sent_idx, selected_idx] for sent_idx in summary]
        max_dist = max([0] + dists)
        min_dist = min([1] + dists)
        if max_dist < coherence_threshold and min_dist > redundancy_threshold:
            summary.append(selected_idx)
            word_count = word_count + len(tokens)
    return summary

In [84]:
# Sanity-check the LexRank selection on the sentences of cluster 0.
sentences = sents_df['sentence'].iloc[clusters_dict[0]].tolist()
display(sentences)
choose_salient([sentences])

['You have an ingenious mind.',
 'You have an active mind.',
 'You have an alert mind.']

'You have an alert mind.'

In [85]:
# todo: think of statistical way to determine the coherence_threshold and redundancy_threshold.
summary = summarize(
    clusters_dict,
    df_piv_symmetric,
    sents_df,
    word_limit=50,
    coherence_threshold=0.55,
    redundancy_threshold=0.4,
)
print("no. of sentences in summary: ", len(summary))
print("no. of sentences in source: ", len(df_piv_symmetric.index))

# Print the summary twice: first in selection order, then in source order.
for ordering in (summary, sorted(summary)):
    print()
    for sent_idx in ordering:
        print(f"{sent_idx}: {sents_df.iloc[sent_idx]['sentence']}")

no. of sentences in summary:  3
no. of sentences in source:  11

3: You have an alert mind.
9: Sometimes, though, this can make relationships sour as you often turn your critical eye on the one you love and the things they do.
7: At times others can never be "good" enough to meet those high standards.

3: You have an alert mind.
7: At times others can never be "good" enough to meet those high standards.
9: Sometimes, though, this can make relationships sour as you often turn your critical eye on the one you love and the things they do.


## Multiple Summaries

In [171]:
def multi_summary(clusters, intercluster_dists, sent_df, word_limit=100, contradiction_threshold=0.6):
    """
    Build a base summary from uncontested clusters plus one variant per contested cluster.

    A cluster is "contested" when its distance to at least one other cluster
    exceeds ``contradiction_threshold``. The base summary holds the
    representative sentence of each uncontested cluster (largest clusters
    first, stopping at the first one that does not fit ``word_limit``). Each
    returned summary is the base summary followed by the representative of
    one contested cluster; the word limit is not applied to that extra sentence.

    :param clusters: dict of clusters of the form {cluster_index: list(sentence_indices)}.
        Keys may be ints or their string form (e.g. after a JSON round trip); they must
        correspond to the row/column positions of ``intercluster_dists``.
    :param intercluster_dists: square numpy.ndarray representing average distances between each cluster.
    :param sent_df: Pandas DataFrame of indexed sentences.
    :param word_limit: maximum num of words for the base summary.
    :param contradiction_threshold: float value between 0 and 1, representing the value after which
    sentence pairs will be considered contradictory.
    :return: tuple ``(all_summaries, base_summary)`` of sentence-index lists.
    """
    # Cluster positions in the distance matrix are ints, so string keys would
    # never match them below; normalise once up front.
    clusters = {int(clust_idx): clust for clust_idx, clust in clusters.items()}

    # find top/representative sentence of each cluster.
    top_sentences = dict()
    for clust_idx, clust in clusters.items():
        sentences = [sent_df.iloc[idx]['sentence'] for idx in clust]
        selected = choose_salient([sentences])
        # Resolve the text to an index inside this cluster, so duplicate
        # sentence texts elsewhere in sent_df cannot hijack the lookup.
        top_sentences[clust_idx] = clust[sentences.index(selected)]

    # find clusters that are not contradictory w ALL other clusters.
    contradicted_pairs = list()
    contradicted_set = set()
    n_clusters = len(intercluster_dists)
    for i in range(n_clusters):
        for j in range(i + 1, n_clusters):
            if intercluster_dists[i][j] > contradiction_threshold:
                contradicted_pairs.append((i, j))
                contradicted_set.add(i)
                contradicted_set.add(j)
    base_clusters = {i for i in range(n_clusters) if i not in contradicted_set}

    # construct base summary
    base_summary = list()
    word_count = 0
    tokenizer = RegexpTokenizer(r'\w+') # tokenize into word-like tokens.
    sorted_clusters = sorted(clusters.items(), key=lambda x: len(x[1]), reverse=True)  # iterable of pairs.
    for clust_idx, clust in sorted_clusters:
        if clust_idx not in base_clusters:
            continue
        selected_idx = top_sentences[clust_idx]
        selected = sent_df.iloc[selected_idx]['sentence']
        tokens = tokenizer.tokenize(selected)
        if word_count + len(tokens) >= word_limit:
            break
        base_summary.append(selected_idx)
        word_count = word_count + len(tokens)

    # construct summaries w contentious information:
    # one summary per contested representative, each added at most once.
    all_summaries = list()
    added_sentences = set()
    for clust1, clust2 in contradicted_pairs:
        for sent in (top_sentences[clust1], top_sentences[clust2]):
            if sent not in added_sentences:
                all_summaries.append(base_summary + [sent])
                added_sentences.add(sent)

    print(base_summary)
    return all_summaries, base_summary

In [172]:
from IPython.display import Markdown
all_summaries, base_summary = multi_summary(
    clusters_dict, tmp, sents_df, word_limit=100, contradiction_threshold=0.6
)

print()

# Render every summary in source order; sentences that belong to the shared
# base summary are shown in bold, the contested addition in plain text.
for summary in all_summaries:
    print("no. of sentences in summary: ", len(summary))
    print("no. of sentences in source: ", len(df_piv_symmetric.index))
    print()
    for sent_idx in sorted(summary):
        line = f"{sent_idx}: {sents_df.iloc[sent_idx]['sentence']}"
        display(Markdown(f"**{line}**" if sent_idx in base_summary else line))
    print()

[9, 7, 0]

no. of sentences in summary:  4
no. of sentences in source:  11



**0: People with Virgo rising tend to be practical, analytical, discriminating, fastidious, careful, exacting, attentive to details, methodical, quiet, unassuming, shy, critical, thoughtful, and somewhat self-centered.**

3: You have an alert mind.

**7: At times others can never be "good" enough to meet those high standards.**

**9: Sometimes, though, this can make relationships sour as you often turn your critical eye on the one you love and the things they do.**


no. of sentences in summary:  4
no. of sentences in source:  11



**0: People with Virgo rising tend to be practical, analytical, discriminating, fastidious, careful, exacting, attentive to details, methodical, quiet, unassuming, shy, critical, thoughtful, and somewhat self-centered.**

**7: At times others can never be "good" enough to meet those high standards.**

**9: Sometimes, though, this can make relationships sour as you often turn your critical eye on the one you love and the things they do.**

10: Pessimism and being too self-critical are two faults you should try to improve upon.


no. of sentences in summary:  4
no. of sentences in source:  11



**0: People with Virgo rising tend to be practical, analytical, discriminating, fastidious, careful, exacting, attentive to details, methodical, quiet, unassuming, shy, critical, thoughtful, and somewhat self-centered.**

**7: At times others can never be "good" enough to meet those high standards.**

8: Finding fault with what's wrong with things is your forte.

**9: Sometimes, though, this can make relationships sour as you often turn your critical eye on the one you love and the things they do.**


no. of sentences in summary:  4
no. of sentences in source:  11



**0: People with Virgo rising tend to be practical, analytical, discriminating, fastidious, careful, exacting, attentive to details, methodical, quiet, unassuming, shy, critical, thoughtful, and somewhat self-centered.**

6: You can be quite the person to live with or to be around because your standards for yourself and others are so high.

**7: At times others can never be "good" enough to meet those high standards.**

**9: Sometimes, though, this can make relationships sour as you often turn your critical eye on the one you love and the things they do.**


no. of sentences in summary:  4
no. of sentences in source:  11



**0: People with Virgo rising tend to be practical, analytical, discriminating, fastidious, careful, exacting, attentive to details, methodical, quiet, unassuming, shy, critical, thoughtful, and somewhat self-centered.**

4: Gaining knowledge and putting it to good use are important to you.

**7: At times others can never be "good" enough to meet those high standards.**

**9: Sometimes, though, this can make relationships sour as you often turn your critical eye on the one you love and the things they do.**


no. of sentences in summary:  4
no. of sentences in source:  11



**0: People with Virgo rising tend to be practical, analytical, discriminating, fastidious, careful, exacting, attentive to details, methodical, quiet, unassuming, shy, critical, thoughtful, and somewhat self-centered.**

5: You strive for perfection.

**7: At times others can never be "good" enough to meet those high standards.**

**9: Sometimes, though, this can make relationships sour as you often turn your critical eye on the one you love and the things they do.**




In [174]:
# Spot-check one entry: row at position 3, column labelled 10.
df_piv_symmetric.iloc[3].loc[10]

0.8955947273722543