In [83]:
#!/usr/bin/env python
# coding: utf-8
import nltk
import pandas as pd 
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx

In [84]:

 
def read_article(file_name):
    filedata=file_name
    article = filedata.split(". ")
    sentences = []

    for sentence in article:
        print(sentence)
        sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))
    sentences.pop() 
    
    return sentences

In [85]:

def sentence_similarity(sent1, sent2, stopwords=None):
    if stopwords is None:
        stopwords = []
 
    sent1 = [w.lower() for w in sent1]
    sent2 = [w.lower() for w in sent2]
 
    all_words = list(set(sent1 + sent2))
 
    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)
 
    # build the vector for the first sentence
    for w in sent1:
        if w in stopwords:
            continue
        vector1[all_words.index(w)] += 1
 
    # build the vector for the second sentence
    for w in sent2:
        if w in stopwords:
            continue
        vector2[all_words.index(w)] += 1
 
    return 1 - cosine_distance(vector1, vector2)

In [86]:
def build_similarity_matrix(sentences, stop_words):
    # Create an empty similarity matrix
    similarity_matrix = np.zeros((len(sentences), len(sentences)))
 
    for idx1 in range(len(sentences)):
        for idx2 in range(len(sentences)):
            if idx1 == idx2: #ignore if both are same sentences
                continue 
            similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2], stop_words)

    return similarity_matrix

In [87]:
data=pd.read_csv('tcs_rev.csv')

In [88]:
data

Unnamed: 0.1,Unnamed: 0,date,tital,user,dislike,like,page_index
0,0,posted on 05 Sep 2018,System Engineer in Kolkata,Anonymous · Finance Division,Dislikes 1. Growth: Not much scope of growth. ...,Likes Few things that are good: \r\n1. Job sec...,2
1,1,posted on 12 Aug 2020,It Analyst in Kolkata,Anonymous · IoT Division,Dislikes Some of the managers are lagging with...,Likes I am carrying company provided laptop be...,2
2,2,posted on 26 Jul 2019,Associate Consultant in Bangalore,Anonymous · EIS Division,"Dislikes 1. First of all, everybody in TCS are...",Likes You can draw salary without doing any wo...,2
3,3,posted on 29 Aug 2020,Assistant Consultant (ITIS) in Whitefield,Anonymous · ITIS Division,Dislikes Nothing to dislike about working at TCS,Likes Good and excellent job security provided...,2
4,4,posted on 21 Aug 2020,Assistant System Engineer in Pune,Anonymous · It Services Division,Dislikes Salary appraisal is very low for begi...,Likes Apart from Job security the company prov...,2
...,...,...,...,...,...,...,...
155,155,posted on 27 Sep 2019,Process Associate in Nagpur,Anonymous · Logistics Division,Dislikes There is no career growth. I saw peo...,Likes I was so excited to work for TCS. But m...,17
156,156,posted on 31 Jul 2020,It Analyst in Bangalore,Anonymous · BFS Division,"Dislikes Compensation, work culture , appraisa...",Likes not supporting at all. Have been applyin...,17
157,157,posted on 30 Jul 2020,Senior Process Associate in Bengaluru,Anonymous · Investment banking Division,Dislikes Work pressure,Likes Job security is good and work culture an...,17
158,158,posted on 02 Aug 2020,Team Leader in Pune,Anonymous · Finance Division,Dislikes Work pressure,"Likes Job security is the best part of TCS, ev...",17


In [89]:
nltk.download("stopwords")
stop_words = stopwords.words('english')

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/siddharth/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [90]:
def generate_summary(file_name, top_n=5):
    
    summarize_text = []

    # Step 1 - Read text anc split it
    sentences =  read_article(file_name)

    # Step 2 - Generate Similary Martix across sentences
    sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)

    # Step 3 - Rank sentences in similarity martix
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
    try:
        scores = nx.pagerank(sentence_similarity_graph)
    except Exception as e:
        print(e)
        return("NaN")

    # Step 4 - Sort the rank and pick top sentences
    ranked_sentence = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)    
    #print("Indexes of top ranked_sentence order are ", ranked_sentence)    
    for i in range(top_n):
        try:
            #print("top_n---->",top_n)
            #print("ranked_sentence----->",ranked_sentence)
            summarize_text.append(" ".join(ranked_sentence[i][1]))
        except Exception as e:
            print(e)
            pass

    # Step 5 - Offcourse, output the summarize texr
    print("Summarize Text: \n", ". ".join(summarize_text))

# let's begin
for i in data.like:
    generate_summary( i, 2)

Likes Few things that are good: 
1
Job security
In my opinion 80% of the  people who are in TCS is because of job security
TCS holds onto their employees even in tough situations in market although there was a large layoff in recent past
Still job security is a plus.
2.Work-life balance:  It is good
Most of the time there are no pressure and also work is not much challenging
So usually after the timid learning curve it is   easy work-wise
In support projects timing may be an issue because one may have to support weekend or at night but those are compensated
In development projects timing at beginning is casual but at time of delivery it is haywire because always there are no time but lot of development left
This should give an idea of poor management.
3
Benefits: Nice health benefits for employee and family, easy on leaves about maternity, wedding etc
...Read More
Summarize Text: 
 In my opinion 80% of the  people who are in TCS is because of job security. Still job security is a pl

  There is so much of partiality &  negative  atmosphere inside the company.  They don't know that prior to clients,  empoloyes come
Likes not supporting at all
Have been applying leaves as they could not provide device to WFH
list index out of range
Summarize Text: 
 Likes not supporting at all
Likes Job security is good and work culture and there is opportunity for learning and development.
list index out of range
list index out of range
Summarize Text: 
 
Likes Job security is the best part of TCS, even company is taking care of the associates who are on bench during this critical time, where people are loosing their jobs in other organisations.
list index out of range
list index out of range
Summarize Text: 
 
Likes Best company to work with.
Working with tcs was a superb experience.
Job security, lenient policies
list index out of range
list index out of range
Summarize Text: 
 


  return 1 - (numpy.dot(u, v) / (sqrt(numpy.dot(u, u)) * sqrt(numpy.dot(v, v))))
