In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from transformers import pipeline

import pandas as pd
import numpy as np

from nltk.corpus import stopwords
import string
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from nltk import pos_tag
from nltk.tokenize import sent_tokenize, word_tokenize

In [None]:
import nltk
# Punkt tokenizer data (unpacked under tokenizers/); presumably what the
# word_tokenize / sent_tokenize helpers imported above rely on — confirm for the installed NLTK.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
# Stop-word corpus; the English list is read later through stopwords.words('english').
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
# Perceptron POS-tagger data (unpacked under taggers/); presumably the model behind pos_tag — confirm.
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [None]:
# WordNet corpus; presumably the data source for WordNetLemmatizer and the wordnet POS constants — confirm.
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [None]:
# Open Multilingual WordNet 1.4 data; presumably required by the WordNet reader
# in this NLTK version — confirm before removing.
nltk.download('omw-1.4')

[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

In [None]:
# Hard-coded Colab Google Drive location.
# NOTE(review): presumably Drive must already be mounted at /content/drive
# before this cell runs — nothing in the notebook mounts it; confirm.
DATA_PATH = '/content/drive/MyDrive/Omdena/AI for soft skills/Dummy_data_soft_skills_v2.csv'

# The first CSV column is the row index written when the file was saved
# (it otherwise appears as a junk "Unnamed: 0" column), so load it as the index.
df = pd.read_csv(DATA_PATH, index_col=0)
df

Unnamed: 0,Title,Content
0,listening,Listening is one of the most important aspects...
1,listening,Empathy is the ability to understand the feeli...
2,listening,The way to improve your listening skills is to...
3,listening,Listening is the ability to accurately receive...
4,listening,Empathy is the ability to understand the feeli...
5,speaking,"Conversation is the basis of communication, an..."
6,speaking,Communication begins with you. \nTake responsi...
7,speaking,Think before you speak. \nMost of us work best...
8,speaking,"As you formulate a speech strategy, put yourse..."
9,non-verbal communication,"When we talk about things that matter to us, t..."


**Summarization**

In [None]:
# Summarization pipeline used to condense each Content entry.
# model : https://huggingface.co/facebook/bart-large-cnn
summarizer = pipeline(
    task='summarization',
    model="facebook/bart-large-cnn",
)

Downloading:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
def summarize_content(content):
  """Return the summary of ``content`` produced by the global ``summarizer``.

  Args:
    content: the text to summarize.

  Returns:
    The first summary returned by the pipeline, stripped of surrounding
    whitespace. Missing values (``None`` / NaN) and blank strings yield ``''``
    without calling the model.
  """
  # Missing cells of a pandas column arrive as None or float NaN (NaN != NaN).
  if content is None or (isinstance(content, float) and content != content):
    return ''
  # Nothing to summarize; skip the model call for blank text.
  if isinstance(content, str) and not content.strip():
    return ''
  # truncation=True cuts over-long inputs to the model's maximum input length
  # instead of failing on them.
  return summarizer(content, truncation=True)[0]['summary_text'].strip()

# Summarize every row's Content (one summarize_content call per row) and store it in a new Summary column.
df['Summary'] = df['Content'].apply(summarize_content)

Your max_length is set to 142, but you input_length is only 102. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=51)
Your max_length is set to 142, but you input_length is only 111. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=55)
Your max_length is set to 142, but you input_length is only 118. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=59)
Your max_length is set to 142, but you input_length is only 82. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=41)
Your max_length is set to 142, but you input_length is only 96. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=48)
Your max_length is set to 142, but you input_length is only 74. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=37)
Your max_length is set to 142, but you input_length is only 94. You might consi

In [None]:
df.head()

Unnamed: 0,Title,Content,Summary
0,listening,Listening is one of the most important aspects...,Successful listening is not just about underst...
1,listening,Empathy is the ability to understand the feeli...,Empathy is the ability to understand the feeli...
2,listening,The way to improve your listening skills is to...,The way to improve your listening skills is to...
3,listening,Listening is the ability to accurately receive...,Listening is the ability to accurately receive...
4,listening,Empathy is the ability to understand the feeli...,Empathy is the ability to understand the feeli...


**Text preprocessing**

In [None]:
# Take the values of the Summary column as the input of the preprocessing steps below.
X = df['Summary'].values

In [None]:
# Tokenize each summary: one token list per entry of X.
Xd = list(map(word_tokenize, X))

In [None]:
# Tokens dropped during cleaning: the English stop words plus every
# character of string.punctuation.
punct = list(string.punctuation)
stop = set(stopwords.words('english')).union(punct)

In [None]:
# Single lemmatizer instance, reused for every token inside clean().
lem = WordNetLemmatizer()


def get_simple_pos(tag):
  """Map a POS tag string to the WordNet POS constant used for lemmatizing.

  The choice depends only on how the tag starts: ``J`` -> adjective,
  ``N`` -> noun, ``V`` -> verb, ``R`` -> adverb. Every other tag falls
  back to noun.
  """
  prefix_to_pos = (
    ('J', wordnet.ADJ),
    ('N', wordnet.NOUN),
    ('V', wordnet.VERB),
    ('R', wordnet.ADV),
  )
  for prefix, simple_pos in prefix_to_pos:
    if tag.startswith(prefix):
      return simple_pos
  # Unrecognised tags are treated as nouns.
  return wordnet.NOUN

  

def clean(words):
  """Turn a list of tokens into lower-cased lemmas with stop words removed.

  Args:
    words: the tokens of one text, in their original order and casing.

  Returns:
    A list holding the lower-cased lemma of every token whose lower-cased
    form is not in ``stop``; an empty list for empty input.
  """
  words = list(words)
  if not words:
    return []

  output = []
  # Tag the whole sequence in a single call: the tagger can use neighbouring
  # tokens as context, and one call replaces a tagger invocation per word.
  for w, tag in pos_tag(words):
    lowered = w.lower()
    if lowered in stop:
      continue
    # WordNet lookups are lower-case, so a capitalised token (e.g. at the
    # start of a sentence) would come back unlemmatized; lower-case first.
    clean_word = lem.lemmatize(lowered, get_simple_pos(tag))
    output.append(clean_word.lower())
  return output



In [None]:
# Clean every token list (stop-word removal + lemmatization via clean()).
Xc = list(map(clean, Xd))

In [None]:
# Glue each cleaned token list back into one space-separated string.
Xf = list(map(' '.join, Xc))

In [None]:
# Attach the cleaned strings to the frame as a new column and preview the first rows.
df['cleaned_text'] = Xf
df.head()

Unnamed: 0,Title,Content,Summary,cleaned_text
0,listening,Listening is one of the most important aspects...,Successful listening is not just about underst...,successful listen understand spoken write info...
1,listening,Empathy is the ability to understand the feeli...,Empathy is the ability to understand the feeli...,empathy ability understand feeling thought bel...
2,listening,The way to improve your listening skills is to...,The way to improve your listening skills is to...,way improve listen skill practice active liste...
3,listening,Listening is the ability to accurately receive...,Listening is the ability to accurately receive...,listening ability accurately receive interpret...
4,listening,Empathy is the ability to understand the feeli...,Empathy is the ability to understand the feeli...,empathy ability understand feeling thought bel...


**Zero-Shot Classification**

In [None]:
# Candidate soft-skill labels handed to the zero-shot classifier.
skill_labels = [
    'Communication',
    'Teamwork',
    'Problem solving',
    'Time management',
    'Critical thinking',
    'Decision making',
    'Organizational',
    'Stress management',
    'Adaptability',
    'Conflict management',
    'Leadership',
    'Creativity',
    'Resourcefulness',
    'Persuasion',
    'Openness to criticism',
    'Confidence',
    'Mutual respect',
    'Empathy',
    'Listening',
    'Constructive feedback',
    'Collaboration',
    'Negotiation',
]

In [None]:
# Zero-shot classification pipeline using the facebook/bart-large-mnli checkpoint.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

Downloading:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
# Score every cleaned paragraph against all candidate skills and keep, for
# each paragraph, the label that carries the highest score.
# NOTE(review): the classifier is fed the stop-word-stripped, lemmatized text
# rather than the summary itself — confirm this is intended.
paragraphs = df['cleaned_text']
predictions = [classifier(text, skill_labels) for text in paragraphs]
zero_shot = [
    prediction['labels'][np.argmax(prediction['scores'], axis=0)]
    for prediction in predictions
]

df['Zero-shot'] = zero_shot
df.head()

Unnamed: 0,Title,Content,Summary,cleaned_text,Zero-shot
0,listening,Listening is one of the most important aspects...,Successful listening is not just about underst...,successful listen understand spoken write info...,Listening
1,listening,Empathy is the ability to understand the feeli...,Empathy is the ability to understand the feeli...,empathy ability understand feeling thought bel...,Organizational
2,listening,The way to improve your listening skills is to...,The way to improve your listening skills is to...,way improve listen skill practice active liste...,Listening
3,listening,Listening is the ability to accurately receive...,Listening is the ability to accurately receive...,listening ability accurately receive interpret...,Listening
4,listening,Empathy is the ability to understand the feeli...,Empathy is the ability to understand the feeli...,empathy ability understand feeling thought bel...,Empathy


In [None]:
df

Unnamed: 0,Title,Content,Summary,cleaned_text,Zero-shot
0,listening,Listening is one of the most important aspects...,Successful listening is not just about underst...,successful listen understand spoken write info...,Listening
1,listening,Empathy is the ability to understand the feeli...,Empathy is the ability to understand the feeli...,empathy ability understand feeling thought bel...,Organizational
2,listening,The way to improve your listening skills is to...,The way to improve your listening skills is to...,way improve listen skill practice active liste...,Listening
3,listening,Listening is the ability to accurately receive...,Listening is the ability to accurately receive...,listening ability accurately receive interpret...,Listening
4,listening,Empathy is the ability to understand the feeli...,Empathy is the ability to understand the feeli...,empathy ability understand feeling thought bel...,Empathy
5,speaking,"Conversation is the basis of communication, an...","Even a simple, friendly conversation with coll...",even simple friendly conversation colleague bu...,Mutual respect
6,speaking,Communication begins with you. \nTake responsi...,Good communication requires far more than what...,good communication require far express write m...,Communication
7,speaking,Think before you speak. \nMost of us work best...,If the conversation or meeting is worth your p...,conversation meeting worth precious time take ...,Communication
8,speaking,"As you formulate a speech strategy, put yourse...",No one can predict with certainty how other pe...,one predict certainty people react formulate s...,Listening
9,non-verbal communication,"When we talk about things that matter to us, t...","Non-verbal signals are wordless communication,...",non-verbal signal wordless communication body ...,Communication
