In [1]:
"""Thought Process"""

"""I have a text file that contains conversation between a user and an AI chatbot.
  Task 1.: My task is to separate the messages by speaker and AI."""

"""Step 1: load the text file that contains the conversation
   Step 2: read the conversation and store them in a variable called lines"""

path = '/content/chat.txt'
with open(path, 'r', encoding='utf-8') as f:
    lines = f.readlines()
print(lines)

['User: Hi, can you tell me about Python?\n', 'AI: Sure! Python is a popular programming language known for\n', 'its readability.\n', 'User: What can I use it for?\n', 'AI: You can use Python for web development, data analysis,\n', 'AI, and more.']


In [2]:
""" You can see output of the conversation here"""
lines

['User: Hi, can you tell me about Python?\n',
 'AI: Sure! Python is a popular programming language known for\n',
 'its readability.\n',
 'User: What can I use it for?\n',
 'AI: You can use Python for web development, data analysis,\n',
 'AI, and more.']

In [3]:
""""Step 3: Separate the User: and AI: part from the conversation and store the ID
    of the speaker in a variable and the message in other variable using strip() function."""


exchanges = []
for raw in lines:
  raw = raw.strip()
  if raw.startswith('User:'):
    speaker, msg = 'User', raw[len('User:'):].strip()
  elif raw.startswith('AI:'):
    speaker, msg = 'AI', raw[len('AI:'):].strip()
  else:
    continue
  exchanges.append((speaker, msg))

In [4]:
""" You can see the parsed output of the conversation here"""
exchanges

[('User', 'Hi, can you tell me about Python?'),
 ('AI', 'Sure! Python is a popular programming language known for'),
 ('User', 'What can I use it for?'),
 ('AI', 'You can use Python for web development, data analysis,')]

In [5]:
"""Task 2: Count total messages"""

total = len(exchanges)
user_count = 0
for speaker, msg in exchanges:
  if speaker == 'User':
    user_count += 1
ai_count = total - user_count

print(f"Total Messages: {total}")
print(f"User Messages: {user_count}")
print(f"AI Messages: {ai_count}")

Total Messages: 4
User Messages: 2
AI Messages: 2


In [6]:
"""Task 3: Keyword Analysis
    Step 1: import nltk for keyword extraction
    Step 2: use punkt_tab from nltk which will give us the split words from the texts
    Step 3: use stopwords set to identify the common set of words in english and exclude them
    step 4: get all the words excluding the common english words, use isalpha() for excluding punctuation mark
    step 5: use counter to count the frequency of a word"""
import nltk
nltk.download('punkt_tab')
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
stop_words = set(stopwords.words('english'))
from collections import Counter
all_msgs = []
for _, msg in exchanges:
  all_msgs.append(msg)
words = []
for msg in all_msgs:
  #tokenize the word of the messages
  tokens = word_tokenize(msg.lower())
  for token in tokens:
    if token.isalpha() and token.lower() not in stop_words:
      words.append(token.lower())
freq = Counter(words)
top5 = freq.most_common(5)
print(top5)

[('python', 3), ('use', 2), ('hi', 1), ('tell', 1), ('sure', 1)]


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [7]:
"""Task 4: Generate the summary"""
frq_w = []
for m, _ in top5:
  frq_w.append(m)
summary = (
    f"Summary:\n"
    f"- The conversation had {total} exchanges.\n"
    f"- The user asked mainly about {' and '.join(frq_w[:2])}.\n"
    f"- Most common keywords: {', '.join(frq_w)}."
)
print(summary)

Summary:
- The conversation had 4 exchanges.
- The user asked mainly about python and use.
- Most common keywords: python, use, hi, tell, sure.
