In [1]:
# 📦 Step 1: Install Required Libraries
!pip install nltk scikit-learn --quiet
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
# 📚 Step 2: Import Libraries
import re
import nltk
from collections import Counter
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from google.colab import files
import io

In [3]:
# 📥 Step 3: Download NLTK Data
# Tokenizer data; presumably needed by word_tokenize (imported above) —
# no call to it is visible in this part of the notebook, TODO confirm it is required.
nltk.download('punkt')
# Stop-word corpus read by stopwords.words('english') in summarize_chat_log.
# This is the cell's last expression, so its return value is what the cell displays.
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [4]:
# Step 4: Define summarization function with TF-IDF
def _parse_turns(text):
    """Return the (speaker, message) pairs found in ``text``, in file order."""
    turns = []
    for line in text.strip().split('\n'):
        match = re.match(r"(User|AI)\s*:\s*(.+)", line)
        if match:
            turns.append((match.group(1), match.group(2).strip()))
    return turns


def _rank_keywords(messages, max_keywords=5):
    """Return up to ``max_keywords`` terms from ``messages``, strongest first.

    The vocabulary is limited to the ``max_keywords`` most frequent
    non-stop-word terms (``max_features``); those terms are then ordered by
    their TF-IDF weight summed over all messages, with ties broken
    alphabetically. Returns an empty list when there is nothing to rank.
    """
    if not messages:
        return []

    vectorizer = TfidfVectorizer(
        stop_words=stopwords.words('english'),
        max_features=max_keywords,
    )
    try:
        weights = vectorizer.fit_transform(messages)
    except ValueError:
        # scikit-learn raises ValueError for an empty vocabulary, e.g. when
        # every message consists solely of stop words.
        return []

    terms = vectorizer.get_feature_names_out()
    # Sum each term's weight across messages without densifying the matrix.
    totals = [0.0] * len(terms)
    coo = weights.tocoo()
    for column, weight in zip(coo.col, coo.data):
        totals[column] += weight

    ranked = sorted(zip(terms, totals), key=lambda pair: (-pair[1], pair[0]))
    return [str(term) for term, _ in ranked]


def summarize_chat_log(text, filename):
    """Print a chat log followed by a short keyword-based summary.

    Lines of the form ``User: ...`` or ``AI: ...`` are treated as messages;
    every other line is ignored.

    Args:
        text: Full contents of the chat log as a string.
        filename: Name shown in the printed header.

    Returns:
        None. The conversation and its summary are written to stdout.

    Notes:
        The "exchanges" figure counts individual messages (User + AI), not
        User/AI pairs. Logs without any recognised message, or whose messages
        contain only stop words, yield a generic summary instead of raising.
    """
    turns = _parse_turns(text)
    exchange_count = len(turns)

    # Keyword extraction using TF-IDF
    top_keywords = _rank_keywords([message for _, message in turns])

    # Human-like topic sentence
    if top_keywords:
        main_topic = ", ".join(top_keywords[:2])
        topic_summary = f"The user asked mainly about {main_topic} and related topics."
    else:
        topic_summary = "The conversation was general without a clear specific topic."

    # 🔎 Print original conversation
    print(f"\n================= 📄 {filename} =================")
    print("\n--- Conversation ---")
    # Replay the turns exactly as they appeared so that consecutive messages
    # from the same speaker are neither reordered nor dropped.
    for speaker, message in turns:
        print(f"{speaker}: {message}")

    # 📝 Final summary
    print("\n--- Summary ---")
    print(f"- The conversation had {exchange_count} exchanges.")
    print(f"- {topic_summary}")
    print(f"- Most common keywords: {', '.join(top_keywords)}")

In [5]:
# Step 5: Upload one or more chat log files
print("\n📁 Please upload one or more .txt chat log files now:")
uploaded_files = files.upload()

# Step 6: Summarize each uploaded file
for filename, filedata in uploaded_files.items():
    # 'utf-8-sig' decodes plain UTF-8 and also strips a leading byte-order
    # mark; with plain 'utf-8' the BOM stays glued to the first line, which
    # then fails the "User:/AI:" match and loses the first message.
    text = filedata.decode('utf-8-sig')
    summarize_chat_log(text, filename)


📁 Please upload one or more .txt chat log files now:


Saving AI.txt to AI.txt


--- Conversation ---
User: Hello, can you explain what Artificial Intelligence is?
AI: Of course! Artificial Intelligence, or AI, refers to the simulation of human intelligence in machines that are programmed to think and learn like humans. It's about creating systems that can perform tasks that typically require human intellect, like problem-solving, understanding language, recognizing patterns, and making decisions. Essentially, we're teaching computers to "think" in a way that mimics our own cognitive processes, allowing them to process information, learn from data, and adapt to new situations.
User: What are some common applications of AI?
AI: AI is used in so many areas today, often in ways you might not even realize. Some very common applications include natural language processing (NLP), which powers voice assistants like Siri and Alexa, allowing them to understand and respond to your spoken commands. NLP is also what enables translation services, spam 

In [6]:
# Step 7: Upload another batch of chat log files
print("\n📁 Please upload one or more .txt chat log files now:")
uploaded_files = files.upload()

# Step 8: Summarize each file in this batch
for filename, filedata in uploaded_files.items():
    # 'utf-8-sig' decodes plain UTF-8 and also strips a leading byte-order
    # mark; with plain 'utf-8' the BOM stays glued to the first line, which
    # then fails the "User:/AI:" match and loses the first message.
    text = filedata.decode('utf-8-sig')
    summarize_chat_log(text, filename)


📁 Please upload one or more .txt chat log files now:


Saving Climate.txt to Climate.txt
Saving Human_Brain.txt to Human_Brain.txt


--- Conversation ---
User: Hey there! What's the latest with climate change?
AI: Hi! Climate change continues to be a major global concern. We're seeing ongoing global temperature increases, more frequent and intense extreme weather events, and significant impacts on ecosystems and human societies.
User: That sounds serious. What's causing it?
AI: The primary cause is the increase in greenhouse gas emissions from human activities, such as burning fossil fuels for energy, deforestation, and industrial processes. These gases trap heat in the atmosphere, leading to the warming trend.
User: What are some of the potential solutions?
AI: There are many approaches to addressing climate change, including transitioning to renewable energy sources like solar and wind power, improving energy efficiency, implementing carbon capture technologies, promoting sustainable land use, and fostering international cooperation to r