# Install required packages

In [1]:
!pip install pandas nltk textblob python-docx
!python -m textblob.download_corpora

Collecting python-docx
  Downloading python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.2.0-py3-none-any.whl (252 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.2.0
[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.
[nltk_data] Downloading package conll2000 to /root/nltk_data...
[nltk_data]   Unzipping corpora/conll2000.zip.
[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unz

In [2]:
import pandas as pd
import nltk
from textblob import TextBlob
from docx import Document
from collections import defaultdict
from google.colab import files

#  Download NLTK resources

In [3]:
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [9]:
# Directly read the uploaded file from the Colab filesystem
df = pd.read_csv("/content/Companies_Sentiment_Analysis_7_Days_Track.csv")

# Optional: preview the data
df.head()

Unnamed: 0,Ticker,Title,Summary,URL,Datetime,News Age (Days),News Time,Current Price,1 Hour Ago,4 Hours Ago,Previous Day Close,7 Days Ago,1 Hour Ago % Change,4 Hours Ago % Change,Previous Day Close % Change,7 Days Ago % Change,Headline Sentiment,Summary Sentiment
0,NET,Cloudflare (NET) Reports Increased Revenue Ami...,Cloudflare (NET) recently announced significan...,https://finance.yahoo.com/news/cloudflare-net-...,2025-08-01 14:06:20-04:00,2,2025-08-01 14:06:20-04:00,199.16,201.7,205.23,207.71,199.06,-1.26,-2.96,-4.12,0.05,0.12,0.75
1,NET,Cloudflare Q2 Earnings and Revenues Beat Estim...,"NET beats Q2 earnings and revenue estimates, d...",https://finance.yahoo.com/news/cloudflare-q2-e...,2025-08-01 11:41:00-04:00,2,2025-08-01 11:41:00-04:00,209.33,205.23,206.79,210.22,199.46,2.0,1.23,-0.42,4.95,0.0,0.78
2,CRM,CRM vs. NOW: Which Workflow Automation Stock H...,ServiceNow and Salesforce tap rising demand fo...,https://finance.yahoo.com/news/crm-vs-now-work...,2025-08-01 11:58:00-04:00,2,2025-08-01 11:58:00-04:00,253.03,251.65,252.82,261.7,270.02,0.55,0.08,-3.31,-6.29,0.5,-0.13
3,PANW,Palo Alto Networks (PANW) to Acquire CyberArk ...,"Palo Alto Networks, Inc. (NASDAQ:PANW) is one ...",https://finance.yahoo.com/news/palo-alto-netwo...,2025-08-01 19:59:08-04:00,2,2025-08-01 19:59:08-04:00,172.86,172.86,172.86,173.62,203.27,0.0,0.0,-0.44,-14.96,0.0,0.61
4,PANW,SentinelOne (S) Gains High-Level CCN Certifica...,SentinelOne (S) recently achieved high-level c...,https://finance.yahoo.com/news/sentinelone-gai...,2025-08-01 14:11:46-04:00,2,2025-08-01 14:11:46-04:00,172.89,173.7,172.58,173.94,204.05,-0.47,0.18,-0.6,-15.27,0.0,0.36


In [10]:
# Define known financial keyword categories
KEYWORD_CATEGORIES = {
    "earnings": [
        "earnings", "profit", "profits", "revenue", "sales", "income",
        "loss", "losses", "guidance", "forecast", "EPS", "net income", "beat estimates",
        "missed estimates", "record profits", "quarterly results", "financial results", "operating margin"
    ],
    "merger": [
        "merger", "merging", "combine", "merge talks", "consolidation",
        "merger agreement", "merger approval", "merger blocked"
    ],
    "acquisition": [
        "acquisition", "acquire", "acquired", "buyout", "purchase", "takeover",
        "deal signed", "deal announced", "investment", "stake purchase", "ownership increase"
    ],
    "stimulus": [
        "stimulus", "government support", "funding", "bailout", "subsidy",
        "relief package", "stimulus package", "federal support", "tax credit"
    ],
    "FDA approval": [
        "FDA", "approval", "approved", "clinical", "clinical trial", "drug trial",
        "vaccine", "authorized", "emergency approval", "phase 3 trial", "trial success"
    ],
    "partnership": [
        "partnership", "collaboration", "joint venture", "strategic alliance",
        "deal partnership", "cooperation agreement", "multi-year contract"
    ],
    "product launch": [
        "launch", "introduced", "released", "new product", "rollout", "unveiled",
        "prototype", "product update", "new version", "flagship product"
    ],
    "dividend": [
        "dividend", "payout", "shareholder return", "buyback",
        "dividend increase", "dividend cut", "special dividend", "stock repurchase"
    ],
    "lawsuit": [
        "lawsuit", "legal case", "settlement", "court", "regulatory action",
        "antitrust", "fined", "penalty", "investigation", "regulator probe"
    ],
    "price target": [
        "price target", "upgrade", "downgrade", "analyst rating", "valuation",
        "buy rating", "sell rating", "neutral rating", "target raised", "target cut"
    ],
    "supply chain": [
        "supply chain", "logistics", "shipment", "delay", "inventory",
        "supply shortage", "backlog", "transportation issue", "production halt"
    ],
    "expansion": [
        "expansion", "growth plan", "new facility", "factory", "headquarters",
        "hiring", "job cuts", "layoffs", "new office", "opening new plant", "capacity increase"
    ],
    "market performance": [
        "stock surge", "stock drop", "market share", "benchmark", "index",
        "outperform", "underperform", "record high", "record low", "volatility", "sell-off", "rally"
    ],
    "technology": [
        "AI", "machine learning", "cloud", "software", "chip", "semiconductor",
        "innovation", "R&D", "5G", "blockchain", "cybersecurity", "patent approval"
    ],
    "economic outlook": [
        "inflation", "interest rates", "GDP", "unemployment", "recession",
        "economic slowdown", "rate hike", "rate cut", "consumer confidence"
    ],
    "environment": [
        "sustainability", "carbon", "green energy", "renewable", "ESG",
        "climate change", "solar project", "wind energy", "emission reduction"
    ],
    "crypto": [
        "bitcoin", "crypto", "cryptocurrency", "token", "blockchain",
        "NFT", "digital asset", "crypto regulation"
    ],
    "bankruptcy": [
        "bankruptcy", "insolvency", "debt restructuring", "chapter 11", "liquidation", "creditors"
    ],

    "leadership": [
        "CEO", "CFO", "executive", "leadership change", "resignation",
        "appointment", "management shuffle", "board member", "founder", "chairman"
    ],
    "data privacy": [
        "data breach", "cyber attack", "privacy", "security breach",
        "data leak", "ransomware", "hacked", "customer data", "information security"
    ],
    "regulatory": [
        "SEC", "compliance", "regulation", "regulatory", "approval process",
        "legal dispute", "financial oversight", "government policy", "sanction", "license"
    ],
    "labor": [
        "strike", "union", "labor dispute", "employee protest", "labor agreement",
        "collective bargaining", "walkout", "contract negotiation", "labor shortage"
    ],
    "customer activity": [
        "user growth", "subscription", "user base", "customer retention",
        "churn rate", "client expansion", "client loss", "subscriber count", "MAU", "DAU"
    ],
    "pricing": [
        "price hike", "pricing strategy", "discount", "rebate", "cost increase",
        "price cut", "premium pricing", "value pricing", "dynamic pricing"
    ],
    "competition": [
        "competitive pressure", "market rival", "challenger", "competing product",
        "market entry", "pricing war", "competition", "industry peer", "disruptor"
    ],
    "branding": [
        "rebrand", "branding strategy", "brand value", "brand loyalty",
        "brand recognition", "marketing campaign", "new logo", "brand refresh"
    ],
    "supply agreement": [
        "supply agreement", "long-term supply", "procurement deal",
        "supply contract", "vendor agreement", "sourcing deal"
    ],
    "legal win/loss": [
        "court victory", "lawsuit win", "lawsuit loss", "ruled in favor",
        "legal defeat", "legal win", "injunction", "verdict"
    ]
}

# Output updated count and list of categories
len(KEYWORD_CATEGORIES), list(KEYWORD_CATEGORIES.keys())

# Detect known keywords in a text
def detect_keywords(text):
    detected = []
    for category, words in KEYWORD_CATEGORIES.items():
        for word in words:
            if word.lower() in text.lower():
                detected.append(category)
                break  # One match per category is enough
    return detected or ['Other']

# Storage dictionary
company_keywords = defaultdict(lambda: {'positive': [], 'negative': []})

# 🔍 Process each row to extract keywords & sentiment
for _, row in df.iterrows():
    ticker = row['Ticker']
    summary = str(row['Summary'])  # Ensure it's a string

    blob = TextBlob(summary)
    sentiment = 'positive' if blob.sentiment.polarity >= 0 else 'negative'

    detected = detect_keywords(summary)

    company_keywords[ticker][sentiment].extend(detected)

for ticker in company_keywords:
    for sentiment in ['positive', 'negative']:
        keywords = list(set(company_keywords[ticker][sentiment]))
        company_keywords[ticker][sentiment] = sorted(keywords)

# Create the Word document
doc = Document()
doc.add_heading('Company Progress Keyword Summary', 0)

for ticker, sentiment_data in sorted(company_keywords.items()):
    doc.add_heading(ticker, level=1)

    pos_keywords = ', '.join(sentiment_data['positive']) or "No data"
    neg_keywords = ', '.join(sentiment_data['negative']) or "No data"

    doc.add_paragraph(f"✅ Positive Keywords: {pos_keywords}")
    doc.add_paragraph(f"❌ Negative Keywords: {neg_keywords}")
    doc.add_paragraph("")  # Spacer

#  Save and download the DOCX

In [11]:
output_file = "Company_Progress_Summary.docx"
doc.save(output_file)
files.download(output_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>