In [13]:
import pandas as pd
import re
from textblob import TextBlob
from collections import Counter


In [37]:
df = pd.read_excel('Assignment.xlsx')

articles = df['Article']
articles.head()


0    Retailers, the makers of foods marketed for we...
1    Move over, Ozempic — there’s a new drug in tow...
2    Sept 14 (Reuters) - Bristol Myers Squibb (BMY....
3    Austin Wolcott was 18 years old and pretty sur...
4    Cancer, often referred to as the “emperor of a...
Name: Article, dtype: object

## Cleaning Up Articles

In [39]:
def clean_text(text):
    text = re.sub(r'[^\w\s]', '', text)
    stop_words = set(["the", "is", "in", "and", "to", "a", "of", "this", "that", "it", "for", "on", "with", "its", "are", "about", "though", "could"])
    return ' '.join([word for word in text.split() if word.lower() not in stop_words])

df['Cleaned_Article'] = df['Article'].apply(clean_text)

cleaned_articles[:5]  


['Retailers makers foods marketed weight loss other types companies could see knockon effects from rise diabetes weight loss drugs like Ozempic As they do every summer publicly traded companies posted their secondquarter results while Americans were baring their bodies beach But year timing was apt several earnings calls August chief executives reassured investors Ozempic revolution had not left them dust they could somehow share blazing success new diabetes weight loss drugs puts us good position be solution those who are drugs said Dan R Chard chief executive Medifast which makes diet products like shakes protein bars adding Theyre looking guidance He told analysts even while explaining newgeneration drugs had helped pummel earnings down 347 percent year year We will continue study Michael Johnson chief executive nutritional supplement maker Herbalife told investors when we see an opportunity capitalize we will theory opportunity both making profits losing fortunes could be vast not 

# Building the System

## Mood Checkup

In [40]:
def get_mood(article):
    blob = TextBlob(article)
    sentiment = blob.sentiment.polarity
    return 'Positive' if sentiment > 0 else 'Negative' if sentiment < 0 else 'Neutral'

df['Mood'] = df['Cleaned_Article'].apply(get_mood)

## Summarizing Articles

In [42]:
def summarize(article):
    blob = TextBlob(article)
    return ' '.join([str(sentence) for sentence in blob.sentences[:2]])

df['Summary'] = df['Article'].apply(summarize)

## Finding common themes

In [43]:
def find_common_themes(articles):
    stop_words = set(["the", "is", "in", "and", "to", "a", "of", "this", "that", "it", "for", "on", "with", "its", "are", "about", "though", "could"])
    all_words = ' '.join(articles).split()
    filtered_words = [word for word in all_words if word.lower() not in stop_words]
    word_freq = Counter(filtered_words)
    return word_freq.most_common(5)

common_themes = find_common_themes(df['Cleaned_Article'])
 

## Aspect analysis


In [46]:
def aspect_analysis(article):
    aspects = {
        'Innovation': 'Positive',
        'Cost': 'Negative',
        'Overall Plan': 'Positive',
        'Traffic Impact': 'Negative',
        'Sustainability': 'Positive',
        'Health Benefits': 'Positive',
        'Economic Stability': 'Negative'
    }
    
    aspect_keywords = {
        'Innovation': ['innovation', 'innovative', 'new', 'develop'],
        'Cost': ['cost', 'price', 'expensive', 'cheap'],
        'Overall Plan': ['plan', 'approved', 'project', 'development'],
        'Traffic Impact': ['traffic', 'congestion', 'commute'],
        'Sustainability': ['sustainable', 'environment', 'eco-friendly', 'green'],
        'Health Benefits': ['health', 'benefit', 'well-being'],
        'Economic Stability': ['economic', 'market', 'investment', 'finance']
    }
    
    article_lower = article.lower()
    aspect_sentiments = {}
    for aspect, keywords in aspect_keywords.items():
        if any(keyword in article_lower for keyword in keywords):
            aspect_sentiments[aspect] = aspects[aspect]
    return aspect_sentiments



## Displaying the results for each article

In [47]:
df['Aspect_Analysis'] = df['Cleaned_Article'].apply(aspect_analysis)

In [49]:
results = df[['Article', 'Cleaned_Article', 'Summary', 'Mood', 'Aspect_Analysis']]
results.head()

Unnamed: 0,Article,Cleaned_Article,Summary,Mood,Aspect_Analysis
0,"Retailers, the makers of foods marketed for we...",Retailers makers foods marketed weight loss ot...,"Retailers, the makers of foods marketed for we...",Positive,"{'Innovation': 'Positive', 'Cost': 'Negative',..."
1,"Move over, Ozempic — there’s a new drug in tow...",Move over Ozempic theres new drug town Eli Lil...,"Move over, Ozempic — there’s a new drug in tow...",Negative,"{'Innovation': 'Positive', 'Cost': 'Negative',..."
2,Sept 14 (Reuters) - Bristol Myers Squibb (BMY....,Sept 14 Reuters Bristol Myers Squibb BMYN said...,Sept 14 (Reuters) - Bristol Myers Squibb (BMY....,Positive,"{'Innovation': 'Positive', 'Cost': 'Negative',..."
3,Austin Wolcott was 18 years old and pretty sur...,Austin Wolcott was 18 years old pretty sure he...,Austin Wolcott was 18 years old and pretty sur...,Positive,"{'Innovation': 'Positive', 'Cost': 'Negative',..."
4,"Cancer, often referred to as the “emperor of a...",Cancer often referred as emperor all maladies ...,"Cancer, often referred to as the “emperor of a...",Positive,"{'Innovation': 'Positive', 'Cost': 'Negative',..."


In [50]:
# common themes
print("Common Themes Across Articles:")
for theme, freq in common_themes:
    print(f"{theme}: {freq}")

Common Themes Across Articles:
Nike: 104
their: 93
as: 85
has: 85
by: 81


## Example output for a single article


In [51]:
example_article = """The stock market experienced a downturn today, with many companies seeing a drop in their 
share prices. Investors are worried about future economic stability"""
example_cleaned_article = clean_text(example_article)
example_summary = summarize(example_cleaned_article)
example_mood = get_mood(example_cleaned_article)
example_aspects = aspect_analysis(example_cleaned_article)

print("Example Article:")
print(f"Original Text: {example_article}")
print(f"Cleaned Text: {example_cleaned_article}")
print(f"Summary: {example_summary}")
print(f"Mood Rating: {example_mood}")
print(f"Aspect Analysis: {example_aspects}")

Example Article:
Original Text: The stock market experienced a downturn today, with many companies seeing a drop in their 
share prices. Investors are worried about future economic stability
Cleaned Text: stock market experienced downturn today many companies seeing drop their share prices Investors worried future economic stability
Summary: stock market experienced downturn today many companies seeing drop their share prices Investors worried future economic stability
Mood Rating: Positive
Aspect Analysis: {'Cost': 'Negative', 'Economic Stability': 'Negative'}
