# Sentiment analysis of US tech media
This notebook performs sentiment analysis of US tech media.  
News are gathered via NewsAPI.  
Sentiment analysis is calculated with TextBlob.  
Named Entity Recognition is done with spaCy.  
The results are visualized in a separate Tableau visualization.

In [1]:
import pandas as pd
from configparser import ConfigParser
import sqlalchemy
from sqlalchemy import create_engine
import re
from textblob import TextBlob
import spacy
#!python -m spacy download en_core_web_sm

## Retrieve news from database

In [2]:
# Get keys from config file
parser = ConfigParser()
_ = parser.read('keys.cfg')

In [3]:
# Connect to database
engine = create_engine(parser.get('news','conn_string'))

In [4]:
# Read from the database and turn into dataframe
news_df = pd.read_sql_query('select * from "news"',con=engine)
sources_df = pd.read_sql_query('select * from "sources"',con=engine)

In [5]:
news_df.head()

Unnamed: 0,author,title,description,url,urlToImage,publishedAt,content,source.id,source.name
0,Stephen Clark,NASA wants Starliner to make a quick getaway f...,Starliner is set to land at White Sands Space ...,https://arstechnica.com/space/2024/09/boeings-...,https://cdn.arstechnica.net/wp-content/uploads...,2024-09-06T21:50:35Z,Enlarge/ Boeing's Starliner spacecraft is set ...,ars-technica,Ars Technica
1,Beth Mole,Person in Missouri caught H5 bird flu without ...,"The person recovered, and Missouri officials s...",https://arstechnica.com/science/2024/09/missou...,https://cdn.arstechnica.net/wp-content/uploads...,2024-09-06T23:02:37Z,Enlarge/ The influenza virus from an image pro...,ars-technica,Ars Technica
2,Kevin Purdy,"Balatro arrives on phones Sept. 26, so plan yo...",It has already sold 2 million copies. Now the ...,https://arstechnica.com/gaming/2024/09/balatro...,https://cdn.arstechnica.net/wp-content/uploads...,2024-09-05T17:38:52Z,Enlarge/ The energy captured by Balatro's mobi...,ars-technica,Ars Technica
3,Jennifer Ouellette,Jack Black stars as expert crafter Steve in A ...,"""Anything you can dream about here, you can ma...",https://arstechnica.com/culture/2024/09/magica...,https://cdn.arstechnica.net/wp-content/uploads...,2024-09-04T19:47:36Z,12\r\nJason Momoa and Jack Black star in A Min...,ars-technica,Ars Technica
4,Jennifer Ouellette,Cats play fetch more often than previously bel...,About 4 in 10 cats and nearly 8 in 10 dogs lik...,https://arstechnica.com/science/2024/09/cats-p...,https://cdn.arstechnica.net/wp-content/uploads...,2024-09-04T18:00:00Z,"9\r\nAlthough more common in dogs, 4 in 10 pet...",ars-technica,Ars Technica


## Preprocess text

In [6]:
def cleanText(sentence):
    sentence = str(sentence)
    
    # Special characters
    sentence = re.sub(r"[^a-zA-Z0–9\s]", "", sentence)
    # tags
    sentence = re.sub(r"<.*?>", " ", sentence)
    # links
    sentence = re.sub(r"http\S+", " ", sentence)
    # numbers
    sentence = re.sub("[0-9]+", " ", sentence)
    # newlines
    sentence = re.sub("\n", " ", sentence)
    # CR
    sentence = re.sub(r"^[\r\n]+|\.|[\r\n]+$"," ",sentence)

    return sentence

In [7]:
news_df['title_clean'] = news_df['title'].apply(cleanText)
news_df['description_clean'] = news_df['description'].apply(cleanText)
news_df['content_clean'] = news_df['content'].apply(cleanText)

## Calculate sentiment scores

In [8]:
# Get Subjectivity
def getSubjectivity(text):
   return TextBlob(text).sentiment.subjectivity

# Get Polarity
def getPolarity(text):
   return TextBlob(text).sentiment.polarity

In [9]:
def analyze_text_column(df, column):
    df[f'{column}_subjectivity'] = df[column].apply(getSubjectivity)
    df[f'{column}_polarity'] = df[column].apply(getPolarity)
    df[f'{column}_polarity_label'] = df[f'{column}_polarity'].apply(lambda x: 'negative' if x < 0 else 'neutral' if x == 0 else 'positive')

In [10]:
analyze_text_column(news_df, 'title_clean')
analyze_text_column(news_df, 'content_clean')
analyze_text_column(news_df, 'description_clean')

In [11]:
news_df.head()

Unnamed: 0,author,title,description,url,urlToImage,publishedAt,content,source.id,source.name,title_clean,...,content_clean,title_clean_subjectivity,title_clean_polarity,title_clean_polarity_label,content_clean_subjectivity,content_clean_polarity,content_clean_polarity_label,description_clean_subjectivity,description_clean_polarity,description_clean_polarity_label
0,Stephen Clark,NASA wants Starliner to make a quick getaway f...,Starliner is set to land at White Sands Space ...,https://arstechnica.com/space/2024/09/boeings-...,https://cdn.arstechnica.net/wp-content/uploads...,2024-09-06T21:50:35Z,Enlarge/ Boeing's Starliner spacecraft is set ...,ars-technica,Ars Technica,NASA wants Starliner to make a quick getaway f...,...,Enlarge Boeings Starliner spacecraft is set to...,0.3,0.266667,positive,0.0,0.0,neutral,0.251515,0.045455,positive
1,Beth Mole,Person in Missouri caught H5 bird flu without ...,"The person recovered, and Missouri officials s...",https://arstechnica.com/science/2024/09/missou...,https://cdn.arstechnica.net/wp-content/uploads...,2024-09-06T23:02:37Z,Enlarge/ The influenza virus from an image pro...,ars-technica,Ars Technica,Person in Missouri caught H bird flu without a...,...,Enlarge The influenza virus from an image prod...,0.0,0.0,neutral,0.0,0.0,neutral,0.183333,0.0,neutral
2,Kevin Purdy,"Balatro arrives on phones Sept. 26, so plan yo...",It has already sold 2 million copies. Now the ...,https://arstechnica.com/gaming/2024/09/balatro...,https://cdn.arstechnica.net/wp-content/uploads...,2024-09-05T17:38:52Z,Enlarge/ The energy captured by Balatro's mobi...,ars-technica,Ars Technica,Balatro arrives on phones Sept so plan your s...,...,Enlarge The energy captured by Balatros mobile...,0.857143,-0.714286,negative,1.0,-1.0,negative,0.15,0.15,positive
3,Jennifer Ouellette,Jack Black stars as expert crafter Steve in A ...,"""Anything you can dream about here, you can ma...",https://arstechnica.com/culture/2024/09/magica...,https://cdn.arstechnica.net/wp-content/uploads...,2024-09-04T19:47:36Z,12\r\nJason Momoa and Jack Black star in A Min...,ars-technica,Ars Technica,Jack Black stars as expert crafter Steve in A ...,...,Jason Momoa and Jack Black star in A Minecra...,0.433333,-0.166667,negative,0.572222,0.363889,positive,0.0,0.0,neutral
4,Jennifer Ouellette,Cats play fetch more often than previously bel...,About 4 in 10 cats and nearly 8 in 10 dogs lik...,https://arstechnica.com/science/2024/09/cats-p...,https://cdn.arstechnica.net/wp-content/uploads...,2024-09-04T18:00:00Z,"9\r\nAlthough more common in dogs, 4 in 10 pet...",ars-technica,Ars Technica,Cats play fetch more often than previously bel...,...,\r Although more common in dogs in pet cat...,0.244444,0.055556,positive,0.5,0.1,positive,0.7,0.05,positive


## Perform Named Entity Recognition

In [12]:
# Load the spaCy model
nlp = spacy.load("en_core_web_sm")

def extract_entities(df, column):
    # Apply NER to each text sample in the column
    entities = df[column].apply(lambda text: [(ent.text, ent.label_) for ent in nlp(text).ents])

    # Create new columns for entities
    df[f'{column}_entities'] = entities

    return df

In [13]:
news_df = extract_entities(news_df, 'title_clean')

## Calculate entity sentiment

In [14]:
def calculate_entity_polarity(df, key_column, polarity_columns):
    entity_polarity_df = pd.DataFrame({
        'Key': df[key_column],
        'Entity': df[key_column].apply(lambda x: [ent[0] for ent in x]),
        'Entity_Type': df[key_column].apply(lambda x: [ent[1] for ent in x]),
    })

    for polarity_column in polarity_columns:
        entity_polarity_df[polarity_column] = df[polarity_column]

    entity_polarity_df = entity_polarity_df.explode('Entity')
    entity_polarity_df = entity_polarity_df.explode('Entity_Type')

    entity_polarity_df_sums = []
    for polarity_column in polarity_columns:
        entity_polarity_df_sum = entity_polarity_df.groupby(['Entity', 'Entity_Type'])[polarity_column].agg(['mean', 'var', 'count']).reset_index()
        entity_polarity_df_sum.columns = [f'{col}_{polarity_column}' if col != 'Entity' and col != 'Entity_Type' else col for col in entity_polarity_df_sum.columns]
        entity_polarity_df_sums.append(entity_polarity_df_sum)

    merged_df = pd.merge(*entity_polarity_df_sums, on=['Entity', 'Entity_Type'])

    merged_df = pd.merge(merged_df, entity_polarity_df, on=['Entity', 'Entity_Type'], suffixes=('_agg', ''))

    merged_df.drop_duplicates(subset=['Entity', 'Entity_Type'], inplace=True)

    return merged_df

In [15]:
entity_df = calculate_entity_polarity(news_df, 'title_clean_entities', ['title_clean_polarity', 'description_clean_polarity'])

## Output

In [17]:
entity_df.head()

Unnamed: 0,Entity,Entity_Type,mean_title_clean_polarity,var_title_clean_polarity,count_title_clean_polarity,mean_description_clean_polarity,var_description_clean_polarity,count_description_clean_polarity,Key,title_clean_polarity,description_clean_polarity
0,A Small Light and Togo,ORG,-0.004167,0.0,2,0.5,0.0,2,"[(Disney, ORG), (Disney, ORG), (A Small Light ...",-0.004167,0.5
2,A Small Light and Togo,WORK_OF_ART,-0.004167,,1,0.5,,1,"[(Disney, ORG), (Disney, ORG), (A Small Light ...",-0.004167,0.5
3,AFP,NORP,0.0,,1,0.0,,1,"[(AFP, ORG), (French, NORP), (TechCrunch, ORG)]",0.0,0.0
4,AFP,ORG,0.0,0.0,2,0.0,0.0,2,"[(AFP, ORG), (French, NORP), (TechCrunch, ORG)]",0.0,0.0
6,AGI,ORG,0.0,0.0,2,-0.29,0.0,2,"[(AI, ORG), (AGI, ORG), (TechCrunch, PRODUCT)]",0.0,-0.29


In [18]:
news_df.head()

Unnamed: 0,author,title,description,url,urlToImage,publishedAt,content,source.id,source.name,title_clean,...,title_clean_subjectivity,title_clean_polarity,title_clean_polarity_label,content_clean_subjectivity,content_clean_polarity,content_clean_polarity_label,description_clean_subjectivity,description_clean_polarity,description_clean_polarity_label,title_clean_entities
0,Stephen Clark,NASA wants Starliner to make a quick getaway f...,Starliner is set to land at White Sands Space ...,https://arstechnica.com/space/2024/09/boeings-...,https://cdn.arstechnica.net/wp-content/uploads...,2024-09-06T21:50:35Z,Enlarge/ Boeing's Starliner spacecraft is set ...,ars-technica,Ars Technica,NASA wants Starliner to make a quick getaway f...,...,0.3,0.266667,positive,0.0,0.0,neutral,0.251515,0.045455,positive,"[(NASA, ORG), (Starliner, PERSON)]"
1,Beth Mole,Person in Missouri caught H5 bird flu without ...,"The person recovered, and Missouri officials s...",https://arstechnica.com/science/2024/09/missou...,https://cdn.arstechnica.net/wp-content/uploads...,2024-09-06T23:02:37Z,Enlarge/ The influenza virus from an image pro...,ars-technica,Ars Technica,Person in Missouri caught H bird flu without a...,...,0.0,0.0,neutral,0.0,0.0,neutral,0.183333,0.0,neutral,"[(Missouri, GPE)]"
2,Kevin Purdy,"Balatro arrives on phones Sept. 26, so plan yo...",It has already sold 2 million copies. Now the ...,https://arstechnica.com/gaming/2024/09/balatro...,https://cdn.arstechnica.net/wp-content/uploads...,2024-09-05T17:38:52Z,Enlarge/ The energy captured by Balatro's mobi...,ars-technica,Ars Technica,Balatro arrives on phones Sept so plan your s...,...,0.857143,-0.714286,negative,1.0,-1.0,negative,0.15,0.15,positive,[]
3,Jennifer Ouellette,Jack Black stars as expert crafter Steve in A ...,"""Anything you can dream about here, you can ma...",https://arstechnica.com/culture/2024/09/magica...,https://cdn.arstechnica.net/wp-content/uploads...,2024-09-04T19:47:36Z,12\r\nJason Momoa and Jack Black star in A Min...,ars-technica,Ars Technica,Jack Black stars as expert crafter Steve in A ...,...,0.433333,-0.166667,negative,0.572222,0.363889,positive,0.0,0.0,neutral,"[(Jack Black, PERSON), (Steve, PERSON), (Minec..."
4,Jennifer Ouellette,Cats play fetch more often than previously bel...,About 4 in 10 cats and nearly 8 in 10 dogs lik...,https://arstechnica.com/science/2024/09/cats-p...,https://cdn.arstechnica.net/wp-content/uploads...,2024-09-04T18:00:00Z,"9\r\nAlthough more common in dogs, 4 in 10 pet...",ars-technica,Ars Technica,Cats play fetch more often than previously bel...,...,0.244444,0.055556,positive,0.5,0.1,positive,0.7,0.05,positive,[]
