In [1]:
import pandas as pd
from helpers import get_years_dfs, run_stats_test

In [2]:
UNDERLINE = "\033[4m"
END = "\033[0m"

def print_stats_result(result, min_significant_pvalue=0.05):
    print(result)

    if result.pvalue <= min_significant_pvalue:
        print(UNDERLINE + "Statistically significant" + END + " change over the years")
    else:
        print(UNDERLINE + "Insigificant" + END + " change")

In [3]:
def run_stats_tests_on_sentiment(input_path: str, min_significant_pvalue=0.05):
    years_dfs = get_years_dfs(input_path)

    print("All polarities:")

    all_polarities_result = run_stats_test(years_dfs, "polarity")
    print_stats_result(all_polarities_result, min_significant_pvalue)

    print("")


    print("Positive polarities:")

    positive_polarity_dfs = [df[df["polarity"] > 0] for df in years_dfs]
    positive_polarities_result = run_stats_test(positive_polarity_dfs, "polarity")
    print_stats_result(positive_polarities_result, min_significant_pvalue)

    print("")


    print("Negative polarities:")

    negative_polarity_dfs = [df[df["polarity"] < 0] for df in years_dfs]
    negative_polarities_result = run_stats_test(negative_polarity_dfs, "polarity")
    print_stats_result(negative_polarities_result, min_significant_pvalue)

    print("")
    

    print("Subjectivity:")

    subjectivity_result = run_stats_test(years_dfs, "subjectivity")
    print_stats_result(subjectivity_result, min_significant_pvalue)

### Body text

#### All text types

In [4]:
run_stats_tests_on_sentiment("./output/sentiment.xlsx")

All polarities:
Running test for non-normal distributions
KruskalResult(statistic=41.02239709398934, pvalue=0.0003172883543388927)
[4mStatistically significant[0m change over the years

Positive polarities:
Running test for non-normal distributions
KruskalResult(statistic=64.43595080917707, pvalue=4.2893079669177265e-08)
[4mStatistically significant[0m change over the years

Negative polarities:
Running test for non-normal distributions
KruskalResult(statistic=51.94576396536402, pvalue=5.766316656597938e-06)
[4mStatistically significant[0m change over the years

Subjectivity:
Running test for non-normal distributions
KruskalResult(statistic=49.59416509082896, pvalue=1.4024628377327965e-05)
[4mStatistically significant[0m change over the years


#### News and blog

In [5]:
run_stats_tests_on_sentiment("./output/news_and_blog/sentiment_news_and_blog.xlsx")

All polarities:
Running test for non-normal distributions
KruskalResult(statistic=30.003135674728256, pvalue=0.011910194043908136)
[4mStatistically significant[0m change over the years

Positive polarities:
Running test for non-normal distributions
KruskalResult(statistic=23.372057520840052, pvalue=0.07655352798340403)
[4mInsigificant[0m change

Negative polarities:
Running test for non-normal distributions
KruskalResult(statistic=nan, pvalue=nan)
[4mInsigificant[0m change

Subjectivity:
Running test for non-normal distributions
KruskalResult(statistic=19.18457124867629, pvalue=0.20549154702957353)
[4mInsigificant[0m change


  if levene(*samples).pvalue <= 0.05:
  return kruskal(*samples)


#### Press release

In [6]:
run_stats_tests_on_sentiment("./output/press_release/sentiment_press_release.xlsx")

All polarities:
Running test for non-normal distributions
KruskalResult(statistic=14.536637953115136, pvalue=0.4852792422233524)
[4mInsigificant[0m change

Positive polarities:
Running test for non-normal distributions
KruskalResult(statistic=nan, pvalue=nan)
[4mInsigificant[0m change

Negative polarities:
Running test for non-normal distributions
KruskalResult(statistic=nan, pvalue=nan)
[4mInsigificant[0m change

Subjectivity:
Running test for non-normal distributions
KruskalResult(statistic=18.97487368801161, pvalue=0.2148747441876788)
[4mInsigificant[0m change


  if levene(*samples).pvalue <= 0.05:
  return kruskal(*samples)


#### Social media

In [7]:
run_stats_tests_on_sentiment("./output/social_media/sentiment_social_media.xlsx")

All polarities:
Running test for non-normal distributions
KruskalResult(statistic=18.50377677528658, pvalue=0.13931093606616693)
[4mInsigificant[0m change

Positive polarities:
Running test for non-normal distributions
KruskalResult(statistic=nan, pvalue=nan)
[4mInsigificant[0m change

Negative polarities:
Running test for non-normal distributions
KruskalResult(statistic=nan, pvalue=nan)
[4mInsigificant[0m change

Subjectivity:
Running test for non-normal distributions
KruskalResult(statistic=29.976655157125002, pvalue=0.004746317504106917)
[4mStatistically significant[0m change over the years


  if levene(*samples).pvalue <= 0.05:
  return kruskal(*samples)


### Headline text

#### All text types

In [8]:
run_stats_tests_on_sentiment("./output/sentiment_headlines.xlsx")

All polarities:
Running test for non-normal distributions
KruskalResult(statistic=23.951469474448203, pvalue=0.06592221405040607)
[4mInsigificant[0m change

Positive polarities:
Running test for non-normal distributions
KruskalResult(statistic=17.143261561747966, pvalue=0.3103726214698343)
[4mInsigificant[0m change

Negative polarities:
Running test for non-normal distributions
KruskalResult(statistic=20.91510928388156, pvalue=0.13956360617504143)
[4mInsigificant[0m change

Subjectivity:
Running test for non-normal distributions
KruskalResult(statistic=16.342234489888483, pvalue=0.35967834535972054)
[4mInsigificant[0m change


#### News and blog

In [9]:
run_stats_tests_on_sentiment("./output/news_and_blog/sentiment_news_and_blog_headlines.xlsx")

All polarities:
Running test for non-normal distributions
KruskalResult(statistic=19.86238402161159, pvalue=0.17727561041564813)
[4mInsigificant[0m change

Positive polarities:
Running test for non-normal distributions
KruskalResult(statistic=11.403900769451484, pvalue=0.7234725119765049)
[4mInsigificant[0m change

Negative polarities:
Running test for non-normal distributions
KruskalResult(statistic=17.462281922709, pvalue=0.29198811944977676)
[4mInsigificant[0m change

Subjectivity:
Running test for non-normal distributions
KruskalResult(statistic=16.33518789395167, pvalue=0.3601313797752167)
[4mInsigificant[0m change


#### Press release

In [10]:
run_stats_tests_on_sentiment("./output/press_release/sentiment_press_release_headlines.xlsx")

All polarities:
Running test for non-normal distributions
KruskalResult(statistic=17.38764514014097, pvalue=0.2962238570490995)
[4mInsigificant[0m change

Positive polarities:
Running test for non-normal distributions
KruskalResult(statistic=nan, pvalue=nan)
[4mInsigificant[0m change

Negative polarities:
Running test for non-normal distributions
KruskalResult(statistic=nan, pvalue=nan)
[4mInsigificant[0m change

Subjectivity:
Running test for non-normal distributions
KruskalResult(statistic=10.786812494469856, pvalue=0.7675552648207807)
[4mInsigificant[0m change


  if levene(*samples).pvalue <= 0.05:
  return kruskal(*samples)


Not running for social media headlines because not enough data