# Hypothesis testing: Impartiality

In this notebook, the impartiality scores are explored descriptively, and the related hypotheses are (partially) tested with Mann–Whitney U tests.

## Loading packages and data

In [1]:
#import the necessary packages
import pandas as pd
from pandas import read_excel
from scipy import stats
from scipy.stats import mannwhitneyu

In [2]:
# Load the cleaned article data set (including the impartiality scores)
# and show the first three rows as a quick sanity check.
df = pd.read_excel("complete_data_cleaned_with_impartiality.xlsx")
df.head(n=3)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,ID,Newspaper,Date,Length,Category,Author,Headline,Teaser,...,Modality,url,clean text,words in clean text,reach_dummy,modality_dummy,neutrality dummy,balance of actors dummy,balance of viewpoints dummy,impartiality
0,0,6,100006,sueddeutschet politik (www),2020-05-28T15:34:08,367,,,SZ Espresso: Nachrichten kompakt - die Übersic...,<p>Was heute wichtig war - und was Sie auf SZ....,...,online,https://www.sueddeutsche.de/politik/nachrichte...,"das wichtig coronavirus . berufstat mutt vat ,...",224,1,0,1,0,1,2
1,1,8,100008,sueddeutschet politik (www),2020-05-28T17:01:43,200,,,Kommunalpolitik: Abgeblendet,<p>Bayreuths Stadtrat im Stream</p>,...,online,https://www.sueddeutsche.de/bayern/kommunalpol...,"livestream stadtrat , klingt transparent erstr...",104,1,0,1,1,0,2
2,2,24,100024,aachener zeitung (www),2020-05-28T03:01:52,512,Politik,,Länder planen Öffnung: Streit über Schulen und...,"<img src=""https://www.aachener-zeitung.de/imgs...",...,online,https://www.aachener-zeitung.de/politik/deutsc...,der streit wiederoffn schul kindergart kris ve...,318,0,0,1,0,1,2


In [3]:
# Display the column labels of the loaded frame.
df.columns

Index(['Unnamed: 0', 'Unnamed: 0.1', 'ID', 'Newspaper', 'Date', 'Length',
       'Category', 'Author', 'Headline', 'Teaser', 'Article', 'Modality',
       'url', 'clean text', 'words in clean text', 'reach_dummy',
       'modality_dummy', 'neutrality dummy', 'balance of actors dummy',
       'balance of viewpoints dummy', 'impartiality'],
      dtype='object')

## Select relevant columns

In [5]:
# Restrict the frame to the identifier, grouping dummies, text fields and
# the impartiality measures used in the rest of the notebook.
relevant_columns = [
    "ID",
    "Newspaper",
    "Length",
    "Article",
    "modality_dummy",
    "reach_dummy",
    "clean text",
    "neutrality dummy",
    "balance of actors dummy",
    "balance of viewpoints dummy",
    "impartiality",
]
df = df[relevant_columns]

# Number of articles in the sample.
len(df)

11491

## Descriptives

In [6]:
# Mean and standard deviation of the impartiality score over all articles.
overall_scores = df["impartiality"]

print("Overall sample")
print(overall_scores.mean())
print(overall_scores.std())

Overall sample
1.3869114959533548
0.798812842452084


In [7]:
# Impartiality by modality. Each subset is filtered once and reused for both
# statistics; modality_dummy == 1 is reported as print, 0 as online.
print_scores = df.loc[df["modality_dummy"] == 1, "impartiality"]
online_scores = df.loc[df["modality_dummy"] == 0, "impartiality"]

print("Print sample")
print(print_scores.mean())
print(print_scores.std())
print("")
print("Online sample")
print(online_scores.mean())
print(online_scores.std())

Print sample
1.3879451002975334
0.7979820672125785

Online sample
1.376865671641791
0.8071495815058273


In [8]:
# Impartiality by reach. Each subset is filtered once and reused for both
# statistics; reach_dummy == 1 is reported as national, 0 as regional.
national_scores = df.loc[df["reach_dummy"] == 1, "impartiality"]
regional_scores = df.loc[df["reach_dummy"] == 0, "impartiality"]

print("National sample")
print(national_scores.mean())
print(national_scores.std())
print("")
print("Regional sample")
print(regional_scores.mean())
print(regional_scores.std())

National sample
1.3041995970866265
0.8096841200677771

Regional sample
1.4928543072647875
0.7719063025374823


## Hypothesis test

In [10]:
# RQ3a: compare the impartiality scores of national (reach_dummy == 1) and
# regional (reach_dummy == 0) outlets with a Mann-Whitney U test.
#
# The alternative hypothesis is passed explicitly. SciPy releases before 1.7
# defaulted to the deprecated alternative=None, which reports half of the
# two-sided p-value, while newer releases default to "two-sided". Stating it
# here makes the reported p-value independent of the installed SciPy version.
# Re-run the cell to refresh any output stored from an older run.
national_scores = df.loc[df["reach_dummy"] == 1, "impartiality"]
regional_scores = df.loc[df["reach_dummy"] == 0, "impartiality"]

print("RQ3a: Reach")
print(mannwhitneyu(national_scores, regional_scores, alternative="two-sided"))

RQ3a: Reach
MannwhitneyuResult(statistic=14138917.5, pvalue=2.382451200871224e-38)


In [11]:
# RQ3b: compare the impartiality scores of print (modality_dummy == 1) and
# online (modality_dummy == 0) articles with a Mann-Whitney U test.
#
# The alternative hypothesis is passed explicitly. SciPy releases before 1.7
# defaulted to the deprecated alternative=None, which reports half of the
# two-sided p-value, while newer releases default to "two-sided". Stating it
# here makes the reported p-value independent of the installed SciPy version.
# Re-run the cell to refresh any output stored from an older run.
print_scores = df.loc[df["modality_dummy"] == 1, "impartiality"]
online_scores = df.loc[df["modality_dummy"] == 0, "impartiality"]

print("RQ3b: Modality")
print(mannwhitneyu(print_scores, online_scores, alternative="two-sided"))

RQ3b: Modality
MannwhitneyuResult(statistic=5532931.5, pvalue=0.29563392256831444)


## Comparisons by newspaper

In [16]:
# Mean of each impartiality component (and of the total score) per newspaper.
impartiality_measures = [
    "neutrality dummy",
    "balance of actors dummy",
    "balance of viewpoints dummy",
    "impartiality",
]
df.groupby("Newspaper")[impartiality_measures].mean()

Unnamed: 0_level_0,neutrality dummy,balance of actors dummy,balance of viewpoints dummy,impartiality
Newspaper,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Aachener Zeitung,0.586598,0.286598,0.61134,1.484536
Der Tagesspiegel,0.462675,0.250389,0.64619,1.359253
Die Welt,0.216606,0.268351,0.593261,1.078219
Rheinische Post,0.616,0.227789,0.663579,1.507368
Stuttgarter Zeitung,0.586904,0.264349,0.599838,1.451091
Süddeutsche Zeitung (inkl. Regionalausgaben),0.428226,0.295699,0.620699,1.344624
aachener zeitung (www),0.595238,0.345238,0.52381,1.464286
der tagesspiegel (www),0.390152,0.204545,0.537879,1.132576
die welt (www),0.440678,0.293785,0.564972,1.299435
rheinische post (www),0.710983,0.248555,0.653179,1.612717


In [13]:
# Standard deviation of each impartiality component (and of the total score)
# per newspaper.
impartiality_measures = [
    "neutrality dummy",
    "balance of actors dummy",
    "balance of viewpoints dummy",
    "impartiality",
]
df.groupby("Newspaper")[impartiality_measures].std()

Unnamed: 0_level_0,neutrality dummy,balance of actors dummy,balance of viewpoints dummy,impartiality
Newspaper,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Aachener Zeitung,0.492698,0.452405,0.487697,0.773842
Der Tagesspiegel,0.498799,0.433405,0.478337,0.808527
Die Welt,0.41218,0.443368,0.491521,0.760402
Rheinische Post,0.48646,0.419494,0.472584,0.771114
Stuttgarter Zeitung,0.492589,0.441164,0.490129,0.776001
Süddeutsche Zeitung (inkl. Regionalausgaben),0.494888,0.456418,0.485278,0.810358
aachener zeitung (www),0.492313,0.476867,0.500926,0.795894
der tagesspiegel (www),0.488711,0.404135,0.49951,0.795328
die welt (www),0.497877,0.456787,0.497167,0.822724
rheinische post (www),0.454622,0.43343,0.47734,0.72758


## Means and standard deviations per outlet (print and online editions combined)

In [14]:
# Mean and standard deviation of the impartiality score per outlet, pooling
# the labels listed for each outlet (its print edition and its "(www)" edition).
#
# The labels must match the values of the "Newspaper" column exactly; a label
# that does not occur in the data simply selects no rows.
# NOTE(review): "sueddeutschet politik (www)" is kept as spelled because that
# is how the label appears in the data preview — confirm it is not a typo in
# the source data. "stuttgarter zeitung (www)" does not show up in the
# per-newspaper tables of this notebook — verify whether such rows exist.
outlet_editions = {
    "AZ": ["Aachener Zeitung", "aachener zeitung (www)"],
    "RP": ["Rheinische Post", "rheinische post (www)"],
    "STZ": ["Stuttgarter Zeitung", "stuttgarter zeitung (www)"],
    "Welt": ["Die Welt", "die welt (www)"],
    "TS": ["Der Tagesspiegel", "der tagesspiegel (www)"],
    "SZ": ["Süddeutsche Zeitung (inkl. Regionalausgaben)", "sueddeutschet politik (www)"],
}

for outlet, editions in outlet_editions.items():
    # Filter once per outlet and reuse the result for both statistics.
    outlet_scores = df.loc[df["Newspaper"].isin(editions), "impartiality"]
    print(outlet)
    print(outlet_scores.mean())
    print(outlet_scores.std())

AZ
1.4815465729349737
0.7768146842573067
RP
1.5145211930926217
0.7685572826816981
STZ
1.4615384615384615
0.7733534118563765
Welt
1.117063492063492
0.775868996057464
TS
1.3206451612903225
0.8105340016774143
SZ
1.3460847240051348
0.8113660024574407
