In [32]:
import os
import sys

import pandas as pd 
import statsmodels.formula.api as smf

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from data_input.load_data import load_corpus

import readability
import spacy

In [33]:
corpus = load_corpus("../../data") # to reproduce these analyses, download the respective data release from https://github.com/Blubberli/argmin2024-perspective
# in this notebook, we used the first data release.

In [34]:
corpus.head()

Unnamed: 0,argument_id,argument,stance,topic,gender,age,residence,civil_status,denomination,education,political_spectrum,important_political_issues,rile,galtan
0,201900,Das Schweizer Volk hat die MEI angenommen und ...,FAVOR,Immigration,Männlich,18-34,Land,Ledig,Christ-katholisch,Fachhochschule,Mitte und Konservativ-Liberal,"[Liberale Wirtschaftspolitik, Restriktive Migr...",Mitte,Konservativ-Liberal
1,201901,Eine Legalisierung von Cannabis entlasten die ...,FAVOR,Society,Männlich,18-34,Land,Ledig,Christ-katholisch,Fachhochschule,Mitte und Konservativ-Liberal,"[Liberale Wirtschaftspolitik, Restriktive Migr...",Mitte,Konservativ-Liberal
2,201902,Durch die Förderung der familienergänzenden Be...,FAVOR,Welfare,Weiblich,35-49,Land,Ledig,Nicht bekannt,Universität,Mitte und Konservativ,"[Offene Aussenpolitik, Liberale Wirtschaftspol...",Mitte,Konservativ
3,201903,Ich ziehe eine Elternzeit vor. Die Zeit nach d...,AGAINST,Welfare,Weiblich,35-49,Land,Ledig,Nicht bekannt,Universität,Mitte und Konservativ,"[Offene Aussenpolitik, Liberale Wirtschaftspol...",Mitte,Konservativ
4,201904,Unser Asylrecht muss konsequent angewendet wer...,AGAINST,Immigration,Weiblich,35-49,Land,Ledig,Nicht bekannt,Universität,Mitte und Konservativ,"[Offene Aussenpolitik, Liberale Wirtschaftspol...",Mitte,Konservativ


# Surface features: readability like 'FleschReadingEase' and 'GunningFogIndex' and also further sentence info ones

In [35]:
corpus['FleschReadingEase'] = corpus['argument'].apply(lambda x: readability.getmeasures(x, lang='de')['readability grades']['FleschReadingEase'])
corpus['GunningFogIndex'] = corpus['argument'].apply(lambda x: readability.getmeasures(x, lang='de')['readability grades']['GunningFogIndex'])

corpus['characters_per_word'] = corpus['argument'].apply(lambda x: readability.getmeasures(x, lang='de')['sentence info']['characters_per_word'])
corpus['words_per_sentence'] = corpus['argument'].apply(lambda x: readability.getmeasures(x, lang='de')['sentence info']['words_per_sentence'])
corpus['type_token_ratio'] = corpus['argument'].apply(lambda x: readability.getmeasures(x, lang='de')['sentence info']['type_token_ratio'])
corpus['long_words'] = corpus['argument'].apply(lambda x: readability.getmeasures(x, lang='de')['sentence info']['long_words'])
corpus['complex_words'] = corpus['argument'].apply(lambda x: readability.getmeasures(x, lang='de')['sentence info']['complex_words'])

In [5]:
corpus.head()

Unnamed: 0,argument_id,argument,stance,topic,gender,age,residence,civil_status,denomination,education,...,important_political_issues,rile,galtan,FleschReadingEase,GunningFogIndex,characters_per_word,words_per_sentence,type_token_ratio,long_words,complex_words
0,201900,Das Schweizer Volk hat die MEI angenommen und ...,FAVOR,Immigration,Männlich,18-34,Land,Ledig,Christ-katholisch,Fachhochschule,...,"[Liberale Wirtschaftspolitik, Restriktive Migr...",Mitte,Konservativ-Liberal,25.703448,15.737931,5.758621,29.0,0.896552,9,3
1,201901,Eine Legalisierung von Cannabis entlasten die ...,FAVOR,Society,Männlich,18-34,Land,Ledig,Christ-katholisch,Fachhochschule,...,"[Liberale Wirtschaftspolitik, Restriktive Migr...",Mitte,Konservativ-Liberal,7.991154,13.476923,6.576923,26.0,0.923077,11,2
2,201902,Durch die Förderung der familienergänzenden Be...,FAVOR,Welfare,Weiblich,35-49,Land,Ledig,Nicht bekannt,Universität,...,"[Offene Aussenpolitik, Liberale Wirtschaftspol...",Mitte,Konservativ,21.646522,10.93913,6.695652,23.0,0.956522,7,1
3,201903,Ich ziehe eine Elternzeit vor. Die Zeit nach d...,AGAINST,Welfare,Weiblich,35-49,Land,Ledig,Nicht bekannt,Universität,...,"[Offene Aussenpolitik, Liberale Wirtschaftspol...",Mitte,Konservativ,55.215294,11.505882,5.764706,17.0,1.0,4,2
4,201904,Unser Asylrecht muss konsequent angewendet wer...,AGAINST,Immigration,Weiblich,35-49,Land,Ledig,Nicht bekannt,Universität,...,"[Offene Aussenpolitik, Liberale Wirtschaftspol...",Mitte,Konservativ,32.505,11.466667,7.166667,12.0,0.916667,6,2


---------
# Flesch Reading Ease and Gunning Fog Index

#### - Flesch Reading Ease: *Higher means easier to read, lower means harder to read*
#### - Gunning Fog Index: Estimates the number of years of formal education a person needs to understand the text on the first reading -> *Higher means harder to understand*

## Stance, gender, age, residence

In [6]:
results = smf.ols('FleschReadingEase ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,FleschReadingEase,R-squared:,0.01
Model:,OLS,Adj. R-squared:,0.01
Method:,Least Squares,F-statistic:,55.51
Date:,"Tue, 07 May 2024",Prob (F-statistic):,1.5299999999999999e-68
Time:,15:14:52,Log-Likelihood:,-153800.0
No. Observations:,32387,AIC:,307600.0
Df Residuals:,32380,BIC:,307700.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,23.1242,0.330,70.176,0.000,22.478,23.770
stance[T.FAVOR],-0.9932,0.311,-3.193,0.001,-1.603,-0.383
gender[T.Weiblich],-3.2528,0.326,-9.992,0.000,-3.891,-2.615
age[T.35-49],-0.4331,0.393,-1.101,0.271,-1.204,0.338
age[T.50-64],2.6393,0.389,6.781,0.000,1.876,3.402
age[T.65+],7.3378,0.677,10.846,0.000,6.012,8.664
residence[T.Stadt],-3.6061,0.632,-5.708,0.000,-4.844,-2.368

0,1,2,3
Omnibus:,1022.332,Durbin-Watson:,1.437
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1549.673
Skew:,-0.315,Prob(JB):,0.0
Kurtosis:,3.868,Cond. No.,6.02


In [7]:
results = smf.ols('GunningFogIndex ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,GunningFogIndex,R-squared:,0.017
Model:,OLS,Adj. R-squared:,0.016
Method:,Least Squares,F-statistic:,90.79
Date:,"Tue, 07 May 2024",Prob (F-statistic):,1.77e-113
Time:,15:14:52,Log-Likelihood:,-113620.0
No. Observations:,32387,AIC:,227300.0
Df Residuals:,32380,BIC:,227300.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,16.9718,0.095,178.063,0.000,16.785,17.159
stance[T.FAVOR],0.2646,0.090,2.941,0.003,0.088,0.441
gender[T.Weiblich],0.8626,0.094,9.160,0.000,0.678,1.047
age[T.35-49],-0.2912,0.114,-2.559,0.011,-0.514,-0.068
age[T.50-64],-1.8752,0.113,-16.655,0.000,-2.096,-1.654
age[T.65+],-1.8420,0.196,-9.413,0.000,-2.226,-1.458
residence[T.Stadt],-1.7632,0.183,-9.648,0.000,-2.121,-1.405

0,1,2,3
Omnibus:,3909.977,Durbin-Watson:,0.998
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5526.181
Skew:,0.945,Prob(JB):,0.0
Kurtosis:,3.722,Cond. No.,6.02


#### *What I understand from this:*
#### - Stance: Favor is harder to read/understand than Against
#### - Gender: Females are tending to write arguments that are harder to understand (about 1 year of education in Gunning Fog) than males
#### - Age: Older people are writing easier arguments. Here, Flesch Reading Ease might be more meaningful for reranking because there is a more obvious seperation between [T.50-64] and [T.65+], but Gunning Fog might help more with the younger groups (?)
#### - Residence: "Stadt" people tend to write easier arguments. 

## Topic

In [20]:
results = smf.ols('FleschReadingEase ~ topic', data=corpus).fit()
print('Reference category is Digitisation')
results.summary()

Reference category is Digitisation


0,1,2,3
Dep. Variable:,FleschReadingEase,R-squared:,0.008
Model:,OLS,Adj. R-squared:,0.008
Method:,Least Squares,F-statistic:,24.36
Date:,"Tue, 07 May 2024",Prob (F-statistic):,7.86e-51
Time:,15:31:59,Log-Likelihood:,-153830.0
No. Observations:,32387,AIC:,307700.0
Df Residuals:,32375,BIC:,307800.0
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,20.2604,0.792,25.593,0.000,18.709,21.812
topic[T.Economy],-1.6255,0.941,-1.727,0.084,-3.470,0.219
topic[T.Education],-1.4571,0.949,-1.536,0.125,-3.317,0.403
topic[T.Finances],1.1619,1.024,1.135,0.256,-0.845,3.169
topic[T.Foreign Policy],-0.4375,1.051,-0.416,0.677,-2.497,1.622
topic[T.Healthcare],3.8157,0.944,4.041,0.000,1.965,5.666
topic[T.Immigration],3.4677,0.933,3.716,0.000,1.639,5.297
topic[T.Infrastructure & Environment],0.9157,0.893,1.025,0.305,-0.834,2.666
topic[T.Political System],7.4517,1.104,6.751,0.000,5.288,9.615

0,1,2,3
Omnibus:,977.729,Durbin-Watson:,1.428
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1462.898
Skew:,-0.308,Prob(JB):,0.0
Kurtosis:,3.84,Cond. No.,18.9


In [21]:
results = smf.ols('GunningFogIndex ~ topic', data=corpus).fit()
print('Reference category is Digitisation')
results.summary()

Reference category is Digitisation


0,1,2,3
Dep. Variable:,GunningFogIndex,R-squared:,0.003
Model:,OLS,Adj. R-squared:,0.003
Method:,Least Squares,F-statistic:,10.02
Date:,"Tue, 07 May 2024",Prob (F-statistic):,1.82e-18
Time:,15:32:02,Log-Likelihood:,-113840.0
No. Observations:,32387,AIC:,227700.0
Df Residuals:,32375,BIC:,227800.0
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,16.5312,0.230,71.789,0.000,16.080,16.983
topic[T.Economy],0.8182,0.274,2.989,0.003,0.282,1.355
topic[T.Education],0.5191,0.276,1.881,0.060,-0.022,1.060
topic[T.Finances],-0.2570,0.298,-0.863,0.388,-0.841,0.327
topic[T.Foreign Policy],0.1057,0.306,0.346,0.729,-0.493,0.705
topic[T.Healthcare],-0.6771,0.275,-2.465,0.014,-1.215,-0.139
topic[T.Immigration],0.1558,0.271,0.574,0.566,-0.376,0.688
topic[T.Infrastructure & Environment],0.5071,0.260,1.952,0.051,-0.002,1.016
topic[T.Political System],-0.8541,0.321,-2.660,0.008,-1.483,-0.225

0,1,2,3
Omnibus:,4122.39,Durbin-Watson:,0.978
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5945.726
Skew:,0.973,Prob(JB):,0.0
Kurtosis:,3.786,Cond. No.,18.9


#### *What I understand from this:*
#### - Very mixed -> need to check whether this influences the other effects

## Civil status

In [22]:
results = smf.ols('FleschReadingEase ~ civil_status', data=corpus).fit()
print('Reference category is Aufgelöste Partnerschaft')
results.summary()

Reference category is Aufgelöste Partnerschaft


0,1,2,3
Dep. Variable:,FleschReadingEase,R-squared:,0.004
Model:,OLS,Adj. R-squared:,0.004
Method:,Least Squares,F-statistic:,17.56
Date:,"Tue, 07 May 2024",Prob (F-statistic):,2.18e-26
Time:,15:32:54,Log-Likelihood:,-153890.0
No. Observations:,32387,AIC:,307800.0
Df Residuals:,32378,BIC:,307900.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,21.0175,28.019,0.750,0.453,-33.902,75.937
civil_status[T.Eingetragene Partnerschaft],10.0539,28.278,0.356,0.722,-45.371,65.479
civil_status[T.Geschieden],5.9776,28.037,0.213,0.831,-48.975,60.930
civil_status[T.Getrennt],6.1862,28.322,0.218,0.827,-49.327,61.699
civil_status[T.Konkubinat],4.7638,28.038,0.170,0.865,-50.192,59.720
civil_status[T.Ledig],-0.0651,28.021,-0.002,0.998,-54.988,54.858
civil_status[T.Nicht bekannt],2.3997,28.021,0.086,0.932,-52.522,57.321
civil_status[T.Verheiratet],-0.0033,28.021,-0.000,1.000,-54.925,54.918
civil_status[T.Verwitwet],11.5871,28.067,0.413,0.680,-43.425,66.599

0,1,2,3
Omnibus:,1032.462,Durbin-Watson:,1.428
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1568.789
Skew:,-0.316,Prob(JB):,0.0
Kurtosis:,3.873,Cond. No.,620.0


In [23]:
results = smf.ols('GunningFogIndex ~ civil_status', data=corpus).fit()
print('Reference category is Aufgelöste Partnerschaft')
results.summary()

Reference category is Aufgelöste Partnerschaft


0,1,2,3
Dep. Variable:,GunningFogIndex,R-squared:,0.006
Model:,OLS,Adj. R-squared:,0.006
Method:,Least Squares,F-statistic:,23.72
Date:,"Tue, 07 May 2024",Prob (F-statistic):,1.2e-36
Time:,15:32:57,Log-Likelihood:,-113800.0
No. Observations:,32387,AIC:,227600.0
Df Residuals:,32378,BIC:,227700.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,14.0500,8.125,1.729,0.084,-1.875,29.975
civil_status[T.Eingetragene Partnerschaft],-0.7159,8.200,-0.087,0.930,-16.787,15.356
civil_status[T.Geschieden],-0.0331,8.130,-0.004,0.997,-15.967,15.901
civil_status[T.Getrennt],0.5209,8.213,0.063,0.949,-15.576,16.618
civil_status[T.Konkubinat],2.2509,8.130,0.277,0.782,-13.684,18.186
civil_status[T.Ledig],2.5524,8.125,0.314,0.753,-13.373,18.478
civil_status[T.Nicht bekannt],3.0571,8.125,0.376,0.707,-12.868,18.982
civil_status[T.Verheiratet],2.1312,8.125,0.262,0.793,-13.794,18.057
civil_status[T.Verwitwet],0.5199,8.138,0.064,0.949,-15.432,16.472

0,1,2,3
Omnibus:,4009.881,Durbin-Watson:,0.984
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5722.492
Skew:,0.958,Prob(JB):,0.0
Kurtosis:,3.754,Cond. No.,620.0


#### *What I understand from this:*
#### - Nothing significant!

## Denomination

In [24]:
results = smf.ols('FleschReadingEase ~ denomination', data=corpus).fit()
print('Reference category is Andere Kirchen/Religionsgemeinschafte')
results.summary()

Reference category is Andere Kirchen/Religionsgemeinschafte


0,1,2,3
Dep. Variable:,FleschReadingEase,R-squared:,0.003
Model:,OLS,Adj. R-squared:,0.003
Method:,Least Squares,F-statistic:,11.58
Date:,"Tue, 07 May 2024",Prob (F-statistic):,2.37e-18
Time:,15:33:35,Log-Likelihood:,-153910.0
No. Observations:,32387,AIC:,307800.0
Df Residuals:,32377,BIC:,307900.0
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,36.5228,4.433,8.239,0.000,27.834,45.211
denomination[T.Andere christliche Gemeinschaften],-12.3423,4.575,-2.698,0.007,-21.309,-3.375
denomination[T.Christ-katholisch],-12.0980,4.644,-2.605,0.009,-21.200,-2.996
denomination[T.Christlich-orthodox],-4.4525,7.148,-0.623,0.533,-18.462,9.557
denomination[T.Evangelischreformiert/protestantisch],-16.8080,4.446,-3.780,0.000,-25.523,-8.093
denomination[T.Islamische Gemeinschaften],-0.9622,8.293,-0.116,0.908,-17.217,15.292
denomination[T.Jüdische Gemeinschaften],-9.6385,8.706,-1.107,0.268,-26.702,7.425
denomination[T.Konfessionslos],-13.6097,4.454,-3.055,0.002,-22.340,-4.879
denomination[T.Nicht bekannt],-13.2075,4.439,-2.976,0.003,-21.907,-4.508

0,1,2,3
Omnibus:,1025.177,Durbin-Watson:,1.428
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1552.513
Skew:,-0.316,Prob(JB):,0.0
Kurtosis:,3.867,Cond. No.,108.0


In [25]:
results = smf.ols('GunningFogIndex ~ denomination', data=corpus).fit()
print('Reference category is Andere Kirchen/Religionsgemeinschafte')
results.summary()

Reference category is Andere Kirchen/Religionsgemeinschafte


0,1,2,3
Dep. Variable:,GunningFogIndex,R-squared:,0.017
Model:,OLS,Adj. R-squared:,0.016
Method:,Least Squares,F-statistic:,61.36
Date:,"Tue, 07 May 2024",Prob (F-statistic):,3.56e-112
Time:,15:33:38,Log-Likelihood:,-113620.0
No. Observations:,32387,AIC:,227300.0
Df Residuals:,32377,BIC:,227300.0
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,14.5781,1.278,11.411,0.000,12.074,17.082
denomination[T.Andere christliche Gemeinschaften],-0.1121,1.319,-0.085,0.932,-2.696,2.472
denomination[T.Christ-katholisch],6.2508,1.338,4.670,0.000,3.627,8.874
denomination[T.Christlich-orthodox],-2.9242,2.060,-1.420,0.156,-6.962,1.113
denomination[T.Evangelischreformiert/protestantisch],1.0564,1.281,0.824,0.410,-1.455,3.568
denomination[T.Islamische Gemeinschaften],-2.5534,2.390,-1.068,0.285,-7.238,2.131
denomination[T.Jüdische Gemeinschaften],-4.2628,2.509,-1.699,0.089,-9.181,0.655
denomination[T.Konfessionslos],1.8561,1.284,1.446,0.148,-0.660,4.372
denomination[T.Nicht bekannt],2.8176,1.279,2.203,0.028,0.310,5.325

0,1,2,3
Omnibus:,3755.873,Durbin-Watson:,1.001
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5233.465
Skew:,0.924,Prob(JB):,0.0
Kurtosis:,3.679,Cond. No.,108.0


#### *What I understand from this:*
#### - Christ Catholics (some Swiss catholic sect)are writing the most complex in terms of Gunning Fog: About 6 more years of education needed in contrast to reference category.
#### - In terms of Flesch Reading Ease, the picture is a bit different. Protestans are writing arguments that are harder to read.

## Political spectrum

In [26]:
results = smf.ols('FleschReadingEase ~ political_spectrum', data=corpus).fit()
print('Reference category is Links und Konservativ')
results.summary()

Reference category is Links und Konservativ


0,1,2,3
Dep. Variable:,FleschReadingEase,R-squared:,0.012
Model:,OLS,Adj. R-squared:,0.011
Method:,Least Squares,F-statistic:,47.81
Date:,"Tue, 07 May 2024",Prob (F-statistic):,3.0900000000000002e-77
Time:,15:44:01,Log-Likelihood:,-153770.0
No. Observations:,32387,AIC:,307600.0
Df Residuals:,32378,BIC:,307600.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,22.4255,0.586,38.291,0.000,21.278,23.573
political_spectrum[T.Links und Konservativ-Liberal],-1.7175,0.624,-2.751,0.006,-2.941,-0.494
political_spectrum[T.Links und Liberal],8.3707,1.884,4.443,0.000,4.678,12.064
political_spectrum[T.Mitte und Konservativ],-2.0627,0.687,-3.004,0.003,-3.408,-0.717
political_spectrum[T.Mitte und Konservativ-Liberal],4.7417,0.722,6.564,0.000,3.326,6.158
political_spectrum[T.Mitte und Liberal],8.4978,1.038,8.184,0.000,6.463,10.533
political_spectrum[T.Rechts und Konservativ],-2.0943,1.895,-1.105,0.269,-5.808,1.620
political_spectrum[T.Rechts und Konservativ-Liberal],5.4501,1.043,5.227,0.000,3.406,7.494
political_spectrum[T.Rechts und Liberal],0.1624,1.395,0.116,0.907,-2.572,2.897

0,1,2,3
Omnibus:,1046.392,Durbin-Watson:,1.436
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1610.208
Skew:,-0.316,Prob(JB):,0.0
Kurtosis:,3.891,Cond. No.,16.5


In [27]:
results = smf.ols('GunningFogIndex ~ political_spectrum', data=corpus).fit()
print('Reference category is Links und Konservativ')
results.summary()

Reference category is Links und Konservativ


0,1,2,3
Dep. Variable:,GunningFogIndex,R-squared:,0.03
Model:,OLS,Adj. R-squared:,0.03
Method:,Least Squares,F-statistic:,124.3
Date:,"Tue, 07 May 2024",Prob (F-statistic):,3.97e-206
Time:,15:44:06,Log-Likelihood:,-113400.0
No. Observations:,32387,AIC:,226800.0
Df Residuals:,32378,BIC:,226900.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,15.0471,0.168,89.362,0.000,14.717,15.377
political_spectrum[T.Links und Konservativ-Liberal],2.7682,0.180,15.419,0.000,2.416,3.120
political_spectrum[T.Links und Liberal],2.5168,0.542,4.646,0.000,1.455,3.579
political_spectrum[T.Mitte und Konservativ],0.9961,0.197,5.046,0.000,0.609,1.383
political_spectrum[T.Mitte und Konservativ-Liberal],-0.4707,0.208,-2.266,0.023,-0.878,-0.064
political_spectrum[T.Mitte und Liberal],-0.8113,0.299,-2.718,0.007,-1.396,-0.226
political_spectrum[T.Rechts und Konservativ],-0.7989,0.545,-1.467,0.143,-1.867,0.269
political_spectrum[T.Rechts und Konservativ-Liberal],-0.4587,0.300,-1.530,0.126,-1.046,0.129
political_spectrum[T.Rechts und Liberal],-0.7963,0.401,-1.985,0.047,-1.582,-0.010

0,1,2,3
Omnibus:,3501.9,Durbin-Watson:,1.014
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4767.363
Skew:,0.889,Prob(JB):,0.0
Kurtosis:,3.609,Cond. No.,16.5


In [28]:
results = smf.ols('FleschReadingEase ~ rile', data=corpus).fit()
print('Reference category is Links')
results.summary()

Reference category is Links


0,1,2,3
Dep. Variable:,FleschReadingEase,R-squared:,0.003
Model:,OLS,Adj. R-squared:,0.003
Method:,Least Squares,F-statistic:,49.32
Date:,"Tue, 07 May 2024",Prob (F-statistic):,4.0900000000000002e-22
Time:,15:44:36,Log-Likelihood:,-153910.0
No. Observations:,32387,AIC:,307800.0
Df Residuals:,32384,BIC:,307900.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,21.0402,0.203,103.784,0.000,20.643,21.438
rile[T.Mitte],2.8780,0.331,8.697,0.000,2.229,3.527
rile[T.Rechts],4.3647,0.696,6.271,0.000,3.001,5.729

0,1,2,3
Omnibus:,960.979,Durbin-Watson:,1.428
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1445.055
Skew:,-0.302,Prob(JB):,0.0
Kurtosis:,3.84,Cond. No.,4.85


In [29]:
results = smf.ols('GunningFogIndex ~ rile', data=corpus).fit()
print('Reference category is Links')
results.summary()

Reference category is Links


0,1,2,3
Dep. Variable:,GunningFogIndex,R-squared:,0.019
Model:,OLS,Adj. R-squared:,0.019
Method:,Least Squares,F-statistic:,321.5
Date:,"Tue, 07 May 2024",Prob (F-statistic):,5.36e-139
Time:,15:44:37,Log-Likelihood:,-113570.0
No. Observations:,32387,AIC:,227200.0
Df Residuals:,32384,BIC:,227200.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,17.4833,0.058,299.665,0.000,17.369,17.598
rile[T.Mitte],-2.1632,0.095,-22.716,0.000,-2.350,-1.977
rile[T.Rechts],-3.0334,0.200,-15.145,0.000,-3.426,-2.641

0,1,2,3
Omnibus:,3643.492,Durbin-Watson:,1.002
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5021.894
Skew:,0.91,Prob(JB):,0.0
Kurtosis:,3.641,Cond. No.,4.85


In [30]:
results = smf.ols('FleschReadingEase ~ galtan', data=corpus).fit()
print('Reference category is Konservativ')
results.summary()

Reference category is Konservativ


0,1,2,3
Dep. Variable:,FleschReadingEase,R-squared:,0.003
Model:,OLS,Adj. R-squared:,0.003
Method:,Least Squares,F-statistic:,56.39
Date:,"Tue, 07 May 2024",Prob (F-statistic):,3.58e-25
Time:,15:44:39,Log-Likelihood:,-153910.0
No. Observations:,32387,AIC:,307800.0
Df Residuals:,32384,BIC:,307800.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,20.9079,0.303,69.111,0.000,20.315,21.501
galtan[T.Konservativ-Liberal],1.4200,0.357,3.981,0.000,0.721,2.119
galtan[T.Liberal],7.7337,0.728,10.617,0.000,6.306,9.162

0,1,2,3
Omnibus:,1037.634,Durbin-Watson:,1.425
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1574.475
Skew:,-0.318,Prob(JB):,0.0
Kurtosis:,3.873,Cond. No.,6.04


In [31]:
results = smf.ols('GunningFogIndex ~ galtan', data=corpus).fit()
print('Reference category is Konservativ')
results.summary()

Reference category is Konservativ


0,1,2,3
Dep. Variable:,GunningFogIndex,R-squared:,0.008
Model:,OLS,Adj. R-squared:,0.008
Method:,Least Squares,F-statistic:,127.8
Date:,"Tue, 07 May 2024",Prob (F-statistic):,5.34e-56
Time:,15:44:41,Log-Likelihood:,-113770.0
No. Observations:,32387,AIC:,227500.0
Df Residuals:,32384,BIC:,227600.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,15.7294,0.088,179.568,0.000,15.558,15.901
galtan[T.Konservativ-Liberal],1.2912,0.103,12.503,0.000,1.089,1.494
galtan[T.Liberal],-1.0374,0.211,-4.918,0.000,-1.451,-0.624

0,1,2,3
Omnibus:,3886.344,Durbin-Watson:,0.987
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5474.19
Skew:,0.944,Prob(JB):,0.0
Kurtosis:,3.701,Cond. No.,6.04


#### *What I understand from this:*
#### - Flesch says: The more right the easier to read. The more liberal the easier to read. 
#### - GunningFog says: The more right the easier to read. Liberals write easier arguments but conservative-liberal are writing more complex/harder to read arguments

--------
# More stylistic features

#### - characters_per_word
#### - words_per_sentence
#### - type_token_ratio
#### - long_words
#### - complex_words

## Stance, gender, age, residence

In [38]:
results = smf.ols('characters_per_word ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,characters_per_word,R-squared:,0.013
Model:,OLS,Adj. R-squared:,0.013
Method:,Least Squares,F-statistic:,70.33
Date:,"Wed, 08 May 2024",Prob (F-statistic):,2.02e-87
Time:,08:08:47,Log-Likelihood:,-48088.0
No. Observations:,32387,AIC:,96190.0
Df Residuals:,32380,BIC:,96250.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,5.8842,0.013,467.006,0.000,5.859,5.909
stance[T.FAVOR],-0.0184,0.012,-1.546,0.122,-0.042,0.005
gender[T.Weiblich],0.0192,0.012,1.542,0.123,-0.005,0.044
age[T.35-49],0.0479,0.015,3.182,0.001,0.018,0.077
age[T.50-64],0.0949,0.015,6.378,0.000,0.066,0.124
age[T.65+],-0.1318,0.026,-5.095,0.000,-0.182,-0.081
residence[T.Stadt],0.4248,0.024,17.584,0.000,0.377,0.472

0,1,2,3
Omnibus:,4471.127,Durbin-Watson:,1.096
Prob(Omnibus):,0.0,Jarque-Bera (JB):,9878.076
Skew:,0.827,Prob(JB):,0.0
Kurtosis:,5.141,Cond. No.,6.02


In [39]:
results = smf.ols('words_per_sentence ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,words_per_sentence,R-squared:,0.013
Model:,OLS,Adj. R-squared:,0.013
Method:,Least Squares,F-statistic:,69.43
Date:,"Wed, 08 May 2024",Prob (F-statistic):,2.8100000000000003e-86
Time:,08:08:54,Log-Likelihood:,-138870.0
No. Observations:,32387,AIC:,277800.0
Df Residuals:,32380,BIC:,277800.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,27.5680,0.208,132.653,0.000,27.161,27.975
stance[T.FAVOR],0.1933,0.196,0.985,0.324,-0.191,0.578
gender[T.Weiblich],1.5686,0.205,7.640,0.000,1.166,1.971
age[T.35-49],-0.5343,0.248,-2.153,0.031,-1.021,-0.048
age[T.50-64],-3.8923,0.245,-15.856,0.000,-4.373,-3.411
age[T.65+],-4.2239,0.427,-9.899,0.000,-5.060,-3.388
residence[T.Stadt],-1.7803,0.398,-4.468,0.000,-2.561,-0.999

0,1,2,3
Omnibus:,6325.474,Durbin-Watson:,1.052
Prob(Omnibus):,0.0,Jarque-Bera (JB):,10926.898
Skew:,1.29,Prob(JB):,0.0
Kurtosis:,4.2,Cond. No.,6.02


In [40]:
results = smf.ols('type_token_ratio ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,type_token_ratio,R-squared:,0.007
Model:,OLS,Adj. R-squared:,0.007
Method:,Least Squares,F-statistic:,38.03
Date:,"Wed, 08 May 2024",Prob (F-statistic):,2.7299999999999998e-46
Time:,08:08:59,Log-Likelihood:,38703.0
No. Observations:,32387,AIC:,-77390.0
Df Residuals:,32380,BIC:,-77330.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.9285,0.001,1074.629,0.000,0.927,0.930
stance[T.FAVOR],-0.0031,0.001,-3.784,0.000,-0.005,-0.001
gender[T.Weiblich],-0.0046,0.001,-5.383,0.000,-0.006,-0.003
age[T.35-49],0.0019,0.001,1.889,0.059,-7.35e-05,0.004
age[T.50-64],0.0108,0.001,10.536,0.000,0.009,0.013
age[T.65+],0.0116,0.002,6.536,0.000,0.008,0.015
residence[T.Stadt],0.0101,0.002,6.080,0.000,0.007,0.013

0,1,2,3
Omnibus:,3514.141,Durbin-Watson:,1.291
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4777.786
Skew:,-0.932,Prob(JB):,0.0
Kurtosis:,3.254,Cond. No.,6.02


In [41]:
results = smf.ols('long_words ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,long_words,R-squared:,0.011
Model:,OLS,Adj. R-squared:,0.011
Method:,Least Squares,F-statistic:,58.92
Date:,"Wed, 08 May 2024",Prob (F-statistic):,6.99e-73
Time:,08:09:03,Log-Likelihood:,-104840.0
No. Observations:,32387,AIC:,209700.0
Df Residuals:,32380,BIC:,209800.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,9.7057,0.073,133.548,0.000,9.563,9.848
stance[T.FAVOR],-0.0055,0.069,-0.080,0.936,-0.140,0.129
gender[T.Weiblich],0.3273,0.072,4.559,0.000,0.187,0.468
age[T.35-49],0.0254,0.087,0.292,0.770,-0.145,0.195
age[T.50-64],-1.0915,0.086,-12.715,0.000,-1.260,-0.923
age[T.65+],-1.8757,0.149,-12.571,0.000,-2.168,-1.583
residence[T.Stadt],0.1314,0.139,0.943,0.346,-0.142,0.404

0,1,2,3
Omnibus:,4360.129,Durbin-Watson:,1.178
Prob(Omnibus):,0.0,Jarque-Bera (JB):,6326.182
Skew:,1.055,Prob(JB):,0.0
Kurtosis:,3.481,Cond. No.,6.02


In [42]:
results = smf.ols('complex_words ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,complex_words,R-squared:,0.017
Model:,OLS,Adj. R-squared:,0.017
Method:,Least Squares,F-statistic:,94.78
Date:,"Wed, 08 May 2024",Prob (F-statistic):,1.5e-118
Time:,08:09:07,Log-Likelihood:,-90432.0
No. Observations:,32387,AIC:,180900.0
Df Residuals:,32380,BIC:,180900.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.4095,0.047,94.670,0.000,4.318,4.501
stance[T.FAVOR],0.1850,0.044,4.207,0.000,0.099,0.271
gender[T.Weiblich],0.3894,0.046,8.462,0.000,0.299,0.480
age[T.35-49],-0.0350,0.056,-0.630,0.529,-0.144,0.074
age[T.50-64],-0.8042,0.055,-14.617,0.000,-0.912,-0.696
age[T.65+],-0.8207,0.096,-8.582,0.000,-1.008,-0.633
residence[T.Stadt],-1.1479,0.089,-12.853,0.000,-1.323,-0.973

0,1,2,3
Omnibus:,10700.509,Durbin-Watson:,0.88
Prob(Omnibus):,0.0,Jarque-Bera (JB):,32439.653
Skew:,1.736,Prob(JB):,0.0
Kurtosis:,6.461,Cond. No.,6.02


#### - **characters_per_word:** Stadt people use more characters per word
#### - **words_per_sentence:** Females add more words to a sentence. The older the shorter the sentences. Also Stadt people use shorter sentences.
#### - **type_token_ratio:** Very little deviation (as expected).
#### - **long_words:** Females and older people tend to write a little less longer words.
#### - **complex_words:** Same as with long words.

## Topic

In [43]:
results = smf.ols('characters_per_word ~ topic', data=corpus).fit()
print('Reference category is Digitisation')
results.summary()

Reference category is Digitisation


0,1,2,3
Dep. Variable:,characters_per_word,R-squared:,0.005
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,16.06
Date:,"Wed, 08 May 2024",Prob (F-statistic):,6.2e-32
Time:,08:22:58,Log-Likelihood:,-48210.0
No. Observations:,32387,AIC:,96440.0
Df Residuals:,32375,BIC:,96540.0
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,6.1052,0.030,201.138,0.000,6.046,6.165
topic[T.Economy],-0.0577,0.036,-1.599,0.110,-0.128,0.013
topic[T.Education],-0.1160,0.036,-3.190,0.001,-0.187,-0.045
topic[T.Finances],-0.0456,0.039,-1.162,0.245,-0.123,0.031
topic[T.Foreign Policy],-0.0691,0.040,-1.716,0.086,-0.148,0.010
topic[T.Healthcare],-0.2076,0.036,-5.734,0.000,-0.279,-0.137
topic[T.Immigration],-0.2364,0.036,-6.607,0.000,-0.307,-0.166
topic[T.Infrastructure & Environment],-0.1641,0.034,-4.794,0.000,-0.231,-0.097
topic[T.Political System],-0.2938,0.042,-6.943,0.000,-0.377,-0.211

0,1,2,3
Omnibus:,4119.727,Durbin-Watson:,1.075
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8647.357
Skew:,0.787,Prob(JB):,0.0
Kurtosis:,4.983,Cond. No.,18.9


In [44]:
results = smf.ols('words_per_sentence ~ topic', data=corpus).fit()
print('Reference category is Digitisation')
results.summary()

Reference category is Digitisation


0,1,2,3
Dep. Variable:,words_per_sentence,R-squared:,0.002
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,5.211
Date:,"Wed, 08 May 2024",Prob (F-statistic):,2.95e-08
Time:,08:23:03,Log-Likelihood:,-139050.0
No. Observations:,32387,AIC:,278100.0
Df Residuals:,32375,BIC:,278200.0
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,25.6950,0.502,51.234,0.000,24.712,26.678
topic[T.Economy],1.8899,0.596,3.170,0.002,0.721,3.058
topic[T.Education],2.3614,0.601,3.928,0.000,1.183,3.540
topic[T.Finances],0.2844,0.649,0.439,0.661,-0.987,1.556
topic[T.Foreign Policy],0.2142,0.666,0.322,0.748,-1.090,1.519
topic[T.Healthcare],0.8023,0.598,1.341,0.180,-0.370,1.975
topic[T.Immigration],0.8485,0.591,1.435,0.151,-0.310,2.007
topic[T.Infrastructure & Environment],1.0574,0.566,1.869,0.062,-0.051,2.166
topic[T.Political System],-0.9341,0.699,-1.336,0.182,-2.305,0.436

0,1,2,3
Omnibus:,6511.973,Durbin-Watson:,1.037
Prob(Omnibus):,0.0,Jarque-Bera (JB):,11439.37
Skew:,1.314,Prob(JB):,0.0
Kurtosis:,4.254,Cond. No.,18.9


In [45]:
results = smf.ols('type_token_ratio ~ topic', data=corpus).fit()
print('Reference category is Digitisation')
results.summary()

Reference category is Digitisation


0,1,2,3
Dep. Variable:,type_token_ratio,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,2.604
Date:,"Wed, 08 May 2024",Prob (F-statistic):,0.00258
Time:,08:23:07,Log-Likelihood:,38603.0
No. Observations:,32387,AIC:,-77180.0
Df Residuals:,32375,BIC:,-77080.0
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.9344,0.002,449.186,0.000,0.930,0.938
topic[T.Economy],-0.0040,0.002,-1.634,0.102,-0.009,0.001
topic[T.Education],-0.0062,0.002,-2.479,0.013,-0.011,-0.001
topic[T.Finances],-0.0021,0.003,-0.765,0.444,-0.007,0.003
topic[T.Foreign Policy],-0.0044,0.003,-1.584,0.113,-0.010,0.001
topic[T.Healthcare],-0.0030,0.002,-1.194,0.233,-0.008,0.002
topic[T.Immigration],-0.0047,0.002,-1.903,0.057,-0.009,0.000
topic[T.Infrastructure & Environment],-0.0015,0.002,-0.647,0.517,-0.006,0.003
topic[T.Political System],0.0010,0.003,0.356,0.722,-0.005,0.007

0,1,2,3
Omnibus:,3584.235,Durbin-Watson:,1.278
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4900.596
Skew:,-0.944,Prob(JB):,0.0
Kurtosis:,3.265,Cond. No.,18.9


In [46]:
results = smf.ols('long_words ~ topic', data=corpus).fit()
print('Reference category is Digitisation')
results.summary()

Reference category is Digitisation


0,1,2,3
Dep. Variable:,long_words,R-squared:,0.005
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,14.99
Date:,"Wed, 08 May 2024",Prob (F-statistic):,1.63e-29
Time:,08:23:11,Log-Likelihood:,-104930.0
No. Observations:,32387,AIC:,209900.0
Df Residuals:,32375,BIC:,210000.0
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,9.4984,0.175,54.300,0.000,9.156,9.841
topic[T.Economy],0.5403,0.208,2.598,0.009,0.133,0.948
topic[T.Education],0.5299,0.210,2.528,0.011,0.119,0.941
topic[T.Finances],0.1695,0.226,0.749,0.454,-0.274,0.613
topic[T.Foreign Policy],-0.4545,0.232,-1.958,0.050,-0.909,0.000
topic[T.Healthcare],-0.4100,0.209,-1.965,0.049,-0.819,-0.001
topic[T.Immigration],-0.3571,0.206,-1.732,0.083,-0.761,0.047
topic[T.Infrastructure & Environment],0.3401,0.197,1.724,0.085,-0.047,0.727
topic[T.Political System],-1.0877,0.244,-4.460,0.000,-1.566,-0.610

0,1,2,3
Omnibus:,4340.309,Durbin-Watson:,1.171
Prob(Omnibus):,0.0,Jarque-Bera (JB):,6289.269
Skew:,1.055,Prob(JB):,0.0
Kurtosis:,3.459,Cond. No.,18.9


In [47]:
results = smf.ols('complex_words ~ topic', data=corpus).fit()
print('Reference category is Digitisation')
results.summary()

Reference category is Digitisation


0,1,2,3
Dep. Variable:,complex_words,R-squared:,0.006
Model:,OLS,Adj. R-squared:,0.006
Method:,Least Squares,F-statistic:,18.45
Date:,"Wed, 08 May 2024",Prob (F-statistic):,2.41e-37
Time:,08:23:14,Log-Likelihood:,-90613.0
No. Observations:,32387,AIC:,181300.0
Df Residuals:,32375,BIC:,181400.0
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.4888,0.112,39.931,0.000,4.268,4.709
topic[T.Economy],0.3116,0.134,2.332,0.020,0.050,0.574
topic[T.Education],-0.1051,0.135,-0.780,0.435,-0.369,0.159
topic[T.Finances],-0.4025,0.145,-2.769,0.006,-0.687,-0.118
topic[T.Foreign Policy],-0.1717,0.149,-1.151,0.250,-0.464,0.121
topic[T.Healthcare],-0.6487,0.134,-4.838,0.000,-0.911,-0.386
topic[T.Immigration],-0.1687,0.133,-1.273,0.203,-0.428,0.091
topic[T.Infrastructure & Environment],0.2114,0.127,1.667,0.095,-0.037,0.460
topic[T.Political System],-0.6416,0.157,-4.093,0.000,-0.949,-0.334

0,1,2,3
Omnibus:,10903.629,Durbin-Watson:,0.862
Prob(Omnibus):,0.0,Jarque-Bera (JB):,33702.711
Skew:,1.763,Prob(JB):,0.0
Kurtosis:,6.542,Cond. No.,18.9


#### - In general, these style features seem to be more topic independent than e.g. text length. But (reasonably) there is some correlation between complex words and topics

## Civil status

In [61]:
results = smf.ols('characters_per_word ~ civil_status', data=corpus).fit()
print('Reference category is Aufgelöste Partnerschaft')
results.summary()

Reference category is Aufgelöste Partnerschaft


0,1,2,3
Dep. Variable:,characters_per_word,R-squared:,0.012
Model:,OLS,Adj. R-squared:,0.012
Method:,Least Squares,F-statistic:,49.63
Date:,"Wed, 08 May 2024",Prob (F-statistic):,2.5900000000000002e-80
Time:,08:42:29,Log-Likelihood:,-48100.0
No. Observations:,32387,AIC:,96220.0
Df Residuals:,32378,BIC:,96290.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,5.7813,1.069,5.410,0.000,3.687,7.876
civil_status[T.Eingetragene Partnerschaft],0.1757,1.078,0.163,0.871,-1.938,2.290
civil_status[T.Geschieden],0.2239,1.069,0.209,0.834,-1.872,2.320
civil_status[T.Getrennt],0.1130,1.080,0.105,0.917,-2.004,2.230
civil_status[T.Konkubinat],-0.0286,1.069,-0.027,0.979,-2.124,2.067
civil_status[T.Ledig],0.2530,1.069,0.237,0.813,-1.842,2.348
civil_status[T.Nicht bekannt],0.0273,1.069,0.026,0.980,-2.067,2.122
civil_status[T.Verheiratet],0.2677,1.069,0.251,0.802,-1.827,2.362
civil_status[T.Verwitwet],-0.0886,1.070,-0.083,0.934,-2.187,2.010

0,1,2,3
Omnibus:,4404.091,Durbin-Watson:,1.094
Prob(Omnibus):,0.0,Jarque-Bera (JB):,9748.023
Skew:,0.815,Prob(JB):,0.0
Kurtosis:,5.136,Cond. No.,620.0


In [62]:
results = smf.ols('words_per_sentence ~ civil_status', data=corpus).fit()
print('Reference category is Aufgelöste Partnerschaft')
results.summary()

Reference category is Aufgelöste Partnerschaft


0,1,2,3
Dep. Variable:,words_per_sentence,R-squared:,0.005
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,22.01
Date:,"Wed, 08 May 2024",Prob (F-statistic):,8.429999999999999e-34
Time:,08:42:34,Log-Likelihood:,-138990.0
No. Observations:,32387,AIC:,278000.0
Df Residuals:,32378,BIC:,278100.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,32.0000,17.684,1.810,0.070,-2.661,66.661
civil_status[T.Eingetragene Partnerschaft],-9.4815,17.847,-0.531,0.595,-44.462,25.499
civil_status[T.Geschieden],-12.0865,17.695,-0.683,0.495,-46.769,22.596
civil_status[T.Getrennt],-7.0326,17.875,-0.393,0.694,-42.069,28.004
civil_status[T.Konkubinat],-6.8912,17.696,-0.389,0.697,-41.576,27.793
civil_status[T.Ledig],-4.9731,17.685,-0.281,0.779,-39.637,29.691
civil_status[T.Nicht bekannt],-4.5801,17.685,-0.259,0.796,-39.243,30.083
civil_status[T.Verheiratet],-6.0757,17.685,-0.344,0.731,-40.739,28.587
civil_status[T.Verwitwet],-7.4492,17.714,-0.421,0.674,-42.169,27.271

0,1,2,3
Omnibus:,6381.892,Durbin-Watson:,1.043
Prob(Omnibus):,0.0,Jarque-Bera (JB):,11076.556
Skew:,1.298,Prob(JB):,0.0
Kurtosis:,4.213,Cond. No.,620.0


In [63]:
results = smf.ols('type_token_ratio ~ civil_status', data=corpus).fit()
print('Reference category is Aufgelöste Partnerschaft')
results.summary()

Reference category is Aufgelöste Partnerschaft


0,1,2,3
Dep. Variable:,type_token_ratio,R-squared:,0.004
Model:,OLS,Adj. R-squared:,0.004
Method:,Least Squares,F-statistic:,17.69
Date:,"Wed, 08 May 2024",Prob (F-statistic):,1.33e-26
Time:,08:42:39,Log-Likelihood:,38659.0
No. Observations:,32387,AIC:,-77300.0
Df Residuals:,32378,BIC:,-77230.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.9062,0.073,12.355,0.000,0.762,1.050
civil_status[T.Eingetragene Partnerschaft],0.0468,0.074,0.632,0.527,-0.098,0.192
civil_status[T.Geschieden],0.0448,0.073,0.610,0.542,-0.099,0.189
civil_status[T.Getrennt],0.0230,0.074,0.311,0.756,-0.122,0.168
civil_status[T.Konkubinat],0.0204,0.073,0.278,0.781,-0.123,0.164
civil_status[T.Ledig],0.0252,0.073,0.344,0.731,-0.119,0.169
civil_status[T.Nicht bekannt],0.0195,0.073,0.266,0.790,-0.124,0.163
civil_status[T.Verheiratet],0.0271,0.073,0.370,0.712,-0.117,0.171
civil_status[T.Verwitwet],0.0279,0.073,0.379,0.704,-0.116,0.172

0,1,2,3
Omnibus:,3534.056,Durbin-Watson:,1.286
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4812.805
Skew:,-0.936,Prob(JB):,0.0
Kurtosis:,3.255,Cond. No.,620.0


In [64]:
results = smf.ols('complex_words ~ civil_status', data=corpus).fit()
print('Reference category is Aufgelöste Partnerschaft')
results.summary()

Reference category is Aufgelöste Partnerschaft


0,1,2,3
Dep. Variable:,complex_words,R-squared:,0.007
Model:,OLS,Adj. R-squared:,0.007
Method:,Least Squares,F-statistic:,27.99
Date:,"Wed, 08 May 2024",Prob (F-statistic):,8.1e-44
Time:,08:42:45,Log-Likelihood:,-90603.0
No. Observations:,32387,AIC:,181200.0
Df Residuals:,32378,BIC:,181300.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.0000,3.970,0.252,0.801,-6.781,8.781
civil_status[T.Eingetragene Partnerschaft],1.2963,4.006,0.324,0.746,-6.556,9.149
civil_status[T.Geschieden],2.0780,3.972,0.523,0.601,-5.708,9.864
civil_status[T.Getrennt],1.9783,4.013,0.493,0.622,-5.887,9.843
civil_status[T.Konkubinat],3.3524,3.972,0.844,0.399,-4.434,11.138
civil_status[T.Ledig],3.1335,3.970,0.789,0.430,-4.648,10.915
civil_status[T.Nicht bekannt],3.5988,3.970,0.907,0.365,-4.182,11.380
civil_status[T.Verheiratet],3.1294,3.970,0.788,0.431,-4.652,10.911
civil_status[T.Verwitwet],2.1220,3.976,0.534,0.594,-5.672,9.916

0,1,2,3
Omnibus:,10883.173,Durbin-Watson:,0.867
Prob(Omnibus):,0.0,Jarque-Bera (JB):,33569.188
Skew:,1.76,Prob(JB):,0.0
Kurtosis:,6.533,Cond. No.,620.0


In [65]:
results = smf.ols('long_words ~ civil_status', data=corpus).fit()
print('Reference category is Aufgelöste Partnerschaft')
results.summary()

Reference category is Aufgelöste Partnerschaft


0,1,2,3
Dep. Variable:,long_words,R-squared:,0.005
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,19.74
Date:,"Wed, 08 May 2024",Prob (F-statistic):,5.2699999999999996e-30
Time:,08:42:50,Log-Likelihood:,-104940.0
No. Observations:,32387,AIC:,209900.0
Df Residuals:,32378,BIC:,210000.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,11.0000,6.180,1.780,0.075,-1.113,23.113
civil_status[T.Eingetragene Partnerschaft],-3.8704,6.237,-0.621,0.535,-16.095,8.354
civil_status[T.Geschieden],-3.8220,6.184,-0.618,0.537,-15.942,8.298
civil_status[T.Getrennt],-2.5652,6.247,-0.411,0.681,-14.809,9.679
civil_status[T.Konkubinat],-2.0080,6.184,-0.325,0.745,-14.129,10.113
civil_status[T.Ledig],-1.2853,6.180,-0.208,0.835,-13.399,10.828
civil_status[T.Nicht bekannt],-1.6133,6.180,-0.261,0.794,-13.727,10.500
civil_status[T.Verheiratet],-1.5544,6.180,-0.252,0.801,-13.668,10.559
civil_status[T.Verwitwet],-3.2475,6.190,-0.525,0.600,-15.381,8.886

0,1,2,3
Omnibus:,4333.115,Durbin-Watson:,1.172
Prob(Omnibus):,0.0,Jarque-Bera (JB):,6275.558
Skew:,1.054,Prob(JB):,0.0
Kurtosis:,3.456,Cond. No.,620.0


#### - Nothing really significant!

## Denomination

In [66]:
results = smf.ols('characters_per_word ~ denomination', data=corpus).fit()
print('Reference category is Andere Kirchen/Religionsgemeinschafte')
results.summary()

Reference category is Andere Kirchen/Religionsgemeinschafte


0,1,2,3
Dep. Variable:,characters_per_word,R-squared:,0.034
Model:,OLS,Adj. R-squared:,0.034
Method:,Least Squares,F-statistic:,127.7
Date:,"Wed, 08 May 2024",Prob (F-statistic):,1.68e-237
Time:,08:43:18,Log-Likelihood:,-47733.0
No. Observations:,32387,AIC:,95490.0
Df Residuals:,32377,BIC:,95570.0
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,5.4702,0.167,32.744,0.000,5.143,5.798
denomination[T.Andere christliche Gemeinschaften],0.6291,0.172,3.649,0.000,0.291,0.967
denomination[T.Christ-katholisch],-0.1959,0.175,-1.119,0.263,-0.539,0.147
denomination[T.Christlich-orthodox],0.6741,0.269,2.503,0.012,0.146,1.202
denomination[T.Evangelischreformiert/protestantisch],0.7288,0.168,4.350,0.000,0.400,1.057
denomination[T.Islamische Gemeinschaften],0.2040,0.313,0.653,0.514,-0.409,0.817
denomination[T.Jüdische Gemeinschaften],0.8241,0.328,2.512,0.012,0.181,1.467
denomination[T.Konfessionslos],0.4811,0.168,2.866,0.004,0.152,0.810
denomination[T.Nicht bekannt],0.3038,0.167,1.816,0.069,-0.024,0.632

0,1,2,3
Omnibus:,4577.56,Durbin-Watson:,1.132
Prob(Omnibus):,0.0,Jarque-Bera (JB):,10521.66
Skew:,0.831,Prob(JB):,0.0
Kurtosis:,5.244,Cond. No.,108.0


In [67]:
results = smf.ols('words_per_sentence ~ denomination', data=corpus).fit()
print('Reference category is Andere Kirchen/Religionsgemeinschafte')
results.summary()

Reference category is Andere Kirchen/Religionsgemeinschafte


0,1,2,3
Dep. Variable:,words_per_sentence,R-squared:,0.01
Model:,OLS,Adj. R-squared:,0.01
Method:,Least Squares,F-statistic:,35.99
Date:,"Wed, 08 May 2024",Prob (F-statistic):,4.77e-64
Time:,08:43:24,Log-Likelihood:,-138910.0
No. Observations:,32387,AIC:,277800.0
Df Residuals:,32377,BIC:,277900.0
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,17.9375,2.790,6.430,0.000,12.469,23.406
denomination[T.Andere christliche Gemeinschaften],5.3551,2.879,1.860,0.063,-0.288,10.999
denomination[T.Christ-katholisch],14.7991,2.923,5.063,0.000,9.070,20.528
denomination[T.Christlich-orthodox],0.1825,4.498,0.041,0.968,-8.635,9.000
denomination[T.Evangelischreformiert/protestantisch],7.3857,2.798,2.639,0.008,1.901,12.870
denomination[T.Islamische Gemeinschaften],-2.5000,5.219,-0.479,0.632,-12.730,7.730
denomination[T.Jüdische Gemeinschaften],-1.9375,5.479,-0.354,0.724,-12.677,8.802
denomination[T.Konfessionslos],8.8452,2.803,3.155,0.002,3.351,14.340
denomination[T.Nicht bekannt],9.9575,2.793,3.565,0.000,4.482,15.433

0,1,2,3
Omnibus:,6169.214,Durbin-Watson:,1.053
Prob(Omnibus):,0.0,Jarque-Bera (JB):,10489.689
Skew:,1.273,Prob(JB):,0.0
Kurtosis:,4.136,Cond. No.,108.0


In [68]:
results = smf.ols('type_token_ratio ~ denomination', data=corpus).fit()
print('Reference category is Andere Kirchen/Religionsgemeinschafte')
results.summary()

Reference category is Andere Kirchen/Religionsgemeinschafte


0,1,2,3
Dep. Variable:,type_token_ratio,R-squared:,0.016
Model:,OLS,Adj. R-squared:,0.015
Method:,Least Squares,F-statistic:,57.51
Date:,"Wed, 08 May 2024",Prob (F-statistic):,7.270000000000001e-105
Time:,08:43:30,Log-Likelihood:,38846.0
No. Observations:,32387,AIC:,-77670.0
Df Residuals:,32377,BIC:,-77590.0
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.9546,0.012,82.778,0.000,0.932,0.977
denomination[T.Andere christliche Gemeinschaften],-0.0070,0.012,-0.589,0.556,-0.030,0.016
denomination[T.Christ-katholisch],-0.0565,0.012,-4.673,0.000,-0.080,-0.033
denomination[T.Christlich-orthodox],0.0035,0.019,0.189,0.850,-0.033,0.040
denomination[T.Evangelischreformiert/protestantisch],-0.0146,0.012,-1.260,0.208,-0.037,0.008
denomination[T.Islamische Gemeinschaften],-0.0117,0.022,-0.541,0.588,-0.054,0.031
denomination[T.Jüdische Gemeinschaften],0.0073,0.023,0.323,0.747,-0.037,0.052
denomination[T.Konfessionslos],-0.0253,0.012,-2.182,0.029,-0.048,-0.003
denomination[T.Nicht bekannt],-0.0317,0.012,-2.749,0.006,-0.054,-0.009

0,1,2,3
Omnibus:,3336.159,Durbin-Watson:,1.309
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4472.635
Skew:,-0.904,Prob(JB):,0.0
Kurtosis:,3.218,Cond. No.,108.0


In [69]:
results = smf.ols('long_words ~ denomination', data=corpus).fit()
print('Reference category is Andere Kirchen/Religionsgemeinschafte')
results.summary()

Reference category is Andere Kirchen/Religionsgemeinschafte


0,1,2,3
Dep. Variable:,long_words,R-squared:,0.004
Model:,OLS,Adj. R-squared:,0.003
Method:,Least Squares,F-statistic:,13.35
Date:,"Wed, 08 May 2024",Prob (F-statistic):,1.3600000000000001e-21
Time:,08:43:34,Log-Likelihood:,-104960.0
No. Observations:,32387,AIC:,209900.0
Df Residuals:,32377,BIC:,210000.0
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,6.1000,0.978,6.239,0.000,4.184,8.016
denomination[T.Andere christliche Gemeinschaften],2.4505,1.009,2.428,0.015,0.473,4.428
denomination[T.Christ-katholisch],4.3756,1.024,4.272,0.000,2.368,6.383
denomination[T.Christlich-orthodox],0.0600,1.577,0.038,0.970,-3.030,3.150
denomination[T.Evangelischreformiert/protestantisch],3.1433,0.981,3.205,0.001,1.221,5.065
denomination[T.Islamische Gemeinschaften],-0.4125,1.829,-0.226,0.822,-3.998,3.173
denomination[T.Jüdische Gemeinschaften],-0.3857,1.920,-0.201,0.841,-4.149,3.378
denomination[T.Konfessionslos],3.5651,0.982,3.629,0.000,1.639,5.491
denomination[T.Nicht bekannt],3.4984,0.979,3.573,0.000,1.580,5.417

0,1,2,3
Omnibus:,4273.355,Durbin-Watson:,1.172
Prob(Omnibus):,0.0,Jarque-Bera (JB):,6160.982
Skew:,1.046,Prob(JB):,0.0
Kurtosis:,3.436,Cond. No.,108.0


In [70]:
results = smf.ols('complex_words ~ denomination', data=corpus).fit()
print('Reference category is Andere Kirchen/Religionsgemeinschafte')
results.summary()

Reference category is Andere Kirchen/Religionsgemeinschafte


0,1,2,3
Dep. Variable:,complex_words,R-squared:,0.024
Model:,OLS,Adj. R-squared:,0.024
Method:,Least Squares,F-statistic:,89.51
Date:,"Wed, 08 May 2024",Prob (F-statistic):,1.71e-165
Time:,08:43:40,Log-Likelihood:,-90316.0
No. Observations:,32387,AIC:,180700.0
Df Residuals:,32377,BIC:,180700.0
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,3.1250,0.622,5.023,0.000,1.906,4.344
denomination[T.Andere christliche Gemeinschaften],-0.0419,0.642,-0.065,0.948,-1.300,1.217
denomination[T.Christ-katholisch],3.3067,0.652,5.073,0.000,2.029,4.584
denomination[T.Christlich-orthodox],-1.1250,1.003,-1.121,0.262,-3.091,0.841
denomination[T.Evangelischreformiert/protestantisch],0.4814,0.624,0.771,0.440,-0.742,1.705
denomination[T.Islamische Gemeinschaften],-0.0625,1.164,-0.054,0.957,-2.344,2.219
denomination[T.Jüdische Gemeinschaften],-1.5536,1.222,-1.271,0.204,-3.949,0.841
denomination[T.Konfessionslos],1.0528,0.625,1.684,0.092,-0.173,2.278
denomination[T.Nicht bekannt],1.6814,0.623,2.699,0.007,0.460,2.902

0,1,2,3
Omnibus:,10472.062,Durbin-Watson:,0.892
Prob(Omnibus):,0.0,Jarque-Bera (JB):,31200.381
Skew:,1.704,Prob(JB):,0.0
Kurtosis:,6.391,Cond. No.,108.0


#### - **characters_per_word**: Some little effects.
#### - **words_per_sentence**: Christ-katholisch write the most words in their sentences, some more effects visible.
#### - **type_token_ratio**: Very little deviation (as expected).
#### - **long_words**: Christ-katholisch write more long words, but also protestants and people without confession or not indicated.
#### - **complex_words**: Christ-katholisch like complex words

## Political spectrum / Rile / Galtan

In [60]:
results = smf.ols('characters_per_word ~ political_spectrum', data=corpus).fit()
print('Reference category is Links und Konservativ')
results.summary()

Reference category is Links und Konservativ


0,1,2,3
Dep. Variable:,characters_per_word,R-squared:,0.017
Model:,OLS,Adj. R-squared:,0.017
Method:,Least Squares,F-statistic:,71.69
Date:,"Wed, 08 May 2024",Prob (F-statistic):,1.3300000000000001e-117
Time:,08:41:41,Log-Likelihood:,-48013.0
No. Observations:,32387,AIC:,96040.0
Df Residuals:,32378,BIC:,96120.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,6.1405,0.022,274.629,0.000,6.097,6.184
political_spectrum[T.Links und Konservativ-Liberal],-0.3034,0.024,-12.728,0.000,-0.350,-0.257
political_spectrum[T.Links und Liberal],-0.7921,0.072,-11.012,0.000,-0.933,-0.651
political_spectrum[T.Mitte und Konservativ],-0.0242,0.026,-0.922,0.356,-0.076,0.027
political_spectrum[T.Mitte und Konservativ-Liberal],-0.1632,0.028,-5.919,0.000,-0.217,-0.109
political_spectrum[T.Mitte und Liberal],-0.2391,0.040,-6.032,0.000,-0.317,-0.161
political_spectrum[T.Rechts und Konservativ],0.2597,0.072,3.590,0.000,0.118,0.401
political_spectrum[T.Rechts und Konservativ-Liberal],-0.1517,0.040,-3.812,0.000,-0.230,-0.074
political_spectrum[T.Rechts und Liberal],0.1195,0.053,2.243,0.025,0.015,0.224

0,1,2,3
Omnibus:,4637.102,Durbin-Watson:,1.093
Prob(Omnibus):,0.0,Jarque-Bera (JB):,10596.158
Skew:,0.842,Prob(JB):,0.0
Kurtosis:,5.239,Cond. No.,16.5


In [73]:
results = smf.ols('characters_per_word ~ rile', data=corpus).fit()
print('Reference category is Links')
results.summary()

Reference category is Links


0,1,2,3
Dep. Variable:,characters_per_word,R-squared:,0.008
Model:,OLS,Adj. R-squared:,0.007
Method:,Least Squares,F-statistic:,123.0
Date:,"Wed, 08 May 2024",Prob (F-statistic):,6.149999999999999e-54
Time:,08:44:43,Log-Likelihood:,-48175.0
No. Observations:,32387,AIC:,96360.0
Df Residuals:,32384,BIC:,96380.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,5.8669,0.008,757.523,0.000,5.852,5.882
rile[T.Mitte],0.1768,0.013,13.989,0.000,0.152,0.202
rile[T.Rechts],0.2519,0.027,9.474,0.000,0.200,0.304

0,1,2,3
Omnibus:,4469.074,Durbin-Watson:,1.085
Prob(Omnibus):,0.0,Jarque-Bera (JB):,9911.405
Skew:,0.825,Prob(JB):,0.0
Kurtosis:,5.149,Cond. No.,4.85


In [74]:
results = smf.ols('characters_per_word ~ galtan', data=corpus).fit()
print('Reference category is Konservativ')
results.summary()

Reference category is Konservativ


0,1,2,3
Dep. Variable:,characters_per_word,R-squared:,0.011
Model:,OLS,Adj. R-squared:,0.011
Method:,Least Squares,F-statistic:,181.0
Date:,"Wed, 08 May 2024",Prob (F-statistic):,6.68e-79
Time:,08:44:49,Log-Likelihood:,-48118.0
No. Observations:,32387,AIC:,96240.0
Df Residuals:,32384,BIC:,96270.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,6.1306,0.012,531.290,0.000,6.108,6.153
galtan[T.Konservativ-Liberal],-0.2586,0.014,-19.010,0.000,-0.285,-0.232
galtan[T.Liberal],-0.2070,0.028,-7.449,0.000,-0.261,-0.153

0,1,2,3
Omnibus:,4515.286,Durbin-Watson:,1.083
Prob(Omnibus):,0.0,Jarque-Bera (JB):,10107.201
Skew:,0.83,Prob(JB):,0.0
Kurtosis:,5.176,Cond. No.,6.04


In [75]:
results = smf.ols('words_per_sentence ~ political_spectrum', data=corpus).fit()
print('Reference category is Links und Konservativ')
results.summary()

Reference category is Links und Konservativ


0,1,2,3
Dep. Variable:,words_per_sentence,R-squared:,0.024
Model:,OLS,Adj. R-squared:,0.024
Method:,Least Squares,F-statistic:,100.8
Date:,"Wed, 08 May 2024",Prob (F-statistic):,9.790000000000001e-167
Time:,08:48:37,Log-Likelihood:,-138680.0
No. Observations:,32387,AIC:,277400.0
Df Residuals:,32378,BIC:,277400.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,23.5725,0.367,64.149,0.000,22.852,24.293
political_spectrum[T.Links und Konservativ-Liberal],5.5154,0.392,14.077,0.000,4.747,6.283
political_spectrum[T.Links und Liberal],2.6854,1.182,2.272,0.023,0.368,5.003
political_spectrum[T.Mitte und Konservativ],1.7123,0.431,3.975,0.000,0.868,2.557
political_spectrum[T.Mitte und Konservativ-Liberal],-1.0731,0.453,-2.368,0.018,-1.961,-0.185
political_spectrum[T.Mitte und Liberal],-1.0871,0.651,-1.669,0.095,-2.364,0.190
political_spectrum[T.Rechts und Konservativ],-2.7710,1.189,-2.331,0.020,-5.101,-0.441
political_spectrum[T.Rechts und Konservativ-Liberal],0.1707,0.654,0.261,0.794,-1.112,1.453
political_spectrum[T.Rechts und Liberal],-0.3591,0.875,-0.410,0.682,-2.075,1.357

0,1,2,3
Omnibus:,5927.627,Durbin-Watson:,1.067
Prob(Omnibus):,0.0,Jarque-Bera (JB):,9880.65
Skew:,1.239,Prob(JB):,0.0
Kurtosis:,4.085,Cond. No.,16.5


In [76]:
results = smf.ols('words_per_sentence ~ rile', data=corpus).fit()
print('Reference category is Links')
results.summary()

Reference category is Links


0,1,2,3
Dep. Variable:,words_per_sentence,R-squared:,0.016
Model:,OLS,Adj. R-squared:,0.016
Method:,Least Squares,F-statistic:,261.2
Date:,"Wed, 08 May 2024",Prob (F-statistic):,3.02e-113
Time:,08:48:38,Log-Likelihood:,-138820.0
No. Observations:,32387,AIC:,277600.0
Df Residuals:,32384,BIC:,277700.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,28.3966,0.127,223.254,0.000,28.147,28.646
rile[T.Mitte],-4.4267,0.208,-21.322,0.000,-4.834,-4.020
rile[T.Rechts],-5.1969,0.437,-11.901,0.000,-6.053,-4.341

0,1,2,3
Omnibus:,6054.478,Durbin-Watson:,1.057
Prob(Omnibus):,0.0,Jarque-Bera (JB):,10196.861
Skew:,1.257,Prob(JB):,0.0
Kurtosis:,4.111,Cond. No.,4.85


In [77]:
results = smf.ols('words_per_sentence ~ galtan', data=corpus).fit()
print('Reference category is Konservativ')
results.summary()

Reference category is Konservativ


0,1,2,3
Dep. Variable:,words_per_sentence,R-squared:,0.007
Model:,OLS,Adj. R-squared:,0.007
Method:,Least Squares,F-statistic:,112.8
Date:,"Wed, 08 May 2024",Prob (F-statistic):,1.57e-49
Time:,08:48:38,Log-Likelihood:,-138960.0
No. Observations:,32387,AIC:,277900.0
Df Residuals:,32384,BIC:,278000.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,24.7062,0.191,129.550,0.000,24.332,25.080
galtan[T.Konservativ-Liberal],2.8229,0.225,12.555,0.000,2.382,3.264
galtan[T.Liberal],-1.5107,0.459,-3.290,0.001,-2.411,-0.611

0,1,2,3
Omnibus:,6243.42,Durbin-Watson:,1.045
Prob(Omnibus):,0.0,Jarque-Bera (JB):,10686.413
Skew:,1.283,Prob(JB):,0.0
Kurtosis:,4.157,Cond. No.,6.04


In [78]:
results = smf.ols('type_token_ratio ~ political_spectrum', data=corpus).fit()
print('Reference category is Links und Konservativ')
results.summary()

Reference category is Links und Konservativ


0,1,2,3
Dep. Variable:,type_token_ratio,R-squared:,0.017
Model:,OLS,Adj. R-squared:,0.017
Method:,Least Squares,F-statistic:,71.92
Date:,"Wed, 08 May 2024",Prob (F-statistic):,5.51e-118
Time:,08:51:14,Log-Likelihood:,38874.0
No. Observations:,32387,AIC:,-77730.0
Df Residuals:,32378,BIC:,-77650.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.9409,0.002,615.482,0.000,0.938,0.944
political_spectrum[T.Links und Konservativ-Liberal],-0.0197,0.002,-12.082,0.000,-0.023,-0.016
political_spectrum[T.Links und Liberal],-0.0220,0.005,-4.463,0.000,-0.032,-0.012
political_spectrum[T.Mitte und Konservativ],-0.0026,0.002,-1.438,0.151,-0.006,0.001
political_spectrum[T.Mitte und Konservativ-Liberal],0.0009,0.002,0.457,0.648,-0.003,0.005
political_spectrum[T.Mitte und Liberal],0.0029,0.003,1.085,0.278,-0.002,0.008
political_spectrum[T.Rechts und Konservativ],-0.0042,0.005,-0.857,0.391,-0.014,0.005
political_spectrum[T.Rechts und Konservativ-Liberal],0.0031,0.003,1.154,0.249,-0.002,0.008
political_spectrum[T.Rechts und Liberal],-0.0027,0.004,-0.737,0.461,-0.010,0.004

0,1,2,3
Omnibus:,3280.549,Durbin-Watson:,1.306
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4380.118
Skew:,-0.895,Prob(JB):,0.0
Kurtosis:,3.198,Cond. No.,16.5


In [79]:
results = smf.ols('type_token_ratio ~ rile', data=corpus).fit()
print('Reference category is Links')
results.summary()

Reference category is Links


0,1,2,3
Dep. Variable:,type_token_ratio,R-squared:,0.013
Model:,OLS,Adj. R-squared:,0.013
Method:,Least Squares,F-statistic:,207.2
Date:,"Wed, 08 May 2024",Prob (F-statistic):,3.69e-90
Time:,08:51:15,Log-Likelihood:,38795.0
No. Observations:,32387,AIC:,-77580.0
Df Residuals:,32384,BIC:,-77560.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.9235,0.001,1748.513,0.000,0.922,0.925
rile[T.Mitte],0.0166,0.001,19.276,0.000,0.015,0.018
rile[T.Rechts],0.0179,0.002,9.887,0.000,0.014,0.021

0,1,2,3
Omnibus:,3347.215,Durbin-Watson:,1.299
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4493.532
Skew:,-0.907,Prob(JB):,0.0
Kurtosis:,3.207,Cond. No.,4.85


In [80]:
results = smf.ols('type_token_ratio ~ galtan', data=corpus).fit()
print('Reference category is Konservativ')
results.summary()

Reference category is Konservativ


0,1,2,3
Dep. Variable:,type_token_ratio,R-squared:,0.006
Model:,OLS,Adj. R-squared:,0.006
Method:,Least Squares,F-statistic:,104.1
Date:,"Wed, 08 May 2024",Prob (F-statistic):,8.579999999999999e-46
Time:,08:51:16,Log-Likelihood:,38693.0
No. Observations:,32387,AIC:,-77380.0
Df Residuals:,32384,BIC:,-77350.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.9390,0.001,1187.317,0.000,0.937,0.941
galtan[T.Konservativ-Liberal],-0.0126,0.001,-13.510,0.000,-0.014,-0.011
galtan[T.Liberal],-2.769e-05,0.002,-0.015,0.988,-0.004,0.004

0,1,2,3
Omnibus:,3435.166,Durbin-Watson:,1.289
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4644.222
Skew:,-0.921,Prob(JB):,0.0
Kurtosis:,3.222,Cond. No.,6.04


In [81]:
results = smf.ols('long_words ~ political_spectrum', data=corpus).fit()
print('Reference category is Links und Konservativ')
results.summary()

Reference category is Links und Konservativ


0,1,2,3
Dep. Variable:,long_words,R-squared:,0.018
Model:,OLS,Adj. R-squared:,0.018
Method:,Least Squares,F-statistic:,73.93
Date:,"Wed, 08 May 2024",Prob (F-statistic):,2.23e-121
Time:,08:52:54,Log-Likelihood:,-104720.0
No. Observations:,32387,AIC:,209500.0
Df Residuals:,32378,BIC:,209500.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,8.5484,0.129,66.371,0.000,8.296,8.801
political_spectrum[T.Links und Konservativ-Liberal],1.5450,0.137,11.251,0.000,1.276,1.814
political_spectrum[T.Links und Liberal],0.5709,0.414,1.378,0.168,-0.241,1.383
political_spectrum[T.Mitte und Konservativ],0.7484,0.151,4.957,0.000,0.452,1.044
political_spectrum[T.Mitte und Konservativ-Liberal],-0.6177,0.159,-3.888,0.000,-0.929,-0.306
political_spectrum[T.Mitte und Liberal],-0.4380,0.228,-1.918,0.055,-0.886,0.010
political_spectrum[T.Rechts und Konservativ],1.3766,0.417,3.304,0.001,0.560,2.193
political_spectrum[T.Rechts und Konservativ-Liberal],-0.0919,0.229,-0.401,0.689,-0.541,0.358
political_spectrum[T.Rechts und Liberal],-0.0587,0.307,-0.191,0.848,-0.660,0.543

0,1,2,3
Omnibus:,4147.982,Durbin-Watson:,1.189
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5919.232
Skew:,1.025,Prob(JB):,0.0
Kurtosis:,3.431,Cond. No.,16.5


In [82]:
results = smf.ols('long_words ~ rile', data=corpus).fit()
print('Reference category is Links')
results.summary()

Reference category is Links


0,1,2,3
Dep. Variable:,long_words,R-squared:,0.01
Model:,OLS,Adj. R-squared:,0.009
Method:,Least Squares,F-statistic:,155.6
Date:,"Wed, 08 May 2024",Prob (F-statistic):,5.42e-68
Time:,08:52:55,Log-Likelihood:,-104860.0
No. Observations:,32387,AIC:,209700.0
Df Residuals:,32384,BIC:,209800.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,9.8975,0.045,222.014,0.000,9.810,9.985
rile[T.Mitte],-1.2284,0.073,-16.881,0.000,-1.371,-1.086
rile[T.Rechts],-1.2331,0.153,-8.057,0.000,-1.533,-0.933

0,1,2,3
Omnibus:,4174.558,Durbin-Watson:,1.179
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5971.924
Skew:,1.031,Prob(JB):,0.0
Kurtosis:,3.417,Cond. No.,4.85


In [83]:
results = smf.ols('long_words ~ galtan', data=corpus).fit()
print('Reference category is Konservativ')
results.summary()

Reference category is Konservativ


0,1,2,3
Dep. Variable:,long_words,R-squared:,0.003
Model:,OLS,Adj. R-squared:,0.003
Method:,Least Squares,F-statistic:,44.86
Date:,"Wed, 08 May 2024",Prob (F-statistic):,3.5e-20
Time:,08:52:56,Log-Likelihood:,-104970.0
No. Observations:,32387,AIC:,209900.0
Df Residuals:,32384,BIC:,210000.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,9.1163,0.067,136.540,0.000,8.985,9.247
galtan[T.Konservativ-Liberal],0.4711,0.079,5.985,0.000,0.317,0.625
galtan[T.Liberal],-0.7658,0.161,-4.763,0.000,-1.081,-0.451

0,1,2,3
Omnibus:,4310.27,Durbin-Watson:,1.17
Prob(Omnibus):,0.0,Jarque-Bera (JB):,6231.744
Skew:,1.051,Prob(JB):,0.0
Kurtosis:,3.447,Cond. No.,6.04


In [84]:
results = smf.ols('complex_words ~ political_spectrum', data=corpus).fit()
print('Reference category is Links und Konservativ')
results.summary()

Reference category is Links und Konservativ


0,1,2,3
Dep. Variable:,complex_words,R-squared:,0.037
Model:,OLS,Adj. R-squared:,0.037
Method:,Least Squares,F-statistic:,157.2
Date:,"Wed, 08 May 2024",Prob (F-statistic):,4.7e-261
Time:,09:03:42,Log-Likelihood:,-90097.0
No. Observations:,32387,AIC:,180200.0
Df Residuals:,32378,BIC:,180300.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,3.3979,0.082,41.441,0.000,3.237,3.559
political_spectrum[T.Links und Konservativ-Liberal],1.5793,0.087,18.065,0.000,1.408,1.751
political_spectrum[T.Links und Liberal],1.9190,0.264,7.275,0.000,1.402,2.436
political_spectrum[T.Mitte und Konservativ],0.4554,0.096,4.738,0.000,0.267,0.644
political_spectrum[T.Mitte und Konservativ-Liberal],-0.0884,0.101,-0.874,0.382,-0.287,0.110
political_spectrum[T.Mitte und Liberal],-0.3403,0.145,-2.341,0.019,-0.625,-0.055
political_spectrum[T.Rechts und Konservativ],0.3896,0.265,1.469,0.142,-0.130,0.910
political_spectrum[T.Rechts und Konservativ-Liberal],-0.2441,0.146,-1.672,0.094,-0.530,0.042
political_spectrum[T.Rechts und Liberal],-0.4987,0.195,-2.553,0.011,-0.882,-0.116

0,1,2,3
Omnibus:,10074.484,Durbin-Watson:,0.903
Prob(Omnibus):,0.0,Jarque-Bera (JB):,28949.413
Skew:,1.652,Prob(JB):,0.0
Kurtosis:,6.246,Cond. No.,16.5


In [85]:
results = smf.ols('complex_words ~ rile', data=corpus).fit()
print('Reference category is Links und Konservativ')
results.summary()

Reference category is Links und Konservativ


0,1,2,3
Dep. Variable:,complex_words,R-squared:,0.025
Model:,OLS,Adj. R-squared:,0.025
Method:,Least Squares,F-statistic:,419.3
Date:,"Wed, 08 May 2024",Prob (F-statistic):,1.7199999999999998e-180
Time:,09:03:43,Log-Likelihood:,-90300.0
No. Observations:,32387,AIC:,180600.0
Df Residuals:,32384,BIC:,180600.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.7939,0.028,168.576,0.000,4.738,4.850
rile[T.Mitte],-1.2203,0.046,-26.290,0.000,-1.311,-1.129
rile[T.Rechts],-1.6241,0.098,-16.636,0.000,-1.815,-1.433

0,1,2,3
Omnibus:,10312.446,Durbin-Watson:,0.889
Prob(Omnibus):,0.0,Jarque-Bera (JB):,30173.214
Skew:,1.685,Prob(JB):,0.0
Kurtosis:,6.316,Cond. No.,4.85


In [86]:
results = smf.ols('complex_words ~ galtan', data=corpus).fit()
print('Reference category is Links und Konservativ')
results.summary()

Reference category is Links und Konservativ


0,1,2,3
Dep. Variable:,complex_words,R-squared:,0.012
Model:,OLS,Adj. R-squared:,0.012
Method:,Least Squares,F-statistic:,190.0
Date:,"Wed, 08 May 2024",Prob (F-statistic):,9.18e-83
Time:,09:03:43,Log-Likelihood:,-90525.0
No. Observations:,32387,AIC:,181100.0
Df Residuals:,32384,BIC:,181100.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,3.7309,0.043,87.292,0.000,3.647,3.815
galtan[T.Konservativ-Liberal],0.8293,0.050,16.459,0.000,0.731,0.928
galtan[T.Liberal],-0.4095,0.103,-3.979,0.000,-0.611,-0.208

0,1,2,3
Omnibus:,10643.771,Durbin-Watson:,0.872
Prob(Omnibus):,0.0,Jarque-Bera (JB):,32045.538
Skew:,1.73,Prob(JB):,0.0
Kurtosis:,6.432,Cond. No.,6.04


#### - **characters_per_word:** The more right the longer the words (significant, but no huge difference). Konservativ also means longer words (again, no huge difference). But combi "T.Rechts und Konservativ" might be worth a try in the reranker.
#### - **words_per_sentence:** The more right the shorter the sentences (also found with previous style features). Mixed picture with Konservativ-Liberal. In combination, at least "T.Links und Konservativ-Liberal" might be worth a try in the reranker.
#### - **type_token_ratio:** Very little deviation (as expected).
#### - **long_words:** The more right the less long words. Mixed picture with Konservativ-Liberal. In combination, a mixed picture I would say: "T.Links und Konservativ-Liberal" and "T.Rechts und Konservativ" write the most long words, "T.Mitte und Konservativ-Liberal" the less.
#### - **complex_words:** The more right the less complex words. Mixed picture with Konservativ-Liberal. In combination a mixed picture: "T.Links und Konservativ-Liberal" and "T.Links und Liberal" use more complex words than "T.Links und Konservativ"; for the combination with Mitte and Rechts, it seems to be the other way round (aka the more conservative the more complex words)

-------

# Syntactic features: POS distribution

In [240]:
#ADJ: adjective
#ADP: adposition
#ADV: adverb
#AUX: auxiliary
#CCONJ: coordinating conjunction
#DET: determiner
#INTJ: interjection
#NOUN: noun
#NUM: numeral
#PART: particle
#PRON: pronoun
#PROPN: proper noun
#PUNCT: punctuation
#SCONJ: subordinating conjunction
#SYM: symbol
#VERB: verb
#X: other

upos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 
             'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB', 'X']

nlp = spacy.load("de_core_news_sm")


def pos_features(text):
    doc = nlp(text)
    doc_pos = [token.pos_ for token in doc]
    doc_length = len(doc_pos)
    counts = {tag: doc_pos.count(tag)/doc_length for tag in upos_tags} # proportion of pos tags in text
    return counts


def entity_feature(text):
    doc = nlp(text)
    doc_ent = [token.ent_iob_ for token in doc]
    doc_length = len(doc_ent)
    count = (doc_ent.count('B')+doc_ent.count('I'))/doc_length # proportion of entities in text
    return count


def morph_features(text):
    doc = nlp(text)
    doc_morph = [token.morph for token in doc]
    doc_length = len(doc_morph)
    tense = sum([1 for token in doc_morph if "Tense=Pres" in token])/doc_length
    mood = sum([1 for token in doc_morph if "Mood=Imp" in token])/doc_length
    person = sum([1 for token in doc_morph if "Person=1" in token])/doc_length
    return {"past_tense": tense, "imperative": mood, "first_person": person}

In [162]:
# add column with pos tags (as dict)
corpus['POS'] = corpus['argument'].apply(lambda x: pos_features(x))
# pos dict to single columns
corpus = pd.concat([corpus, corpus['POS'].apply(pd.Series)], axis=1)
corpus = corpus.drop('POS', axis=1)

In [203]:
# add column with entity ratio
corpus['Entities'] = corpus['argument'].apply(lambda x: entity_feature(x))

In [242]:
# add column with morphology features
corpus['Morph'] = corpus['argument'].apply(lambda x: morph_features(x))
# morph dict to single columns
corpus = pd.concat([corpus, corpus['Morph'].apply(pd.Series)], axis=1)
corpus = corpus.drop('Morph', axis=1)

In [246]:
corpus.head()

Unnamed: 0,argument_id,argument,stance,topic,gender,age,residence,civil_status,denomination,education,...,PROPN,PUNCT,SCONJ,SYM,VERB,X,Entities,past_tense,imperative,first_person
0,201900,Das Schweizer Volk hat die MEI angenommen und ...,FAVOR,Immigration,Männlich,18-34,Land,Ledig,Christ-katholisch,Fachhochschule,...,0.03125,0.09375,0.0,0.0,0.125,0.0,0.09375,0.0625,0.0,0.0
1,201901,Eine Legalisierung von Cannabis entlasten die ...,FAVOR,Society,Männlich,18-34,Land,Ledig,Christ-katholisch,Fachhochschule,...,0.0,0.133333,0.033333,0.0,0.166667,0.0,0.0,0.166667,0.0,0.0
2,201902,Durch die Förderung der familienergänzenden Be...,FAVOR,Welfare,Weiblich,35-49,Land,Ledig,Nicht bekannt,Universität,...,0.0,0.08,0.0,0.0,0.08,0.0,0.0,0.08,0.0,0.0
3,201903,Ich ziehe eine Elternzeit vor. Die Zeit nach d...,AGAINST,Welfare,Weiblich,35-49,Land,Ledig,Nicht bekannt,Universität,...,0.0,0.105263,0.0,0.0,0.105263,0.0,0.052632,0.105263,0.0,0.105263
4,201904,Unser Asylrecht muss konsequent angewendet wer...,AGAINST,Immigration,Weiblich,35-49,Land,Ledig,Nicht bekannt,Universität,...,0.0,0.142857,0.0,0.0,0.142857,0.0,0.0,0.071429,0.0,0.0


------
## Stance, gender, age, residence

In [174]:
results = smf.ols('ADJ ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,ADJ,R-squared:,0.004
Model:,OLS,Adj. R-squared:,0.004
Method:,Least Squares,F-statistic:,21.4
Date:,"Wed, 08 May 2024",Prob (F-statistic):,3.1699999999999997e-25
Time:,12:09:31,Log-Likelihood:,52051.0
No. Observations:,32387,AIC:,-104100.0
Df Residuals:,32380,BIC:,-104000.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0484,0.001,84.549,0.000,0.047,0.050
stance[T.FAVOR],-0.0012,0.001,-2.288,0.022,-0.002,-0.000
gender[T.Weiblich],-0.0013,0.001,-2.370,0.018,-0.002,-0.000
age[T.35-49],0.0011,0.001,1.653,0.098,-0.000,0.002
age[T.50-64],0.0015,0.001,2.264,0.024,0.000,0.003
age[T.65+],-0.0019,0.001,-1.648,0.099,-0.004,0.000
residence[T.Stadt],0.0112,0.001,10.180,0.000,0.009,0.013

0,1,2,3
Omnibus:,6996.526,Durbin-Watson:,1.726
Prob(Omnibus):,0.0,Jarque-Bera (JB):,16057.031
Skew:,1.226,Prob(JB):,0.0
Kurtosis:,5.425,Cond. No.,6.02


In [176]:
results = smf.ols('ADP ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,ADP,R-squared:,0.012
Model:,OLS,Adj. R-squared:,0.012
Method:,Least Squares,F-statistic:,65.98
Date:,"Wed, 08 May 2024",Prob (F-statistic):,6.989999999999999e-82
Time:,12:13:05,Log-Likelihood:,47532.0
No. Observations:,32387,AIC:,-95050.0
Df Residuals:,32380,BIC:,-94990.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0559,0.001,84.910,0.000,0.055,0.057
stance[T.FAVOR],-0.0012,0.001,-1.979,0.048,-0.002,-1.19e-05
gender[T.Weiblich],-0.0015,0.001,-2.321,0.020,-0.003,-0.000
age[T.35-49],-0.0006,0.001,-0.757,0.449,-0.002,0.001
age[T.50-64],0.0049,0.001,6.282,0.000,0.003,0.006
age[T.65+],-0.0033,0.001,-2.461,0.014,-0.006,-0.001
residence[T.Stadt],0.0222,0.001,17.572,0.000,0.020,0.025

0,1,2,3
Omnibus:,3250.431,Durbin-Watson:,1.207
Prob(Omnibus):,0.0,Jarque-Bera (JB):,4334.129
Skew:,0.849,Prob(JB):,0.0
Kurtosis:,3.573,Cond. No.,6.02


In [175]:
results = smf.ols('ADV ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,ADV,R-squared:,0.007
Model:,OLS,Adj. R-squared:,0.007
Method:,Least Squares,F-statistic:,39.95
Date:,"Wed, 08 May 2024",Prob (F-statistic):,1.01e-48
Time:,12:10:20,Log-Likelihood:,37797.0
No. Observations:,32387,AIC:,-75580.0
Df Residuals:,32380,BIC:,-75520.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0809,0.001,91.070,0.000,0.079,0.083
stance[T.FAVOR],3.41e-05,0.001,0.041,0.968,-0.002,0.002
gender[T.Weiblich],-0.0045,0.001,-5.167,0.000,-0.006,-0.003
age[T.35-49],-0.0010,0.001,-0.984,0.325,-0.003,0.001
age[T.50-64],0.0063,0.001,5.970,0.000,0.004,0.008
age[T.65+],-0.0035,0.002,-1.929,0.054,-0.007,5.71e-05
residence[T.Stadt],0.0206,0.002,12.089,0.000,0.017,0.024

0,1,2,3
Omnibus:,5665.334,Durbin-Watson:,1.374
Prob(Omnibus):,0.0,Jarque-Bera (JB):,10427.175
Skew:,1.103,Prob(JB):,0.0
Kurtosis:,4.691,Cond. No.,6.02


In [177]:
results = smf.ols('AUX ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,AUX,R-squared:,0.014
Model:,OLS,Adj. R-squared:,0.013
Method:,Least Squares,F-statistic:,74.4
Date:,"Wed, 08 May 2024",Prob (F-statistic):,1.3e-92
Time:,12:14:07,Log-Likelihood:,46566.0
No. Observations:,32387,AIC:,-93120.0
Df Residuals:,32380,BIC:,-93060.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0548,0.001,80.816,0.000,0.053,0.056
stance[T.FAVOR],-0.0050,0.001,-7.751,0.000,-0.006,-0.004
gender[T.Weiblich],-0.0009,0.001,-1.395,0.163,-0.002,0.000
age[T.35-49],-0.0002,0.001,-0.282,0.778,-0.002,0.001
age[T.50-64],0.0046,0.001,5.718,0.000,0.003,0.006
age[T.65+],-0.0027,0.001,-1.913,0.056,-0.005,6.48e-05
residence[T.Stadt],0.0234,0.001,17.983,0.000,0.021,0.026

0,1,2,3
Omnibus:,4642.294,Durbin-Watson:,1.116
Prob(Omnibus):,0.0,Jarque-Bera (JB):,7149.874
Skew:,1.024,Prob(JB):,0.0
Kurtosis:,4.051,Cond. No.,6.02


In [178]:
results = smf.ols('CCONJ ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,CCONJ,R-squared:,0.009
Model:,OLS,Adj. R-squared:,0.009
Method:,Least Squares,F-statistic:,48.56
Date:,"Wed, 08 May 2024",Prob (F-statistic):,1.11e-59
Time:,12:15:32,Log-Likelihood:,65263.0
No. Observations:,32387,AIC:,-130500.0
Df Residuals:,32380,BIC:,-130500.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0194,0.000,50.873,0.000,0.019,0.020
stance[T.FAVOR],-0.0008,0.000,-2.113,0.035,-0.001,-5.51e-05
gender[T.Weiblich],0.0008,0.000,2.194,0.028,8.81e-05,0.002
age[T.35-49],0.0008,0.000,1.686,0.092,-0.000,0.002
age[T.50-64],0.0026,0.000,5.721,0.000,0.002,0.003
age[T.65+],0.0004,0.001,0.528,0.597,-0.001,0.002
residence[T.Stadt],0.0113,0.001,15.498,0.000,0.010,0.013

0,1,2,3
Omnibus:,10245.941,Durbin-Watson:,1.472
Prob(Omnibus):,0.0,Jarque-Bera (JB):,28782.828
Skew:,1.696,Prob(JB):,0.0
Kurtosis:,6.133,Cond. No.,6.02


In [179]:
results = smf.ols('DET ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,DET,R-squared:,0.018
Model:,OLS,Adj. R-squared:,0.018
Method:,Least Squares,F-statistic:,99.58
Date:,"Wed, 08 May 2024",Prob (F-statistic):,1.2000000000000002e-124
Time:,12:16:02,Log-Likelihood:,39975.0
No. Observations:,32387,AIC:,-79940.0
Df Residuals:,32380,BIC:,-79880.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0949,0.001,114.184,0.000,0.093,0.096
stance[T.FAVOR],-0.0067,0.001,-8.550,0.000,-0.008,-0.005
gender[T.Weiblich],-0.0026,0.001,-3.166,0.002,-0.004,-0.001
age[T.35-49],-0.0013,0.001,-1.270,0.204,-0.003,0.001
age[T.50-64],0.0013,0.001,1.290,0.197,-0.001,0.003
age[T.65+],-0.0104,0.002,-6.081,0.000,-0.014,-0.007
residence[T.Stadt],0.0343,0.002,21.517,0.000,0.031,0.037

0,1,2,3
Omnibus:,1317.01,Durbin-Watson:,1.061
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1397.215
Skew:,0.486,Prob(JB):,3.97e-304
Kurtosis:,2.702,Cond. No.,6.02


In [180]:
results = smf.ols('INTJ ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,INTJ,R-squared:,0.0
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,1.267
Date:,"Wed, 08 May 2024",Prob (F-statistic):,0.269
Time:,12:16:34,Log-Likelihood:,167020.0
No. Observations:,32387,AIC:,-334000.0
Df Residuals:,32380,BIC:,-334000.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,6.617e-05,1.64e-05,4.025,0.000,3.39e-05,9.84e-05
stance[T.FAVOR],-2.656e-05,1.55e-05,-1.711,0.087,-5.7e-05,3.86e-06
gender[T.Weiblich],-9.08e-06,1.62e-05,-0.559,0.576,-4.09e-05,2.28e-05
age[T.35-49],2.176e-05,1.96e-05,1.108,0.268,-1.67e-05,6.02e-05
age[T.50-64],-6.528e-06,1.94e-05,-0.336,0.737,-4.46e-05,3.15e-05
age[T.65+],2.608e-05,3.38e-05,0.773,0.440,-4.01e-05,9.22e-05
residence[T.Stadt],-4.241e-05,3.15e-05,-1.345,0.179,-0.000,1.94e-05

0,1,2,3
Omnibus:,99237.097,Durbin-Watson:,1.717
Prob(Omnibus):,0.0,Jarque-Bera (JB):,10976979009.337
Skew:,46.563,Prob(JB):,0.0
Kurtosis:,2853.559,Cond. No.,6.02


In [181]:
results = smf.ols('NOUN ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,NOUN,R-squared:,0.016
Model:,OLS,Adj. R-squared:,0.016
Method:,Least Squares,F-statistic:,87.9
Date:,"Wed, 08 May 2024",Prob (F-statistic):,8.470000000000001e-110
Time:,12:16:49,Log-Likelihood:,33526.0
No. Observations:,32387,AIC:,-67040.0
Df Residuals:,32380,BIC:,-66980.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.1616,0.001,159.418,0.000,0.160,0.164
stance[T.FAVOR],-0.0033,0.001,-3.404,0.001,-0.005,-0.001
gender[T.Weiblich],0.0015,0.001,1.459,0.145,-0.001,0.003
age[T.35-49],0.0015,0.001,1.230,0.219,-0.001,0.004
age[T.50-64],0.0083,0.001,6.927,0.000,0.006,0.011
age[T.65+],-0.0083,0.002,-3.974,0.000,-0.012,-0.004
residence[T.Stadt],0.0395,0.002,20.334,0.000,0.036,0.043

0,1,2,3
Omnibus:,146.127,Durbin-Watson:,1.04
Prob(Omnibus):,0.0,Jarque-Bera (JB):,148.114
Skew:,0.163,Prob(JB):,6.88e-33
Kurtosis:,3.061,Cond. No.,6.02


In [182]:
results = smf.ols('NUM ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,NUM,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,4.058
Date:,"Wed, 08 May 2024",Prob (F-statistic):,0.000452
Time:,12:17:17,Log-Likelihood:,88793.0
No. Observations:,32387,AIC:,-177600.0
Df Residuals:,32380,BIC:,-177500.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0038,0.000,20.468,0.000,0.003,0.004
stance[T.FAVOR],-7.784e-05,0.000,-0.448,0.654,-0.000,0.000
gender[T.Weiblich],0.0006,0.000,3.174,0.002,0.000,0.001
age[T.35-49],-0.0002,0.000,-0.808,0.419,-0.001,0.000
age[T.50-64],0.0005,0.000,2.166,0.030,4.47e-05,0.001
age[T.65+],0.0008,0.000,2.093,0.036,5.02e-05,0.002
residence[T.Stadt],0.0005,0.000,1.484,0.138,-0.000,0.001

0,1,2,3
Omnibus:,32610.929,Durbin-Watson:,1.797
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1635475.699
Skew:,5.111,Prob(JB):,0.0
Kurtosis:,36.279,Cond. No.,6.02


In [183]:
results = smf.ols('PART ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,PART,R-squared:,0.008
Model:,OLS,Adj. R-squared:,0.008
Method:,Least Squares,F-statistic:,42.76
Date:,"Wed, 08 May 2024",Prob (F-statistic):,2.6199999999999998e-52
Time:,12:17:33,Log-Likelihood:,69139.0
No. Observations:,32387,AIC:,-138300.0
Df Residuals:,32380,BIC:,-138200.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0165,0.000,48.921,0.000,0.016,0.017
stance[T.FAVOR],-0.0021,0.000,-6.639,0.000,-0.003,-0.001
gender[T.Weiblich],-0.0021,0.000,-6.390,0.000,-0.003,-0.001
age[T.35-49],-0.0005,0.000,-1.209,0.227,-0.001,0.000
age[T.50-64],0.0016,0.000,4.119,0.000,0.001,0.002
age[T.65+],0.0020,0.001,2.920,0.004,0.001,0.003
residence[T.Stadt],0.0073,0.001,11.302,0.000,0.006,0.009

0,1,2,3
Omnibus:,15288.005,Durbin-Watson:,1.647
Prob(Omnibus):,0.0,Jarque-Bera (JB):,85321.911
Skew:,2.268,Prob(JB):,0.0
Kurtosis:,9.531,Cond. No.,6.02


In [184]:
results = smf.ols('PRON ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,PRON,R-squared:,0.01
Model:,OLS,Adj. R-squared:,0.009
Method:,Least Squares,F-statistic:,51.97
Date:,"Wed, 08 May 2024",Prob (F-statistic):,4.98e-64
Time:,12:18:00,Log-Likelihood:,53217.0
No. Observations:,32387,AIC:,-106400.0
Df Residuals:,32380,BIC:,-106400.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0355,0.001,64.373,0.000,0.034,0.037
stance[T.FAVOR],-0.0026,0.001,-4.978,0.000,-0.004,-0.002
gender[T.Weiblich],-0.0006,0.001,-1.158,0.247,-0.002,0.000
age[T.35-49],-0.0026,0.001,-3.901,0.000,-0.004,-0.001
age[T.50-64],0.0029,0.001,4.516,0.000,0.002,0.004
age[T.65+],-0.0013,0.001,-1.181,0.238,-0.004,0.001
residence[T.Stadt],0.0155,0.001,14.638,0.000,0.013,0.018

0,1,2,3
Omnibus:,9393.325,Durbin-Watson:,1.416
Prob(Omnibus):,0.0,Jarque-Bera (JB):,25141.6
Skew:,1.568,Prob(JB):,0.0
Kurtosis:,5.966,Cond. No.,6.02


In [185]:
results = smf.ols('PROPN ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,PROPN,R-squared:,0.033
Model:,OLS,Adj. R-squared:,0.033
Method:,Least Squares,F-statistic:,184.9
Date:,"Wed, 08 May 2024",Prob (F-statistic):,2e-232
Time:,12:18:32,Log-Likelihood:,5716.9
No. Observations:,32387,AIC:,-11420.0
Df Residuals:,32380,BIC:,-11360.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.1463,0.002,61.156,0.000,0.142,0.151
stance[T.FAVOR],0.0183,0.002,8.105,0.000,0.014,0.023
gender[T.Weiblich],0.0051,0.002,2.176,0.030,0.001,0.010
age[T.35-49],-0.0015,0.003,-0.511,0.609,-0.007,0.004
age[T.50-64],-0.0293,0.003,-10.361,0.000,-0.035,-0.024
age[T.65+],0.0184,0.005,3.751,0.000,0.009,0.028
residence[T.Stadt],-0.1336,0.005,-29.116,0.000,-0.143,-0.125

0,1,2,3
Omnibus:,4587.1,Durbin-Watson:,0.32
Prob(Omnibus):,0.0,Jarque-Bera (JB):,6892.31
Skew:,1.13,Prob(JB):,0.0
Kurtosis:,3.022,Cond. No.,6.02


In [186]:
results = smf.ols('PUNCT ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,PUNCT,R-squared:,0.007
Model:,OLS,Adj. R-squared:,0.007
Method:,Least Squares,F-statistic:,38.45
Date:,"Wed, 08 May 2024",Prob (F-statistic):,7.95e-47
Time:,12:19:33,Log-Likelihood:,50627.0
No. Observations:,32387,AIC:,-101200.0
Df Residuals:,32380,BIC:,-101200.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.1100,0.001,183.931,0.000,0.109,0.111
stance[T.FAVOR],-0.0020,0.001,-3.531,0.000,-0.003,-0.001
gender[T.Weiblich],0.0001,0.001,0.253,0.800,-0.001,0.001
age[T.35-49],0.0053,0.001,7.404,0.000,0.004,0.007
age[T.50-64],0.0054,0.001,7.619,0.000,0.004,0.007
age[T.65+],0.0070,0.001,5.721,0.000,0.005,0.009
residence[T.Stadt],0.0127,0.001,11.044,0.000,0.010,0.015

0,1,2,3
Omnibus:,3507.384,Durbin-Watson:,1.492
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8060.127
Skew:,0.658,Prob(JB):,0.0
Kurtosis:,5.059,Cond. No.,6.02


In [187]:
results = smf.ols('SCONJ ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,SCONJ,R-squared:,0.005
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,28.93
Date:,"Wed, 08 May 2024",Prob (F-statistic):,9.77e-35
Time:,12:20:01,Log-Likelihood:,79081.0
No. Observations:,32387,AIC:,-158100.0
Df Residuals:,32380,BIC:,-158100.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0092,0.000,37.141,0.000,0.009,0.010
stance[T.FAVOR],0.0015,0.000,6.600,0.000,0.001,0.002
gender[T.Weiblich],2.735e-05,0.000,0.111,0.911,-0.000,0.001
age[T.35-49],-0.0007,0.000,-2.389,0.017,-0.001,-0.000
age[T.50-64],-0.0003,0.000,-0.992,0.321,-0.001,0.000
age[T.65+],-0.0016,0.001,-3.091,0.002,-0.003,-0.001
residence[T.Stadt],0.0051,0.000,10.753,0.000,0.004,0.006

0,1,2,3
Omnibus:,16063.804,Durbin-Watson:,1.683
Prob(Omnibus):,0.0,Jarque-Bera (JB):,87796.654
Skew:,2.426,Prob(JB):,0.0
Kurtosis:,9.444,Cond. No.,6.02


In [188]:
results = smf.ols('SYM ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

  return 1 - self.ssr/self.centered_tss
  return self.mse_model/self.mse_resid
  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2
  dw = np.sum(diff_resids**2, axis=axis) / np.sum(resids**2, axis=axis)


0,1,2,3
Dep. Variable:,SYM,R-squared:,
Model:,OLS,Adj. R-squared:,
Method:,Least Squares,F-statistic:,
Date:,"Wed, 08 May 2024",Prob (F-statistic):,
Time:,12:20:27,Log-Likelihood:,inf
No. Observations:,32387,AIC:,-inf
Df Residuals:,32380,BIC:,-inf
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0,0,,,0,0
stance[T.FAVOR],0,0,,,0,0
gender[T.Weiblich],0,0,,,0,0
age[T.35-49],0,0,,,0,0
age[T.50-64],0,0,,,0,0
age[T.65+],0,0,,,0,0
residence[T.Stadt],0,0,,,0,0

0,1,2,3
Omnibus:,,Durbin-Watson:,
Prob(Omnibus):,,Jarque-Bera (JB):,
Skew:,,Prob(JB):,
Kurtosis:,,Cond. No.,6.02


In [189]:
results = smf.ols('VERB ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,VERB,R-squared:,0.015
Model:,OLS,Adj. R-squared:,0.015
Method:,Least Squares,F-statistic:,80.85
Date:,"Wed, 08 May 2024",Prob (F-statistic):,7.96e-101
Time:,12:20:50,Log-Likelihood:,50575.0
No. Observations:,32387,AIC:,-101100.0
Df Residuals:,32380,BIC:,-101100.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0701,0.001,117.003,0.000,0.069,0.071
stance[T.FAVOR],-0.0051,0.001,-8.967,0.000,-0.006,-0.004
gender[T.Weiblich],-0.0010,0.001,-1.704,0.088,-0.002,0.000
age[T.35-49],-0.0006,0.001,-0.787,0.432,-0.002,0.001
age[T.50-64],0.0043,0.001,6.028,0.000,0.003,0.006
age[T.65+],-0.0049,0.001,-3.956,0.000,-0.007,-0.002
residence[T.Stadt],0.0203,0.001,17.697,0.000,0.018,0.023

0,1,2,3
Omnibus:,1378.612,Durbin-Watson:,1.29
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1583.55
Skew:,0.506,Prob(JB):,0.0
Kurtosis:,3.385,Cond. No.,6.02


In [190]:
results = smf.ols('X ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,X,R-squared:,0.028
Model:,OLS,Adj. R-squared:,0.027
Method:,Least Squares,F-statistic:,152.8
Date:,"Wed, 08 May 2024",Prob (F-statistic):,4.8200000000000005e-192
Time:,12:21:07,Log-Likelihood:,16532.0
No. Observations:,32387,AIC:,-33050.0
Df Residuals:,32380,BIC:,-32990.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0902,0.002,52.629,0.000,0.087,0.094
stance[T.FAVOR],0.0107,0.002,6.625,0.000,0.008,0.014
gender[T.Weiblich],0.0071,0.002,4.165,0.000,0.004,0.010
age[T.35-49],-0.0007,0.002,-0.336,0.737,-0.005,0.003
age[T.50-64],-0.0160,0.002,-7.903,0.000,-0.020,-0.012
age[T.65+],0.0065,0.004,1.845,0.065,-0.000,0.013
residence[T.Stadt],-0.0900,0.003,-27.381,0.000,-0.096,-0.084

0,1,2,3
Omnibus:,8391.531,Durbin-Watson:,0.502
Prob(Omnibus):,0.0,Jarque-Bera (JB):,17484.153
Skew:,1.552,Prob(JB):,0.0
Kurtosis:,4.821,Cond. No.,6.02


In [247]:
results = smf.ols('Entities ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,Entities,R-squared:,0.016
Model:,OLS,Adj. R-squared:,0.016
Method:,Least Squares,F-statistic:,89.19
Date:,"Wed, 08 May 2024",Prob (F-statistic):,1.97e-111
Time:,13:03:33,Log-Likelihood:,12356.0
No. Observations:,32387,AIC:,-24700.0
Df Residuals:,32380,BIC:,-24640.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.1171,0.002,60.069,0.000,0.113,0.121
stance[T.FAVOR],0.0096,0.002,5.226,0.000,0.006,0.013
gender[T.Weiblich],0.0031,0.002,1.625,0.104,-0.001,0.007
age[T.35-49],-0.0013,0.002,-0.545,0.586,-0.006,0.003
age[T.50-64],-0.0124,0.002,-5.395,0.000,-0.017,-0.008
age[T.65+],0.0117,0.004,2.936,0.003,0.004,0.020
residence[T.Stadt],-0.0787,0.004,-21.063,0.000,-0.086,-0.071

0,1,2,3
Omnibus:,13547.234,Durbin-Watson:,1.024
Prob(Omnibus):,0.0,Jarque-Bera (JB):,56779.157
Skew:,2.086,Prob(JB):,0.0
Kurtosis:,7.966,Cond. No.,6.02


In [248]:
results = smf.ols('past_tense ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,past_tense,R-squared:,0.016
Model:,OLS,Adj. R-squared:,0.016
Method:,Least Squares,F-statistic:,89.69
Date:,"Wed, 08 May 2024",Prob (F-statistic):,4.44e-112
Time:,13:05:50,Log-Likelihood:,52031.0
No. Observations:,32387,AIC:,-104000.0
Df Residuals:,32380,BIC:,-104000.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0613,0.001,107.145,0.000,0.060,0.062
stance[T.FAVOR],-0.0058,0.001,-10.680,0.000,-0.007,-0.005
gender[T.Weiblich],-0.0012,0.001,-2.067,0.039,-0.002,-6.07e-05
age[T.35-49],0.0020,0.001,2.968,0.003,0.001,0.003
age[T.50-64],0.0066,0.001,9.735,0.000,0.005,0.008
age[T.65+],-0.0002,0.001,-0.137,0.891,-0.002,0.002
residence[T.Stadt],0.0190,0.001,17.309,0.000,0.017,0.021

0,1,2,3
Omnibus:,1107.795,Durbin-Watson:,1.219
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1218.536
Skew:,0.471,Prob(JB):,2.5e-265
Kurtosis:,2.881,Cond. No.,6.02


In [249]:
results = smf.ols('imperative ~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,imperative,R-squared:,0.0
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,1.061
Date:,"Wed, 08 May 2024",Prob (F-statistic):,0.383
Time:,13:05:59,Log-Likelihood:,146730.0
No. Observations:,32387,AIC:,-293400.0
Df Residuals:,32380,BIC:,-293400.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0001,3.08e-05,4.470,0.000,7.72e-05,0.000
stance[T.FAVOR],-4.748e-06,2.9e-05,-0.163,0.870,-6.17e-05,5.22e-05
gender[T.Weiblich],1.471e-05,3.04e-05,0.484,0.628,-4.49e-05,7.43e-05
age[T.35-49],-2.567e-05,3.67e-05,-0.699,0.485,-9.77e-05,4.63e-05
age[T.50-64],1.051e-05,3.63e-05,0.289,0.772,-6.07e-05,8.17e-05
age[T.65+],0.0001,6.32e-05,1.671,0.095,-1.82e-05,0.000
residence[T.Stadt],8.52e-05,5.9e-05,1.445,0.149,-3.04e-05,0.000

0,1,2,3
Omnibus:,77332.903,Durbin-Watson:,1.951
Prob(Omnibus):,0.0,Jarque-Bera (JB):,754976508.477
Skew:,24.684,Prob(JB):,0.0
Kurtosis:,749.344,Cond. No.,6.02


In [250]:
results = smf.ols('first_person~ stance + gender + age + residence', data=corpus).fit()
results.summary()

0,1,2,3
Dep. Variable:,first_person,R-squared:,0.002
Model:,OLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,10.19
Date:,"Wed, 08 May 2024",Prob (F-statistic):,2.71e-11
Time:,13:06:18,Log-Likelihood:,56936.0
No. Observations:,32387,AIC:,-113900.0
Df Residuals:,32380,BIC:,-113800.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0156,0.000,31.801,0.000,0.015,0.017
stance[T.FAVOR],-0.0016,0.000,-3.473,0.001,-0.003,-0.001
gender[T.Weiblich],0.0012,0.000,2.463,0.014,0.000,0.002
age[T.35-49],-0.0015,0.001,-2.636,0.008,-0.003,-0.000
age[T.50-64],-0.0006,0.001,-0.978,0.328,-0.002,0.001
age[T.65+],-0.0038,0.001,-3.803,0.000,-0.006,-0.002
residence[T.Stadt],0.0049,0.001,5.173,0.000,0.003,0.007

0,1,2,3
Omnibus:,25816.616,Durbin-Watson:,1.584
Prob(Omnibus):,0.0,Jarque-Bera (JB):,534776.285
Skew:,3.827,Prob(JB):,0.0
Kurtosis:,21.377,Cond. No.,6.02


#### *residence:*
#### - POS: **Stadt** people use more ADJECTIVES, ADPOSITIONS, ADVERBS, AUXILIARIES, COORDINATING CONJUNCTION, DETERMINER, NOUNS, PARTICLE, PRONOUNS, PUNCTUATION, SUBORDINATING CONJUNCTIONS, VERBS. **Land** people use more PROPER NOUNS, OTHERS.
#### - Entities: **Stadt** people use fewer entities.
#### - Morphological: **Stadt* people use more past tense and first person.