In diesem Notebook wird ein Chi-Quadrat-Test zum Zusammenhang zwischen Uneinigkeit und Ton berechnet. Außerdem werden die längeren Konversationen, in denen Uneinigkeit herrscht, geplottet.

In [None]:
import pandas as pd
from matplotlib import pyplot as plt
from scipy import stats
import math

In [None]:
# Load the data set from the Excel workbook; the cell output is its row count.
data = pd.read_excel(io='../daten/data_disagreement.xlsx')
data.shape[0]

In [None]:
# Keep only the first row for every value of '5: post_id';
# the cell output is the number of remaining rows.
data_short = data.drop_duplicates(subset=['5: post_id'], keep='first')
data_short.shape[0]

In [None]:
# Drop rows whose 'var_tone' equals -99 and rows without a 'disagreement'
# value; the cell output is the number of rows that are left.
data_short = data_short.loc[
    (data_short['var_tone'] != -99) & data_short['disagreement'].notna()
]
data_short.shape[0]

In [None]:
# Contingency table with 'disagreement' as rows and the individual
# 'var_tone' codes as columns.
crosstab_long = pd.crosstab(data_short['disagreement'], data_short['var_tone'])
crosstab_long

In [None]:
# Chi-square test of independence on the disagreement-by-tone table.
stats.chi2_contingency(observed=crosstab_long)

In [None]:
# Effect size sqrt(chi² / N) for the disagreement-by-tone table.
# Both inputs are taken from `crosstab_long` instead of being typed in by
# hand, so the value stays in sync when the data or the filters change
# (the previously hard-coded inputs were chi² = 27.49617604650556, N = 171).
# NOTE(review): sqrt(chi² / N) equals Cramér's V only when one dimension of
# the table has exactly two levels — confirm that 'disagreement' is binary.
chi2_statistic = stats.chi2_contingency(crosstab_long)[0]
n_observations = crosstab_long.to_numpy().sum()
math.sqrt(chi2_statistic / n_observations)

In [None]:
# Dichotomise the tone: negative codes become -1, everything else becomes 1.
# `assign` returns a new frame, which avoids writing a column into the
# filtered `data_short` slice (the pattern behind SettingWithCopyWarning);
# the vectorised comparison replaces the row-by-row lambda.
data_short = data_short.assign(
    dich_tone=data_short['var_tone'].lt(0).map({True: -1, False: 1})
)


In [None]:
# Contingency table with 'disagreement' as rows and the dichotomised
# tone ('dich_tone') as columns.
crosstab = pd.crosstab(data_short['disagreement'], data_short['dich_tone'])
crosstab

In [None]:
# Chi-square test of independence on the disagreement-by-dichotomised-tone
# table. NOTE(review): if this table is 2x2 (one degree of freedom), SciPy
# applies Yates' continuity correction by default.
stats.chi2_contingency(observed=crosstab)

In [None]:
# Share of rows with dich_tone == 1 among the rows with disagreement == 1.
# The counts are converted to plain ints so that an empty denominator still
# raises ZeroDivisionError, as plain len() division does.
conditional_prob = (
    int(((data_short['disagreement'] == 1) & (data_short['dich_tone'] == 1)).sum())
    / int((data_short['disagreement'] == 1).sum())
)
conditional_prob

In [None]:
# Rebuild the dichotomised table with a totals row/column and labelled axes,
# then copy it to the system clipboard in Excel-friendly format.
crosstab = pd.crosstab(
    data_short['disagreement'],
    data_short['dich_tone'],
    rownames=['Disagreement (Nein/Ja)'],
    colnames=['Tonfall (negativ/positiv)'],
    margins=True,
    margins_name='Gesamt',
)
crosstab.to_clipboard(excel=True)

In [None]:
# Number of distinct ('4: tree_id', '11: path') combinations in the filtered data.
data_short.groupby(by=['4: tree_id', '11: path']).ngroups

In [None]:
# Number of distinct ('4: tree_id', '11: path') combinations in the full data set.
data.groupby(by=['4: tree_id', '11: path']).ngroups

In [None]:
# Rows coded with disagreement == 1, and the number of distinct
# ('4: tree_id', '11: path') combinations among them.
dis_df = data_short.loc[data_short['disagreement'].eq(1)]
dis_df.groupby(by=['4: tree_id', '11: path']).ngroups

In [None]:
n_dis_df = data_short[data_short['disagreement']==1]
n_dis_df.groupby(['4: tree_id', '11: path']).ngroups

In [None]:
def plot_tone(contrary):
    grouped_df = contrary.groupby(['4: tree_id', '11: path'])
    fig, axes = plt.subplots(nrows=2, ncols=4, figsize=(15, 8))
    fig. suptitle('Verlauf des Tonfalls gegnüber der Anderen Person in Zwiegeprächen,\n in denen die Gesprächsteilnehmenden unterschiedliche Meinungen haben', fontsize=18)
    y_min, y_max = -5, 3
    disagree = pd.DataFrame(columns=contrary.columns)
    for i, group in grouped_df:
        if len(group)>5:
            disagree= pd.concat([disagree, group])
    disagree = disagree.groupby(['4: tree_id', '11: path'])
    print(disagree.ngroups)
    for (name, group), ax in zip(disagree, axes.flatten()):  
        ax.plot(group['var_tone'])
        ax.axhline(y=0, color='red', linestyle='--', linewidth=0.5)
    axes[1, 3].axis('off')
    for ax in axes[:, 0:].flatten():
        ax.set_ylim(bottom=-5.5, top=3.5)
        ax.set_xticklabels([])
        ax.set_xticks([])
    for ax in axes[:, 1:].flatten():
        ax.set_yticklabels([])

    text = "3= lobend \
        2= humorvoll \
        1= höflich/respektvoll \
        \n-1=resigniert\
        -2=in Rage \
        -3=sarkastisch \
        -4=deligitimierend \
        -5=beleidigend"
    
    fig.text(0.01, 0.03, text, ha='left', va='bottom', fontsize=12)
    plt.tight_layout(rect=[0, 0.1, 1, 0.98])   
    plt.show()
plot_tone(n_dis_df)

In [None]:
# Export every ('4: tree_id', '11: path') group of `dis_df` that has more
# than five rows. The groups are collected first and concatenated once,
# instead of growing a frame by repeated concatenation onto an empty
# DataFrame (quadratic, and concatenating empty frames is deprecated in
# recent pandas). Row order in the export is unchanged: group by group.
grouped_df = dis_df.groupby(['4: tree_id', '11: path'])
long_groups = [group for _, group in grouped_df if len(group) > 5]
# pd.concat raises on an empty list, so fall back to an empty frame that
# still carries the original columns.
disagree = pd.concat(long_groups) if long_groups else dis_df.iloc[0:0]
disagree.to_excel('disagree_long.xlsx')

In [None]:
# Export every ('4: tree_id', '11: path') group of `n_dis_df` that has more
# than four rows. The groups are collected first and concatenated once,
# instead of growing a frame by repeated concatenation onto an empty
# DataFrame (quadratic, and concatenating empty frames is deprecated in
# recent pandas). Row order in the export is unchanged: group by group.
# NOTE(review): the threshold here is > 4 — confirm that this is the
# intended minimum length for this export.
grouped_df = n_dis_df.groupby(['4: tree_id', '11: path'])
long_groups = [group for _, group in grouped_df if len(group) > 4]
# pd.concat raises on an empty list, so fall back to an empty frame that
# still carries the original columns.
disagree = pd.concat(long_groups) if long_groups else n_dis_df.iloc[0:0]
disagree.to_excel('no_disagree_long.xlsx')