# Statistical tests on search and recommendations between start, end of phase 1 and end of phase 2

This notebook analyzes differences in search results and recommendations at different points of the experiment. It compares the start of phase 1, the end of phase 1, and the end of phase 2. Statistical significance is evaluated with the Mann-Whitney U test with Bonferroni correction.

In [1]:
import pandas as pd

# Search results

## Preparing search results dataset and functions

In [6]:
# load search results from CSV
search_results = pd.read_csv('../Data/search_results.csv', index_col=0)

# number of leading rows kept per (experiment, query, bot_id, seed_sequence)
num_results = 10

# Restrict the search results to the three checkpoints (start of phase 1,
# end of phase 1, end of phase 2), drop the 'likes' experiments and keep only
# the first num_results rows of every results page.
filtered_search_results = (
    search_results
    .loc[lambda df: df['seed_sequence'].isin([0, 38, 78]).where(
        # the 'vaccines' experiment uses the seed sequences above ...
        df['experiment'] == 'vaccines',
        # ... every other experiment uses these
        df['seed_sequence'].isin([0, 40, 80]),
    )]
    .loc[lambda df: ~df['experiment'].str.contains('likes')]
    .groupby(['experiment', 'query', 'bot_id', 'seed_sequence'])
    .head(num_results)
)

# Calculate SERP-MS scores: one score per results page, i.e. per
# (topic, query, sequence_name, seed_sequence, bot_id) group. Each result's
# normalized annotation is weighted by (num_results - position + 1), so
# earlier positions weigh more, and the weighted sum is divided by
# num_results * (num_results + 1) / 2.
# NOTE(review): the weights assume 'position' is 1-based - confirm.
serp = filtered_search_results.groupby([
    'topic', 'query', 'sequence_name', 'seed_sequence', 'bot_id'
]).apply(
    # column arithmetic on the whole block instead of a row-by-row apply
    lambda block: (
        block['normalized_annotation'] * (num_results - block['position'] + 1)
    ).sum() / (
        (num_results * (num_results + 1)) / 2
    )
).rename('serp').reset_index()

# compare function for statistical significance testing
def compare(data1_df, data2_df, groups, data1_label, data2_label):
    """Compare two samples per group with a two-sided Mann-Whitney U test.

    For every entry of ``groups``, the ``'value'`` column of the rows whose
    ``'group'`` column equals that entry is taken from ``data1_df`` and from
    ``data2_df`` and the two samples are tested against each other. The
    significance level 0.05 is Bonferroni-corrected by dividing it by
    ``len(groups)``.

    Groups are skipped (with a printed message) when either sample is empty
    or when either sample consists of a single distinct value.

    Parameters
    ----------
    data1_df, data2_df : pandas.DataFrame
        Frames with a ``'group'`` and a ``'value'`` column.
    groups : sequence
        Group labels to test. Its length is the Bonferroni divisor, so
        skipped groups still count towards the correction.
    data1_label, data2_label : str
        Labels used in the printed header and in the result column names.

    Returns
    -------
    pandas.DataFrame
        One row per tested group with mean, std and count of both samples,
        the test statistic, the p-value and a textual conclusion. Empty when
        no group was tested.
    """
    # Function-local import keeps this block self-contained; the previously
    # unused matplotlib import was removed.
    from scipy.stats import mannwhitneyu

    print(f'Difference of {data1_label} and {data2_label}')
    print()

    alpha = 0.05
    # apply Bonferroni correction to alpha (nothing to correct for when no
    # groups are given; this also avoids a division by zero)
    n_groups = len(groups)
    if n_groups:
        alpha /= n_groups
    print('alpha', alpha)

    results = {}
    for group in groups:
        data1 = data1_df.loc[data1_df['group'] == group, 'value']
        data2 = data2_df.loc[data2_df['group'] == group, 'value']

        # Without observations on one side the test is undefined; a NaN
        # p-value would otherwise fall through to the "reject H0" branch
        # below because `p > alpha` is False for NaN.
        if len(data1) == 0 or len(data2) == 0:
            print(f'{group} No data in at least one sample')
            continue

        if len(set(data1)) == 1 or len(set(data2)) == 1:
            print(f'{group} All data identical')
            continue

        stat, p = mannwhitneyu(data1, data2, alternative='two-sided')

        results[group] = {
            # means and standard deviations are rounded to two decimals
            f'{data1_label} mean': round(data1.mean() * 100) / 100,
            f'{data2_label} mean': round(data2.mean() * 100) / 100,
            f'{data1_label} std': round(data1.std() * 100) / 100,
            f'{data2_label} std': round(data2.std() * 100) / 100,
            f'{data1_label} count': len(data1),
            f'{data2_label} count': len(data2),
            'Statistics': stat,
            'p-value': p,
            'conclusion': 'Same distribution (fail to reject H0)' if p > alpha else 'Different distribution (reject H0)'
        }

    # groups become rows, the collected measures become columns
    return pd.DataFrame(results).T

## Comparing search results from start of phase 1 and end of phase 1

In [10]:
# Per-topic SERP-MS: 'A: start' vs. 'B: end of phase 1'.
compare(
    *(
        serp[serp['sequence_name'] == checkpoint].rename(
            columns={'topic': 'group', 'serp': 'value'}
        )
        for checkpoint in ('A: start', 'B: end of phase 1')
    ),
    groups=filtered_search_results['topic'].unique(),
    data1_label='SERP start',
    data2_label='SERP end 1',
)

Difference of SERP start and SERP end 1

alpha 0.01


Unnamed: 0,SERP start mean,SERP end 1 mean,SERP start std,SERP end 1 std,SERP start count,SERP end 1 count,Statistics,p-value,conclusion
chemtrails,-0.45,-0.47,0.14,0.18,50,50,1444.5,0.177164,Same distribution (fail to reject H0)
911,-0.07,-0.06,0.06,0.05,50,50,1108.0,0.308847,Same distribution (fail to reject H0)
vaccines,-0.6,-0.63,0.1,0.1,50,45,1283.5,0.235788,Same distribution (fail to reject H0)
flatearth,-0.27,-0.41,0.36,0.42,50,50,1737.5,0.000720985,Different distribution (reject H0)
moonlanding,-0.57,-0.57,0.15,0.14,50,50,1305.0,0.700635,Same distribution (fail to reject H0)


In [11]:
# Per-query SERP-MS: 'A: start' vs. 'B: end of phase 1'.
compare(
    *(
        serp[serp['sequence_name'] == checkpoint].rename(
            columns={'query': 'group', 'serp': 'value'}
        )
        for checkpoint in ('A: start', 'B: end of phase 1')
    ),
    groups=filtered_search_results['query'].unique(),
    data1_label='SERP start',
    data2_label='SERP end 1',
)

Difference of SERP start and SERP end 1

alpha 0.002
chemtrail pilot speaks out All data identical
chemtrail documentary All data identical
Chemtrails All data identical
Chemtrail flu All data identical
9/11 tribute All data identical
9/11 conspiracy All data identical
9/11 calls from plane All data identical
9/11 inside job All data identical
anti vaccination All data identical
anti vaxxers All data identical
anti vaccine All data identical
anti vax All data identical
flat earth proof All data identical
earth is flat All data identical
flat earth All data identical
moon hoax All data identical
moon landing hoax All data identical
moon landing All data identical
moon landing fake All data identical
moon landing conspiracy All data identical


Unnamed: 0,SERP start mean,SERP end 1 mean,SERP start std,SERP end 1 std,SERP start count,SERP end 1 count,Statistics,p-value,conclusion
Chemtrail,-0.55,-0.61,0.03,0.02,10,10,94.5,0.000425548,Different distribution (reject H0)
9/11,-0.13,-0.07,0.05,0.04,10,10,8.0,0.000647308,Different distribution (reject H0)
Vaccines,-0.44,-0.48,0.07,0.12,10,9,62.0,0.172158,Same distribution (fail to reject H0)
flat earth paradise,-0.14,-0.16,0.05,0.01,10,10,69.5,0.130037,Same distribution (fail to reject H0)
flat earth british,0.38,0.3,0.09,0.04,10,10,68.0,0.161954,Same distribution (fail to reject H0)


## Comparing search results from end of phase 1 and end of phase 2

In [18]:
# Per-topic SERP-MS: 'B: end of phase 1' vs. 'C: end of phase 2'.
compare(
    *(
        serp[serp['sequence_name'] == checkpoint].rename(
            columns={'topic': 'group', 'serp': 'value'}
        )
        for checkpoint in ('B: end of phase 1', 'C: end of phase 2')
    ),
    groups=filtered_search_results['topic'].unique(),
    data1_label='SERP end 1',
    data2_label='SERP end 2',
)

Difference of SERP end 1 and SERP end 2

alpha 0.01


Unnamed: 0,SERP end 1 mean,SERP end 2 mean,SERP end 1 std,SERP end 2 std,SERP end 1 count,SERP end 2 count,Statistics,p-value,conclusion
chemtrails,-0.47,-0.49,0.18,0.19,50,50,1465.0,0.13172,Same distribution (fail to reject H0)
911,-0.06,-0.11,0.05,0.12,50,50,1480.0,0.098797,Same distribution (fail to reject H0)
vaccines,-0.63,-0.68,0.1,0.06,45,45,1325.5,0.0108576,Same distribution (fail to reject H0)
flatearth,-0.41,-0.45,0.42,0.38,50,50,1388.0,0.336876,Same distribution (fail to reject H0)
moonlanding,-0.57,-0.59,0.14,0.13,50,50,1540.0,0.0378587,Same distribution (fail to reject H0)


## Comparing search results from start of phase 1 and end of phase 2

In [19]:
# Per-topic SERP-MS: 'A: start' vs. 'C: end of phase 2'.
compare(
    *(
        serp[serp['sequence_name'] == checkpoint].rename(
            columns={'topic': 'group', 'serp': 'value'}
        )
        for checkpoint in ('A: start', 'C: end of phase 2')
    ),
    groups=filtered_search_results['topic'].unique(),
    data1_label='SERP start',
    data2_label='SERP end 2',
)

Difference of SERP start and SERP end 2

alpha 0.01


Unnamed: 0,SERP start mean,SERP end 2 mean,SERP start std,SERP end 2 std,SERP start count,SERP end 2 count,Statistics,p-value,conclusion
chemtrails,-0.45,-0.49,0.14,0.19,50,50,1585.0,0.0193648,Same distribution (fail to reject H0)
911,-0.07,-0.11,0.06,0.12,50,50,1380.0,0.351677,Same distribution (fail to reject H0)
vaccines,-0.6,-0.68,0.1,0.06,50,45,1608.5,0.000295595,Different distribution (reject H0)
flatearth,-0.27,-0.45,0.36,0.38,50,50,1795.5,0.000159655,Different distribution (reject H0)
moonlanding,-0.57,-0.59,0.15,0.13,50,50,1600.0,0.0136509,Same distribution (fail to reject H0)


## Comparing search results disregarding topics

In [20]:
# SERP-MS pooled over all topics: 'A: start' vs. 'B: end of phase 1'.
compare(
    *(
        serp[serp['sequence_name'] == checkpoint]
        .assign(group='All')
        .rename(columns={'serp': 'value'})
        for checkpoint in ('A: start', 'B: end of phase 1')
    ),
    groups=['All'],
    data1_label='SERP start',
    data2_label='SERP end 1',
)

Difference of SERP start and SERP end 1

alpha 0.05


Unnamed: 0,SERP end 1 count,SERP end 1 mean,SERP end 1 std,SERP start count,SERP start mean,SERP start std,Statistics,conclusion,p-value
All,245,-0.42,0.3,250,-0.39,0.28,34118.5,Different distribution (reject H0),0.0279798


In [21]:
# SERP-MS pooled over all topics: 'B: end of phase 1' vs. 'C: end of phase 2'.
compare(
    *(
        serp[serp['sequence_name'] == checkpoint]
        .assign(group='All')
        .rename(columns={'serp': 'value'})
        for checkpoint in ('B: end of phase 1', 'C: end of phase 2')
    ),
    groups=['All'],
    data1_label='SERP end 1',
    data2_label='SERP end 2',
)

Difference of SERP end 1 and SERP end 2

alpha 0.05


Unnamed: 0,SERP end 1 count,SERP end 1 mean,SERP end 1 std,SERP end 2 count,SERP end 2 mean,SERP end 2 std,Statistics,conclusion,p-value
All,245,-0.42,0.3,245,-0.46,0.29,32417.5,Same distribution (fail to reject H0),0.124426


In [22]:
# SERP-MS pooled over all topics: 'A: start' vs. 'C: end of phase 2'.
compare(
    *(
        serp[serp['sequence_name'] == checkpoint]
        .assign(group='All')
        .rename(columns={'serp': 'value'})
        for checkpoint in ('A: start', 'C: end of phase 2')
    ),
    groups=['All'],
    data1_label='SERP start',
    data2_label='SERP end 2',
)

Difference of SERP start and SERP end 2

alpha 0.05


Unnamed: 0,SERP end 2 count,SERP end 2 mean,SERP end 2 std,SERP start count,SERP start mean,SERP start std,Statistics,conclusion,p-value
All,245,-0.46,0.29,250,-0.39,0.28,36515,Different distribution (reject H0),0.000210602


# Recommendations

## Preparing recommendations dataset and functions

In [36]:
# load recommendations from CSV
recommendations = pd.read_csv('../Data/recommendations.csv', index_col=0)

# filter annotated recommendations: keep the selected seed sequences, rows
# with annotation > -2, and experiments whose name does not contain 'likes';
# then keep the first 10 rows per (experiment, bot_id, seed_sequence).
# NOTE(review): unlike the search-results filter, the same seed sequences are
# used for every experiment, including 'vaccines' - confirm this is intended.
# NOTE(review): presumably annotation values <= -2 mark unannotated rows -
# confirm against the dataset description.
filtered_recommendations = (
    recommendations
    # vectorized membership test instead of a row-wise apply
    .loc[lambda df: df['seed_sequence'].isin([1, 2, 39, 40, 79, 80])]
    .loc[lambda df: df['annotation'] > -2]
    .loc[lambda df: ~df['experiment'].str.contains('likes')]
    .groupby(['experiment', 'bot_id', 'seed_sequence'])
    .head(10)
)

# group recommendations by recommendation sequence: mean normalized
# annotation per (experiment, bot_id, seed_sequence, topic, sequence_name)
filtered_recommendations_grouped = (
    filtered_recommendations
    .groupby(['experiment', 'bot_id', 'seed_sequence', 'topic', 'sequence_name'])
    ['normalized_annotation']
    .mean()
    .reset_index()
)

## Compare start of phase 1 and end of phase 1

In [35]:
# Per-topic recommendations: 'A: start' vs. 'B: end of phase 1'.
compare(
    data1_df=filtered_recommendations_grouped.loc[
        (filtered_recommendations_grouped['sequence_name'] == 'A: start')
    ].rename(columns={'topic': 'group', 'normalized_annotation': 'value'}),
    
    data2_df=filtered_recommendations_grouped.loc[
        (filtered_recommendations_grouped['sequence_name'] == 'B: end of phase 1')
    ].rename(columns={'topic': 'group', 'normalized_annotation': 'value'}),
    
    # take the topics from the recommendations being tested rather than from
    # the search-results frame, so the tested groups (and the Bonferroni
    # divisor) always match this dataset
    groups=filtered_recommendations_grouped['topic'].unique(),
    data1_label='Recommendations start',
    data2_label='Recommendations end 1'
)

Difference of Recommendations start and Recommendations end 1

alpha 0.01


Unnamed: 0,Recommendations start mean,Recommendations end 1 mean,Recommendations start std,Recommendations end 1 std,Recommendations start count,Recommendations end 1 count,Statistics,p-value,conclusion
chemtrails,0.0,0.05,0.04,0.16,20,20,195.0,0.885966,Same distribution (fail to reject H0)
911,0.1,0.42,0.19,0.19,20,20,45.5,2.63704e-05,Different distribution (reject H0)
vaccines,-0.1,0.04,0.12,0.12,20,18,74.5,0.00159445,Different distribution (reject H0)
flatearth,-0.17,-0.06,0.18,0.19,20,20,141.5,0.10469,Same distribution (fail to reject H0)
moonlanding,-0.2,-0.4,0.37,0.17,20,20,268.0,0.0641742,Same distribution (fail to reject H0)


## Compare end of phase 1 and end of phase 2

In [29]:
# Per-topic recommendations: 'B: end of phase 1' vs. 'C: end of phase 2'.
compare(
    data1_df=filtered_recommendations_grouped.loc[
        (filtered_recommendations_grouped['sequence_name'] == 'B: end of phase 1')
    ].rename(columns={'topic': 'group', 'normalized_annotation': 'value'}),
    
    data2_df=filtered_recommendations_grouped.loc[
        (filtered_recommendations_grouped['sequence_name'] == 'C: end of phase 2')
    ].rename(columns={'topic': 'group', 'normalized_annotation': 'value'}),
    
    # take the topics from the recommendations being tested rather than from
    # the search-results frame, so the tested groups (and the Bonferroni
    # divisor) always match this dataset
    groups=filtered_recommendations_grouped['topic'].unique(),
    data1_label='Recommendations end 1',
    data2_label='Recommendations end 2'
)

Difference of Recommendations end 1 and Recommendations end 2

alpha 0.01


Unnamed: 0,Recommendations end 1 mean,Recommendations end 2 mean,Recommendations end 1 std,Recommendations end 2 std,Recommendations end 1 count,Recommendations end 2 count,Statistics,p-value,conclusion
chemtrails,0.05,-0.15,0.16,0.18,20,20,323,0.000642384,Different distribution (reject H0)
911,0.42,0.07,0.19,0.14,20,20,372,2.93041e-06,Different distribution (reject H0)
vaccines,0.04,-0.37,0.12,0.24,18,18,310,2.49307e-06,Different distribution (reject H0)
flatearth,-0.06,-0.47,0.19,0.19,20,20,375,1.87011e-06,Different distribution (reject H0)
moonlanding,-0.4,-0.42,0.17,0.11,20,20,243,0.23945,Same distribution (fail to reject H0)


## Compare start of phase 1 and end of phase 2

In [30]:
# Per-topic recommendations: 'A: start' vs. 'C: end of phase 2'.
compare(
    data1_df=filtered_recommendations_grouped.loc[
        (filtered_recommendations_grouped['sequence_name'] == 'A: start')
    ].rename(columns={'topic': 'group', 'normalized_annotation': 'value'}),
    
    data2_df=filtered_recommendations_grouped.loc[
        (filtered_recommendations_grouped['sequence_name'] == 'C: end of phase 2')
    ].rename(columns={'topic': 'group', 'normalized_annotation': 'value'}),
    
    # take the topics from the recommendations being tested rather than from
    # the search-results frame, so the tested groups (and the Bonferroni
    # divisor) always match this dataset
    groups=filtered_recommendations_grouped['topic'].unique(),
    data1_label='Recommendations start',
    data2_label='Recommendations end 2'
)

Difference of Recommendations start and Recommendations end 2

alpha 0.01


Unnamed: 0,Recommendations start mean,Recommendations end 2 mean,Recommendations start std,Recommendations end 2 std,Recommendations start count,Recommendations end 2 count,Statistics,p-value,conclusion
chemtrails,0.0,-0.15,0.04,0.18,20,20,330.0,0.000164933,Different distribution (reject H0)
911,0.1,0.07,0.19,0.14,20,20,216.5,0.651358,Same distribution (fail to reject H0)
vaccines,-0.1,-0.37,0.12,0.24,20,18,307.5,0.000160187,Different distribution (reject H0)
flatearth,-0.17,-0.47,0.18,0.19,20,20,347.0,6.3197e-05,Different distribution (reject H0)
moonlanding,-0.2,-0.42,0.37,0.11,20,20,290.0,0.0142997,Same distribution (fail to reject H0)


## Compare recommendations disregarding topics

In [31]:
# Recommendations pooled over all topics: 'A: start' vs. 'B: end of phase 1'.
compare(
    *(
        filtered_recommendations_grouped[
            filtered_recommendations_grouped['sequence_name'] == checkpoint
        ]
        .assign(group='All')
        .rename(columns={'normalized_annotation': 'value'})
        for checkpoint in ('A: start', 'B: end of phase 1')
    ),
    groups=['All'],
    data1_label='Recommendations start',
    data2_label='Recommendations end 1',
)

Difference of Recommendations start and Recommendations end 1

alpha 0.05


Unnamed: 0,Recommendations end 1 count,Recommendations end 1 mean,Recommendations end 1 std,Recommendations start count,Recommendations start mean,Recommendations start std,Statistics,conclusion,p-value
All,98,0.01,0.31,100,-0.07,0.24,4085,Different distribution (reject H0),0.0396959


In [32]:
# Recommendations pooled over all topics:
# 'B: end of phase 1' vs. 'C: end of phase 2'.
compare(
    *(
        filtered_recommendations_grouped[
            filtered_recommendations_grouped['sequence_name'] == checkpoint
        ]
        .assign(group='All')
        .rename(columns={'normalized_annotation': 'value'})
        for checkpoint in ('B: end of phase 1', 'C: end of phase 2')
    ),
    groups=['All'],
    data1_label='Recommendations end 1',
    data2_label='Recommendations end 2',
)

Difference of Recommendations end 1 and Recommendations end 2

alpha 0.05


Unnamed: 0,Recommendations end 1 count,Recommendations end 1 mean,Recommendations end 1 std,Recommendations end 2 count,Recommendations end 2 mean,Recommendations end 2 std,Statistics,conclusion,p-value
All,98,0.01,0.31,98,-0.27,0.27,7179.5,Different distribution (reject H0),1.7585e-09


In [33]:
# Recommendations pooled over all topics: 'A: start' vs. 'C: end of phase 2'.
compare(
    *(
        filtered_recommendations_grouped[
            filtered_recommendations_grouped['sequence_name'] == checkpoint
        ]
        .assign(group='All')
        .rename(columns={'normalized_annotation': 'value'})
        for checkpoint in ('A: start', 'C: end of phase 2')
    ),
    groups=['All'],
    data1_label='Recommendations start',
    data2_label='Recommendations end 2',
)

Difference of Recommendations start and Recommendations end 2

alpha 0.05


Unnamed: 0,Recommendations end 2 count,Recommendations end 2 mean,Recommendations end 2 std,Recommendations start count,Recommendations start mean,Recommendations start std,Statistics,conclusion,p-value
All,98,-0.27,0.27,100,-0.07,0.24,6940.5,Different distribution (reject H0),2.87893e-07


# Home page results

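Please note that home page results were annotated by a trained machine learning model. Due to ethical risks, we decided not to publish the automatically produced annotations. The results below therefore cannot be re-run in this notebook: the `filtered_home_page_grouped` data frame used by the following cells is not defined here. The saved per-topic tables also contain `promoting` and `debunking` columns that the `compare` function defined above does not produce.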

## Compare start of phase 1 and end of phase 1

In [56]:
# Per-topic home page results: 'A: start' vs. 'B: end of phase 1'.
# NOTE: filtered_home_page_grouped is not defined in this notebook (the
# annotations are unpublished), so this cell cannot be re-run as is.
compare(
    *(
        filtered_home_page_grouped[
            filtered_home_page_grouped['sequence_name'] == checkpoint
        ].rename(columns={'topic': 'group', 'video_label_value': 'value'})
        for checkpoint in ('A: start', 'B: end of phase 1')
    ),
    groups=filtered_home_page_grouped['topic'].unique(),
    data1_label='Home page start',
    data2_label='Home page end 1',
)

Difference of Home page start and Home page end 1

alpha 0.01


Unnamed: 0,Home page start mean,Home page end 1 mean,Home page start std,Home page end 1 std,Home page start count,Home page end 1 count,Home page start promoting,Home page end 1 promoting,Home page start debunking,Home page end 1 debunking,Statistics,p-value,conclusion
911,0.02,0.26,0.04,0.08,20,20,0.0,0.0,0.0,0.0,5.0,0.0,Different distribution (reject H0)
chemtrails,0.04,0.03,0.07,0.07,20,20,0.0,0.0,0.0,0.0,208.0,0.813577,Same distribution (fail to reject H0)
flatearth,0.0,0.01,0.06,0.15,20,20,0.0,0.0,0.0,0.0,197.0,0.942925,Same distribution (fail to reject H0)
moonlanding,-0.02,-0.14,0.05,0.07,20,20,0.0,0.0,0.0,0.0,377.0,0.0,Different distribution (reject H0)
vaccines,-0.02,0.02,0.06,0.12,20,18,0.0,0.0,0.0,0.0,138.5,0.203447,Same distribution (fail to reject H0)


## Compare end of phase 1 and end of phase 2

In [57]:
# Per-topic home page results: 'B: end of phase 1' vs. 'C: end of phase 2'.
# NOTE: filtered_home_page_grouped is not defined in this notebook (the
# annotations are unpublished), so this cell cannot be re-run as is.
compare(
    *(
        filtered_home_page_grouped[
            filtered_home_page_grouped['sequence_name'] == checkpoint
        ].rename(columns={'topic': 'group', 'video_label_value': 'value'})
        for checkpoint in ('B: end of phase 1', 'C: end of phase 2')
    ),
    groups=filtered_home_page_grouped['topic'].unique(),
    data1_label='Home page end 1',
    data2_label='Home page end 2',
)

Difference of Home page end 1 and Home page end 2

alpha 0.01


Unnamed: 0,Home page end 1 mean,Home page end 2 mean,Home page end 1 std,Home page end 2 std,Home page end 1 count,Home page end 2 count,Home page end 1 promoting,Home page end 2 promoting,Home page end 1 debunking,Home page end 2 debunking,Statistics,p-value,conclusion
911,0.26,0.06,0.08,0.1,20,20,0.0,0.0,0.0,0.0,370.0,3e-06,Different distribution (reject H0)
chemtrails,0.03,-0.32,0.07,0.09,20,20,0.0,0.0,0.0,0.0,399.0,0.0,Different distribution (reject H0)
flatearth,0.01,-0.26,0.15,0.11,20,20,0.0,0.0,0.0,0.0,371.0,3e-06,Different distribution (reject H0)
moonlanding,-0.14,-0.3,0.07,0.12,20,20,0.0,0.0,0.0,0.0,348.0,3e-05,Different distribution (reject H0)
vaccines,0.02,-0.11,0.12,0.06,18,9,0.0,0.0,0.0,0.0,131.0,0.008602,Different distribution (reject H0)


## Compare start of phase 1 and end of phase 2

In [58]:
# Per-topic home page results: 'A: start' vs. 'C: end of phase 2'.
# NOTE: filtered_home_page_grouped is not defined in this notebook (the
# annotations are unpublished), so this cell cannot be re-run as is.
compare(
    *(
        filtered_home_page_grouped[
            filtered_home_page_grouped['sequence_name'] == checkpoint
        ].rename(columns={'topic': 'group', 'video_label_value': 'value'})
        for checkpoint in ('A: start', 'C: end of phase 2')
    ),
    groups=filtered_home_page_grouped['topic'].unique(),
    data1_label='Home page start',
    data2_label='Home page end 2',
)

Difference of Home page start and Home page end 2

alpha 0.01


Unnamed: 0,Home page start mean,Home page end 2 mean,Home page start std,Home page end 2 std,Home page start count,Home page end 2 count,Home page start promoting,Home page end 2 promoting,Home page start debunking,Home page end 2 debunking,Statistics,p-value,conclusion
911,0.02,0.06,0.04,0.1,20,20,0.0,0.0,0.0,0.0,142.5,0.092467,Same distribution (fail to reject H0)
chemtrails,0.04,-0.32,0.07,0.09,20,20,0.0,0.0,0.0,0.0,400.0,0.0,Different distribution (reject H0)
flatearth,0.0,-0.26,0.06,0.11,20,20,0.0,0.0,0.0,0.0,395.5,0.0,Different distribution (reject H0)
moonlanding,-0.02,-0.3,0.05,0.12,20,20,0.0,0.0,0.0,0.0,394.5,0.0,Different distribution (reject H0)
vaccines,-0.02,-0.11,0.06,0.06,20,9,0.0,0.0,0.0,0.0,146.5,0.00396,Different distribution (reject H0)


## Compare home page results disregarding topics

In [73]:
# Home page results pooled over all topics:
# 'A: start' vs. 'B: end of phase 1'.
# NOTE: filtered_home_page_grouped is not defined in this notebook (the
# annotations are unpublished), so this cell cannot be re-run as is.
compare(
    *(
        filtered_home_page_grouped[
            filtered_home_page_grouped['sequence_name'] == checkpoint
        ]
        .assign(group='All')
        .rename(columns={'video_label_value': 'value'})
        for checkpoint in ('A: start', 'B: end of phase 1')
    ),
    groups=['All'],
    data1_label='Home page start',
    data2_label='Home page end 1',
)

Difference of Home page start and Home page end 1

alpha 0.05


Unnamed: 0,Home page end 1 count,Home page end 1 mean,Home page end 1 std,Home page start count,Home page start mean,Home page start std,Statistics,conclusion,p-value
All,98,0.03,0.16,100,0.0,0.06,4584.0,Same distribution (fail to reject H0),0.405771


In [75]:
# Home page results pooled over all topics:
# 'B: end of phase 1' vs. 'C: end of phase 2'.
# NOTE: filtered_home_page_grouped is not defined in this notebook (the
# annotations are unpublished), so this cell cannot be re-run as is.
compare(
    *(
        filtered_home_page_grouped[
            filtered_home_page_grouped['sequence_name'] == checkpoint
        ]
        .assign(group='All')
        .rename(columns={'video_label_value': 'value'})
        for checkpoint in ('B: end of phase 1', 'C: end of phase 2')
    ),
    groups=['All'],
    data1_label='Home page end 1',
    data2_label='Home page end 2',
)

Difference of Home page end 1 and Home page end 2

alpha 0.05


Unnamed: 0,Home page end 1 count,Home page end 1 mean,Home page end 1 std,Home page end 2 count,Home page end 2 mean,Home page end 2 std,Statistics,conclusion,p-value
All,98,0.03,0.16,89,-0.19,0.18,7145.0,Different distribution (reject H0),0.0


In [76]:
# Home page results pooled over all topics:
# 'A: start' vs. 'C: end of phase 2'.
# NOTE: filtered_home_page_grouped is not defined in this notebook (the
# annotations are unpublished), so this cell cannot be re-run as is.
compare(
    *(
        filtered_home_page_grouped[
            filtered_home_page_grouped['sequence_name'] == checkpoint
        ]
        .assign(group='All')
        .rename(columns={'video_label_value': 'value'})
        for checkpoint in ('A: start', 'C: end of phase 2')
    ),
    groups=['All'],
    data1_label='Home page start',
    data2_label='Home page end 2',
)

Difference of Home page start and Home page end 2

alpha 0.05


Unnamed: 0,Home page end 2 count,Home page end 2 mean,Home page end 2 std,Home page start count,Home page start mean,Home page start std,Statistics,conclusion,p-value
All,89,-0.19,0.18,100,0.0,0.06,7382.5,Different distribution (reject H0),0.0
