In [None]:
# import needed packages
import pandas as pd
from statsmodels.stats.contingency_tables import cochrans_q
from itertools import combinations
from statsmodels.stats.contingency_tables import mcnemar
import statsmodels.stats.multitest as smm
from mlxtend.frequent_patterns import apriori, association_rules

In [None]:
# load the iFood customer table from the CSV file in the working directory
ifood = pd.read_csv(filepath_or_buffer='ifood_df.csv')

In [None]:
# Cochran's Q test: check for significant differences between the customer
# responses to the campaigns

# keep only the campaign acceptance columns plus the final-campaign response
ifood_campaign = ifood.loc[
    :,
    [
        'AcceptedCmp1',
        'AcceptedCmp2',
        'AcceptedCmp3',
        'AcceptedCmp4',
        'AcceptedCmp5',
        'Response',
    ],
]

# run the omnibus test across all selected columns and show its outcome
result = cochrans_q(ifood_campaign)
print(result)
# recorded finding: at least one campaign has a significantly different response rate

In [None]:
# perform post-hoc pairwise McNemar's tests with a Bonferroni correction

# create pairwise comparisons between each campaign
pairs = list(combinations(ifood_campaign.columns, 2))
alpha = 0.05
n_comparisons = len(pairs)

# perform an individual exact McNemar test for each campaign pair
for pair in pairs:
    # cross-tabulate the two campaign columns against each other
    table = pd.crosstab(ifood_campaign[pair[0]], ifood_campaign[pair[1]])
    p_value = mcnemar(table, exact=True).pvalue

    # Bonferroni correction (reduce type I error risk): scale the p-value by
    # the number of comparisons, capped at 1 so it stays a valid probability,
    # and compare it with the ORIGINAL alpha. Dividing alpha by the number of
    # comparisons as well would apply the correction twice.
    p_value_adj = min(p_value * n_comparisons, 1.0)

    # print results of each significant comparison
    if p_value_adj < alpha:
        print(f"{pair}: adjusted p-value={p_value_adj}")

# Findings recorded from an earlier run that used a stricter (doubly corrected)
# threshold -- NOTE(review): re-check against the output of this cell:
# campaign 2 is significantly different from all other campaigns (fewer customer responses)
# the final campaign is significantly different from all 5 other campaigns (more customer responses)
# campaigns 1, 3, 4, and 5 are not significantly different from each other in terms of customer responses

In [None]:
# association analysis

# get the frequent campaign-response itemsets (support >= 1%); no max_len is
# set, so itemsets of any size are returned, not only pairs.
# apriori expects a boolean frame, so the campaign flags are cast to bool
# (assumes the columns hold 0/1 indicators with no missing values -- TODO confirm)
frequent_pairs = apriori(ifood_campaign.astype(bool), min_support=0.01, use_colnames=True)

# output the association rules; num_itemsets is the number of transactions
# (rows) given to apriori, taken from the data instead of a hard-coded count
rules = association_rules(
    frequent_pairs,
    num_itemsets=len(ifood_campaign),
    metric="lift",
    min_threshold=5.0,
)

# filter the association rules to only include consequents with the response variable
rules_response = rules[rules['consequents'].apply(lambda x: 'Response' in x)]

# output the final rules table (sorted by confidence, highest first)
rules_table = rules_response[['antecedents', 'consequents', 'support', 'confidence', 'lift']].sort_values(by='confidence', ascending=False)
print(rules_table)

# recorded finding: customer responses to campaigns 1, 4, and 5 are usually associated with responding to the final campaign