In [None]:
!pip install pyreadstat

In [None]:
import pandas as pd
from sklearn import linear_model
import statsmodels.api as sm
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the dataset. 'YourFile' is a placeholder: replace it with the path to the CSV.
df = pd.read_csv('YourFile')

# List the distinct values found in the 'concept' column.
print(df['concept'].unique())

# Preview the first rows. This has to be the last expression of the cell:
# Jupyter only renders the value of the final expression, so a bare
# `df.head()` in the middle of the cell produces no output.
df.head()

# Descriptive Statistics
Before running the models, we first want to review the descriptive statistics of our variables. The following bar charts are created:


*   Community Subscriptions per Communication Variable
*   The Count (N) For Communication Variables
*   The Count (N) For Sample Size
*   Community Subscriptions per Concept





In [None]:
# Community Subscriptions per Communication Variable
# For the rows where ComSub_dummy == 1, sum every campaign element column
# and draw the totals as a bar chart.
campaign_columns = [
    'Emotion_Fear', 'Emotion_Love',
    'Topic_Sprotection', 'Topic_Affiliation', 'Topic_Kincare',
    'Appeal_Exp', 'Appeal_Testi', 'Appeal_Infor', 'Appeal_Pers',
    'LStyle_Fperson', 'LStyle_Tperson',
]
X = df[campaign_columns]
Y = df['ComSub_dummy']

is_subscribed = df['ComSub_dummy'] == 1
subscribed_rows = df[is_subscribed]
element_totals = subscribed_rows[X.columns].sum()

plt.figure(figsize=(12, 6))
rects = plt.bar(element_totals.index, element_totals, width=0.6, color='skyblue')

# Write the integer height of each bar just above it, centred on the bar.
for rect in rects:
    height = rect.get_height()
    x_centre = rect.get_x() + rect.get_width() / 2
    plt.text(x_centre, height + 0.05, str(int(height)), ha='center')

plt.title('Community Subscriptions per Campaign Element')
plt.xlabel('Campaign Element Variables')
plt.ylabel('Count for Community Subscriptions')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
# The Count (N) For Communication Variables
# Sum of the absolute values of each campaign element column over the whole data set.
campaign_columns = [
    'Emotion_Fear', 'Emotion_Love',
    'Topic_Sprotection', 'Topic_Affiliation', 'Topic_Kincare',
    'Appeal_Exp', 'Appeal_Testi', 'Appeal_Infor', 'Appeal_Pers',
    'LStyle_Fperson', 'LStyle_Tperson',
]
X = df[campaign_columns]

absolute_sums = X.abs().sum()
totals = absolute_sums.values

plt.figure(figsize=(10, 6))
bar_chart = plt.bar(absolute_sums.index, totals, color='skyblue')

# Label every bar with its total, placed slightly above the bar.
for position, bar in enumerate(bar_chart):
    label_x = bar.get_x() + bar.get_width() / 2 - 0.1
    label_y = bar.get_height() + 0.1
    plt.text(label_x, label_y, str(totals[position]), fontsize=9, ha='center')

plt.xlabel('Campaign Element Variables')
plt.ylabel('Count')
plt.title('Sample Size (N) for Campaign Element Variables on Platform')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
# The Count (N) For Sample Size
# Number of first-visit rows per concept, with concept 13 left out.
# NOTE(review): the reason for excluding concept 13 is not visible here - confirm.
X = df['concept']

# Convert 'concept' to numbers in place; values that cannot be parsed become NaN.
df['concept'] = pd.to_numeric(df['concept'], errors='coerce')

is_first_visit = df['first_visit'] == 1
is_not_concept_13 = df['concept'] != 13
first_visit_rows = df[is_first_visit & is_not_concept_13]

concept_sizes = first_visit_rows['concept'].value_counts()

plt.figure(figsize=(12, 6))
rects = plt.bar(concept_sizes.index.astype(str), concept_sizes, color='skyblue')

# Write the integer height of each bar just above it, centred on the bar.
for rect in rects:
    height = rect.get_height()
    x_centre = rect.get_x() + rect.get_width() / 2
    plt.text(x_centre, height + 0.05, str(int(height)), ha='center')

plt.title('Concept sample size (N)')
plt.xlabel('Concept')
plt.ylabel('Count')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
# Community Subscriptions per Concept
# Number of rows with ComSub_dummy == 1 per concept, with concept 13 left out.
# NOTE(review): the reason for excluding concept 13 is not visible here - confirm.
X = df['concept']
Y = df['ComSub_dummy']

# Convert 'concept' to numbers in place; values that cannot be parsed become NaN.
df['concept'] = pd.to_numeric(df['concept'], errors='coerce')

is_subscribed = df['ComSub_dummy'] == 1
is_not_concept_13 = df['concept'] != 13
subscribed_rows = df[is_subscribed & is_not_concept_13]

subscriptions_per_concept = subscribed_rows['concept'].value_counts()

plt.figure(figsize=(12, 6))
rects = plt.bar(
    subscriptions_per_concept.index.astype(str),
    subscriptions_per_concept,
    color='skyblue',
)

# Write the integer height of each bar just above it, centred on the bar.
for rect in rects:
    height = rect.get_height()
    x_centre = rect.get_x() + rect.get_width() / 2
    plt.text(x_centre, height + 0.05, str(int(height)), ha='center')

plt.title('Community Subscriptions per Concept')
plt.xlabel('Concept')
plt.ylabel('Count for Community Subscriptions')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()


I am running a Pearson correlation check in pandas to inspect the pairwise correlations between the communication variables used in the models, the outcome variable (ComSub_dummy), and avg_engagement_time_sec.

In [None]:
# Pairwise correlations between the model predictors, the outcome (ComSub_dummy)
# and avg_engagement_time_sec. DataFrame.corr() uses Pearson correlation by default.
correlation_columns = [
    'Emotion_Fear', 'Topic_Affiliation', 'Topic_Kincare',
    'Appeal_Testi', 'Appeal_Pers', 'LStyle_Tperson',
    'ComSub_dummy', 'avg_engagement_time_sec',
]
correlation_matrix = df[correlation_columns].corr()

# Print the correlation matrix
print(correlation_matrix)

# Export with the index: the index of a correlation matrix holds the row
# variable names, so writing it with index=False leaves the rows unlabeled.
correlation_matrix.to_csv('Pearson_Correlation.csv')

# Model 1: Communication Variables main effects
This section contains our models that measure the effect the communication variables have on ComSub_dummy.

The communication variables:


*  Emotion (Love vs. Fear)
*  Topic (Self-protection vs. Affiliation vs. Kincare)
*  Appeal 1 (Expert vs. Testimonial)
*  Appeal 2 (Informative vs. Persuasive)
*  Linguistic style (First-person vs. Third-person)

In [None]:
# Model 1 inputs: the communication variable dummies as predictors,
# ComSub_dummy as the outcome.
communication_columns = [
    'Emotion_Fear',
    'Topic_Affiliation',
    'Topic_Kincare',
    'Appeal_Testi',
    'Appeal_Pers',
    'LStyle_Tperson',
]
X = df[communication_columns]
Y = df['ComSub_dummy']

In [None]:
# Model 1: logistic regression of Y on X with an intercept column added.
X = sm.add_constant(X)
logit_spec = sm.Logit(Y, X)
logit_mod = logit_spec.fit()
print(logit_mod.summary())

# Table with the confidence-interval bounds and the coefficient of every term.
params = logit_mod.params
conf = logit_mod.conf_int()
conf.columns = ["Lower CI", "Upper CI"]
conf["OR"] = params

# Exponentiate the coefficients and their bounds to express them as odds ratios.
np.exp(conf)


# Model 2: Supportive Model with Control Variables
Control variables to include:


*   Part of week (PW): PW_weekend_dummy
*   Part of day  (PD): PD_afternoon_dummy, PD_evening_dummy, PD_night_dummy
*   Working hours (WH): WH_workhours_dummy
*   Device Category (DC): DC_desktop_dummy, DC_tablet_dummy





In [None]:
# Model 2 inputs: only the control variable dummies as predictors,
# ComSub_dummy as the outcome.
control_columns = [
    'PW_weekend_dummy',
    'PD_afternoon_dummy',
    'PD_evening_dummy',
    'PD_night_dummy',
    'WH_workhours_dummy',
    'DC_desktop_dummy',
    'DC_tablet_dummy',
]
X = df[control_columns]
Y = df['ComSub_dummy']

In [None]:
# Model 2: logistic regression of Y on X with an intercept column added.
X = sm.add_constant(X)
logit_spec = sm.Logit(Y, X)
logit_mod = logit_spec.fit()
print(logit_mod.summary())

# Table with the confidence-interval bounds and the coefficient of every term.
params = logit_mod.params
conf = logit_mod.conf_int()
conf.columns = ["Lower CI", "Upper CI"]
conf["OR"] = params

# Exponentiate the coefficients and their bounds to express them as odds ratios.
np.exp(conf)



# Model 3: Communication Variables and control variables
This section contains our models that measure the effect the communication variables have on ComSub_dummy while also including the control variables.

The communication variables:


*  Emotion (Love vs. Fear)
*  Topic (Self protection vs. Affiliation vs. Kin-Care)
*  Appeal 1 (Expert vs. Testimonial)
*  Appeal 2 (Informative vs. Persuasive)
*  Linguistic style (First-person vs. Third-person)

Control variables that are included:


*   Part of week (PW): PW_weekend_dummy
*   Part of day  (PD): PD_afternoon_dummy, PD_evening_dummy, PD_night_dummy
*   Working hours (WH): WH_workhours_dummy
*   Device Category (DC): DC_desktop_dummy, DC_tablet_dummy

In [None]:
# Model 3 inputs: communication variable dummies followed by the control
# variable dummies as predictors, ComSub_dummy as the outcome.
communication_columns = [
    'Emotion_Fear',
    'Topic_Affiliation',
    'Topic_Kincare',
    'Appeal_Testi',
    'Appeal_Pers',
    'LStyle_Tperson',
]
control_columns = [
    'PW_weekend_dummy',
    'PD_afternoon_dummy',
    'PD_evening_dummy',
    'PD_night_dummy',
    'WH_workhours_dummy',
    'DC_desktop_dummy',
    'DC_tablet_dummy',
]
X = df[communication_columns + control_columns]
Y = df['ComSub_dummy']

In [None]:
# Model 3: logistic regression of Y on X with an intercept column added.
X = sm.add_constant(X)
logit_spec = sm.Logit(Y, X)
logit_mod = logit_spec.fit()
print(logit_mod.summary())

# Table with the confidence-interval bounds and the coefficient of every term.
params = logit_mod.params
conf = logit_mod.conf_int()
conf.columns = ["Lower CI", "Upper CI"]
conf["OR"] = params

# Exponentiate the coefficients and their bounds to express them as odds ratios.
np.exp(conf)


# Model 4: Interaction effects
This section contains our models that measure the effect of different interactions between our campaign's predictor variables on ComSub_dummy.

**The interactions:**

Emotion and Topic:
*   Emotion_Fear x Topic_Affiliation
*   Emotion_Fear x Topic_Kincare

















In [None]:


# Emotion and Topic
# Each interaction term is the element-wise product of its two columns and is
# stored on df under the name '<left> x <right>'.
emotion_topic_pairs = [
    ('Emotion_Fear', 'Topic_Affiliation'),
    ('Emotion_Fear', 'Topic_Kincare'),
]
for left_column, right_column in emotion_topic_pairs:
    df[f'{left_column} x {right_column}'] = df[left_column] * df[right_column]




In [None]:
# Model 4 inputs: communication variables, control variables and the two
# Emotion x Topic interaction terms as predictors, ComSub_dummy as the outcome.
communication_columns = [
    'Emotion_Fear',
    'Topic_Affiliation',
    'Topic_Kincare',
    'Appeal_Testi',
    'Appeal_Pers',
    'LStyle_Tperson',
]
control_columns = [
    'PW_weekend_dummy',
    'PD_afternoon_dummy',
    'PD_evening_dummy',
    'PD_night_dummy',
    'WH_workhours_dummy',
    'DC_desktop_dummy',
    'DC_tablet_dummy',
]
interaction_columns = [
    'Emotion_Fear x Topic_Affiliation',
    'Emotion_Fear x Topic_Kincare',
]
X = df[communication_columns + control_columns + interaction_columns]
Y = df['ComSub_dummy']

In [None]:
# Model 4: logistic regression of Y on X with an intercept column added.
X = sm.add_constant(X)
logit_spec = sm.Logit(Y, X)
logit_mod = logit_spec.fit()
print(logit_mod.summary())

# Table with the confidence-interval bounds and the coefficient of every term.
params = logit_mod.params
conf = logit_mod.conf_int()
conf.columns = ["Lower CI", "Upper CI"]
conf["OR"] = params

# Exponentiate the coefficients and their bounds to express them as odds ratios.
np.exp(conf)

In [None]:
# Inputs for the interaction-only model: seven interaction terms followed by the
# control variable dummies as predictors, ComSub_dummy as the outcome.
#
# Only the two Emotion_Fear x Topic_* columns are created by an earlier cell of
# this notebook. The other five are built here when they are missing, so the
# cell no longer raises KeyError on a fresh kernel. Each term is the
# element-wise product of its two columns, the same construction used for the
# Emotion x Topic terms.
# NOTE(review): this model contains interaction terms without the corresponding
# main effects - confirm that this is intended.
interaction_pairs = [
    ('Emotion_Fear', 'Topic_Affiliation'),
    ('Emotion_Fear', 'Topic_Kincare'),
    ('LStyle_Tperson', 'Topic_Affiliation'),
    ('LStyle_Tperson', 'Topic_Kincare'),
    ('Appeal_Testi', 'LStyle_Tperson'),
    ('Appeal_Pers', 'Topic_Affiliation'),
    ('Appeal_Pers', 'Topic_Kincare'),
]

interaction_columns = []
for left_column, right_column in interaction_pairs:
    interaction_name = f'{left_column} x {right_column}'
    # Do not overwrite a column that already exists (created earlier or loaded from the file).
    if interaction_name not in df.columns:
        df[interaction_name] = df[left_column] * df[right_column]
    interaction_columns.append(interaction_name)

control_columns = [
    'PW_weekend_dummy',
    'PD_afternoon_dummy',
    'PD_evening_dummy',
    'PD_night_dummy',
    'WH_workhours_dummy',
    'DC_desktop_dummy',
    'DC_tablet_dummy',
]

X = df[interaction_columns + control_columns]
Y = df['ComSub_dummy']

In [None]:
# Interaction-only model: logistic regression of Y on X with an intercept column added.
X = sm.add_constant(X)
logit_spec = sm.Logit(Y, X)
logit_mod = logit_spec.fit()
print(logit_mod.summary())

# Table with the confidence-interval bounds and the coefficient of every term.
params = logit_mod.params
conf = logit_mod.conf_int()
conf.columns = ["Lower CI", "Upper CI"]
conf["OR"] = params

# Exponentiate the coefficients and their bounds to express them as odds ratios.
np.exp(conf)