In [1]:
import pandas as pd
import statsmodels.api as sm


In [2]:
# Read the semicolon-separated case data into a DataFrame and display it.
case_data_path = "CaseData2025.csv"
df = pd.read_csv(case_data_path, sep=";")
df

Unnamed: 0,test,purchase,impressions
0,0,0,1
1,1,0,1
2,1,0,1
3,1,0,2
4,1,0,6
...,...,...,...
19995,0,0,2
19996,1,0,11
19997,1,1,1
19998,0,0,1


Ex1

In [3]:
# Split the data by experimental arm using the `test` flag:
# test == 1 -> treatment arm, test == 0 -> control arm.
treatment_group = df.loc[df["test"].eq(1)]
control_group = df.loc[df["test"].eq(0)]

In [4]:
# Conversion rate of each arm = mean of the 0/1 `purchase` column.
CR_treatment, CR_control = (
    arm["purchase"].mean() for arm in (treatment_group, control_group)
)

# Relative lift of treatment over control, expressed in percent.
incremental_lift = (CR_treatment - CR_control) / CR_control * 100

# Printed in the order: treatment rate, control rate, relative lift (%).
for metric in (CR_treatment, CR_control, incremental_lift):
    print(metric)

0.03099906629318394
0.025412960609911054
21.981325863678812


In [5]:
# Logistic regression of purchase on the treatment flag (plus an intercept).
y = df["purchase"]
X = sm.add_constant(df["test"])

model = sm.Logit(y, X)
result = model.fit()

summary_table = result.summary()
print(summary_table)

Optimization terminated successfully.
         Current function value: 0.134306
         Iterations 8
                           Logit Regression Results                           
Dep. Variable:               purchase   No. Observations:                20000
Model:                          Logit   Df Residuals:                    19998
Method:                           MLE   Df Model:                            1
Date:                Thu, 27 Mar 2025   Pseudo R-squ.:               0.0006578
Time:                        09:14:29   Log-Likelihood:                -2686.1
converged:                       True   LL-Null:                       -2687.9
Covariance Type:            nonrobust   LLR p-value:                   0.06005
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -3.6468      0.101    -36.001      0.000      -3.845      -3.448
test           0.2044      0.

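The `test` coefficient is 0.2044, so exp(0.2044) ≈ 1.23: being in the treatment group raises the odds of purchase by about 23%. Because purchase rates are low (around 3%), this odds ratio is close to the 22% relative lift in conversion rate computed above.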

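However, the evidence is weak: the model's LLR p-value is 0.060 (above the usual 0.05 threshold) and the pseudo R-squared is only about 0.0007, so treatment assignment alone explains very little of the variation in purchases.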

Ex2

In [None]:
# Baseline = share of control-arm users (test == 0) who purchased.
baseline_conversion_rate = df.loc[df["test"] == 0, "purchase"].mean()
baseline_conversion_rate

0.025412960609911054

In [7]:
# Conversion rate of the treatment arm (test == 1).
treatment = df.loc[df["test"] == 1, "purchase"].mean()

# Absolute lift: treatment rate minus the control baseline (a fraction, not a percent).
lift_in_conversion = treatment - baseline_conversion_rate
lift_in_conversion

0.005586105683272887

Ex3

In [8]:
# Default economics used by compute_profitability (reported in NOK by the
# notebook's print statements).
net_contribution_per_purchase = 300
cost_per_1000_impressions = 100


def compute_profitability(data, contribution_per_purchase=None, cpm=None):
    """Estimate the incremental profit of the campaign from experiment data.

    The absolute lift in conversion rate (treatment minus control) is scaled
    by the number of treatment users to get incremental conversions, which are
    valued at the contribution per purchase. The advertising cost is charged
    on the impressions served to the treatment arm only.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``test`` (1 = treatment, 0 = control),
        ``purchase`` (0/1) and ``impressions`` (count per user).
    contribution_per_purchase : float, optional
        Net contribution of one purchase. Defaults to the module-level
        ``net_contribution_per_purchase``.
    cpm : float, optional
        Cost per 1000 impressions. Defaults to the module-level
        ``cost_per_1000_impressions``.

    Returns
    -------
    dict
        Keys ``incremental_conversions``, ``incremental_revenue``,
        ``advertising_cost``, ``net_profit`` and ``profitable`` (bool).

    Raises
    ------
    ValueError
        If either arm has no rows; the lift would otherwise be NaN and the
        campaign would silently be reported as not profitable.
    """
    # Resolve defaults at call time so later changes to the globals are honoured.
    if contribution_per_purchase is None:
        contribution_per_purchase = net_contribution_per_purchase
    if cpm is None:
        cpm = cost_per_1000_impressions

    # Filter each arm once and reuse the result.
    treated = data[data["test"] == 1]
    control = data[data["test"] == 0]
    if treated.empty or control.empty:
        raise ValueError(
            "data must contain at least one treatment (test == 1) and one "
            "control (test == 0) row"
        )

    absolute_lift = treated["purchase"].mean() - control["purchase"].mean()

    # Purchases that would not have happened without the campaign.
    incremental_conversions = absolute_lift * len(treated)
    incremental_revenue = incremental_conversions * contribution_per_purchase

    # Only impressions served to the treatment arm are charged to the campaign.
    total_impressions = treated["impressions"].sum()
    advertising_cost = (cpm * total_impressions) / 1000
    net_profit = incremental_revenue - advertising_cost

    return {
        "incremental_conversions": incremental_conversions,
        "incremental_revenue": incremental_revenue,
        "advertising_cost": advertising_cost,
        "net_profit": net_profit,
        "profitable": bool(net_profit > 0),
    }

# Run the profitability calculation on the full data set.
profitability_results = compute_profitability(df)

# Report each figure with two decimals; monetary values are labelled in NOK.
print(f"Incremental Conversions: {profitability_results['incremental_conversions']:.2f}")
print(f"Incremental Revenue: {profitability_results['incremental_revenue']:.2f} NOK")
print(f"Advertising Cost: {profitability_results['advertising_cost']:.2f} NOK")
print(f"Net Profit: {profitability_results['net_profit']:.2f} NOK")
# The 'profitable' flag is net_profit > 0.
print(f"Is the campaign profitable? {'Yes' if profitability_results['profitable'] else 'No'}")

Incremental Conversions: 89.74
Incremental Revenue: 26922.24 NOK
Advertising Cost: 4564.90 NOK
Net Profit: 22357.34 NOK
Is the campaign profitable? Yes


This gives a net profit of 22,357.34 NOK, confirming that the campaign was profitable.
Even with a relatively small absolute lift in conversion, the high profit margin per purchase made the campaign worthwhile.

Ex4

In [9]:
# Logistic regression of purchase on the treatment flag and the number of
# impressions, with an intercept.
y = df["purchase"]
X = sm.add_constant(df[["test", "impressions"]])

model = sm.Logit(y, X)
result = model.fit()

summary_table = result.summary()
print(summary_table)

Optimization terminated successfully.
         Current function value: 0.128307
         Iterations 8
                           Logit Regression Results                           
Dep. Variable:               purchase   No. Observations:                20000
Model:                          Logit   Df Residuals:                    19997
Method:                           MLE   Df Model:                            2
Date:                Thu, 27 Mar 2025   Pseudo R-squ.:                 0.04530
Time:                        09:55:53   Log-Likelihood:                -2566.1
converged:                       True   LL-Null:                       -2687.9
Covariance Type:            nonrobust   LLR p-value:                 1.335e-53
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const          -3.8466      0.104    -36.998      0.000      -4.050      -3.643
test            0.1779    

Coefficient for impressions: 0.0481. It is positive, so more impressions are associated with a higher probability of purchase: each additional ad impression increases the log-odds of purchase by 0.0481, i.e. multiplies the odds by exp(0.0481) ≈ 1.049 (about +4.9% per impression). This suggests that exposing users to more ads has a real and reliable effect, although the number of impressions was not randomly assigned, so part of this relationship may reflect how active a user is rather than the ads themselves. For treatment, however, the coefficient is 0.1779 (positive) with a p-value of 0.114, which is not statistically significant (p > 0.05). This suggests that simply being in the treatment group (seeing at least 1 ad) does not have a strong enough effect by itself, whereas the number of times a user sees the ad does.



Ex5

In [11]:
# Count control-arm users (test == 0) that have at least one impression,
# and compare against the size of the whole control arm.
in_control = df["test"] == 0
has_impressions = df["impressions"] > 0

non_compliant_control = len(df[in_control & has_impressions])
total_control = len(df[in_control])

print(f"Non-compliant control users: {non_compliant_control}")
print(f"Total control users: {total_control}")

Non-compliant control users: 3935
Total control users: 3935


In [13]:
# Treatment-arm users (test == 1) that have zero impressions.
zero_impression_treated = df[(df["test"] == 1) & (df["impressions"] == 0)]
non_compliant_treatment = len(zero_impression_treated)
non_compliant_treatment


0

In [14]:
# Users in either arm that have zero impressions.
zero_impression_users = df.loc[df["impressions"] == 0]
non_compliant = len(zero_impression_users)
non_compliant

0

Every user in the data set, in both the treatment and the control group, has at least one impression: no user has zero impressions.

Ex6

Writing blah blah blah