In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import chi2_contingency


In [3]:
# Load the raw experiment export; the relative path is resolved against the
# notebook's working directory.
DATA_PATH = "cookie_cats.csv"
df = pd.read_csv(DATA_PATH)

# Preview the first rows to check that the columns were parsed as expected.
df.head()

Unnamed: 0,userid,version,sum_gamerounds,retention_1,retention_7
0,116,gate_30,3,False,False
1,337,gate_30,38,True,False
2,377,gate_40,165,True,False
3,483,gate_40,1,False,False
4,488,gate_40,179,True,True


## Dataset Overview

### Understand and Prepare Data
- `userid`: Unique identifier for each user.
- `version`: The experiment group the user belongs to (`gate_30` or `gate_40`).
- `sum_gamerounds`: Total number of game rounds played by the user during the first 14 days after installation.
- `retention_1`: Whether the user came back and played the game 1 day after installation.
- `retention_7`: Whether the user came back and played the game 7 days after installation.

## Data Analysis

In [7]:
# Split the users into the two experiment arms using the `version` column.
group_a = df.loc[df['version'].eq('gate_30')]
group_b = df.loc[df['version'].eq('gate_40')]

# The retention flags are booleans, so the mean of a flag column is the share
# of users in that arm who came back.
RETENTION_COLUMNS = ('retention_1', 'retention_7')
retention_1_day_a, retention_7_days_a = (
    group_a[column].mean() for column in RETENTION_COLUMNS
)
retention_1_day_b, retention_7_days_b = (
    group_b[column].mean() for column in RETENTION_COLUMNS
)

# Report every rate as a percentage with two decimals, one line per rate.
print(
    f"1-Day Retention for gate_30: {retention_1_day_a:.2%}",
    f"1-Day Retention for gate_40: {retention_1_day_b:.2%}",
    f"7-Day Retention for gate_30: {retention_7_days_a:.2%}",
    f"7-Day Retention for gate_40: {retention_7_days_b:.2%}",
    sep="\n",
)

1-Day Retention for gate_30: 44.82%
1-Day Retention for gate_40: 44.23%
7-Day Retention for gate_30: 19.02%
7-Day Retention for gate_40: 18.20%


In [6]:
# Display the gate_30 subset as a visual sanity check of the version filter.
group_a

Unnamed: 0,userid,version,sum_gamerounds,retention_1,retention_7
0,116,gate_30,3,False,False
1,337,gate_30,38,True,False
6,1066,gate_30,0,False,False
11,2101,gate_30,0,False,False
13,2179,gate_30,39,True,False
...,...,...,...,...,...
90179,9998576,gate_30,14,True,False
90180,9998623,gate_30,7,False,False
90182,9999178,gate_30,21,True,False
90183,9999349,gate_30,10,False,False


##  Statistical Testing

In [9]:
def _returned_vs_not(group, flag_column):
    """Return ``[returned, did_not_return]`` user counts for one experiment arm.

    ``group`` is the arm's DataFrame and ``flag_column`` names one of its
    boolean retention columns.
    """
    returned = group[flag_column].sum()
    return [returned, len(group) - returned]


# Build one 2x2 contingency table per retention horizon. Rows are the arms
# (gate_30 first, then gate_40); columns are [returned, did not return].
experiment_arms = (group_a, group_b)
contingency_table_1 = [_returned_vs_not(arm, 'retention_1') for arm in experiment_arms]
contingency_table_7 = [_returned_vs_not(arm, 'retention_7') for arm in experiment_arms]

# Chi-squared test of independence between arm and retention. Only the test
# statistic and the p-value are kept; the remaining two results are discarded.
# NOTE: per the SciPy docs, the default `correction=True` applies Yates'
# continuity correction to 2x2 tables such as these.
chi1, p_value1, _, _ = chi2_contingency(observed=contingency_table_1)
chi7, p_value7, _, _ = chi2_contingency(observed=contingency_table_7)

print(
    f"Chi-squared test result for 1-day retention: p-value = {p_value1:.3f}",
    f"Chi-squared test result for 7-day retention: p-value = {p_value7:.3f}",
    sep="\n",
)


Chi-squared test result for 1-day retention: p-value = 0.076
Chi-squared test result for 7-day retention: p-value = 0.002


## Analysing the Results
#### Retention at 1 Day (p-value = 0.076):
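A p-value greater than 0.05 means the observed difference in 1-day retention between the groups (44.82% for gate_30 vs. 44.23% for gate_40) is not statistically significant at the 5% level. In other words, this experiment does not provide sufficient evidence that changing the level of the gate affects the probability of players returning the next day; note that this is not the same as proving there is no effect.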

#### Retention at 7 Days (p-value = 0.002):
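A p-value well below 0.05 indicates a statistically significant difference in 7-day retention between the two groups (19.02% for gate_30 vs. 18.20% for gate_40). This suggests that gate location has a significant impact on player retention over a longer period of time, with the gate at level 30 retaining more players than the gate at level 40.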

## Actionable Insights
#### 1. Optimize Gate Positioning for Long-Term Retention
Since the results show a significant impact of gate location on 7-day retention, you should consider adopting the location that promoted better retention as the standard in your game. In this experiment, gate_30 shows better 7-day retention than gate_40 (19.02% vs. 18.20%), so it would be wise to keep the gate at level 30 rather than moving it to level 40 in order to promote better user retention.

#### 2. Reconsider Changes for 1-Day Retention
Since no significant difference was observed for 1-day retention, it is possible that gate location did not affect players' decisions to return the next day. You might consider other factors that could influence short-term retention, such as initial rewards, tutorials, initial game engagement, or changes in the difficulty of the initial levels.