# Mobile Games A/B Testing - Cookie Cats

## 1. Load and Explore the Data

In [None]:
# Import pandas
import pandas as pd

# Read the Cookie Cats A/B-test export into a DataFrame
csv_path = "../input/mobile-games-ab-testing-cookie-cats/cookie_cats.csv"
df = pd.read_csv(csv_path)

In [None]:
# Preview the top ten records to get a feel for the columns
df.head(n=10)

In [None]:
# Print a concise summary of the DataFrame: column names, non-null counts, dtypes and memory usage
df.info()

In [None]:
# Per-version tally of non-null entries in every column (i.e. players per A/B group)
df.groupby(by='version').count()

## 2. Retention Rate

In [None]:
# Calculate the share of all users who came back and played 1 day after installing
# (the mean of the retention flag equals returning users / total users)
df['retention_1'].mean()

In [None]:
# Calculate the share of all users who came back and played 7 days after installing
# (the mean of the retention flag equals returning users / total users)
df['retention_7'].mean()

In [None]:
# 1-day retention rate per A/B group: mean of the retention flag within each version
df.groupby('version')['retention_1'].mean()

In [None]:
# Calculating 7-day retention rate for each A/B group
# (mean of the retention flag within each version = returning users / users in the group)
df.groupby('version')['retention_7'].mean()

## 3. Create Crosstab for A/B Testing

In [None]:
# Contingency table: one row per A/B version, one column per 1-day retention outcome
crossed_1 = pd.crosstab(index=df['version'], columns=df['retention_1'])
crossed_1

In [None]:
# Contingency table: one row per A/B version, one column per 7-day retention outcome
crossdata = pd.crosstab(index=df['version'], columns=df['retention_7'])
crossdata

## 4. A/B Test: Chi-square test

In [None]:
# Import libraries for chi-square test
import numpy as np
import scipy as sp
import scipy.stats

In [None]:
# Run the chi-square test of independence on the version x retention_7 table
# (correction=False is passed through to chi2_contingency), then unpack the
# statistic, p-value, degrees of freedom and expected frequencies.
chi2_result = sp.stats.chi2_contingency(crossdata, correction=False)
x2, p, dof, expected = chi2_result

In [None]:
# Report the chi-square test results.
# Values are passed to print() explicitly instead of being pulled out of
# locals(), so the output no longer depends on whatever else is in the namespace.
print("Chi-square:", x2)
print("p-value:", p)
print("Degree of freedom:", dof)
print(expected)

# Classify the p-value against the 1% and 5% significance thresholds
# (strictest threshold is checked first).
if p < 0.01:
    verdict = "Significant at 1% level of significance"
elif p < 0.05:
    verdict = "Significant at 5% level of significance"
else:
    verdict = "Not significant at 5% level of significance"
print(verdict)

### Conclusion

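The 7-day retention rate is higher in the test group than in the control group, and the difference is significant at the 1% level. Note that the chi-square test only establishes that the two groups differ; the direction of the difference comes from the per-version 7-day retention rates calculated in Section 2.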

**So, we should not keep the gate at level 30.**