# Looking for Proxy Metrics

## Import libraries

In [6]:
# Uncomment to install the third-party plotting/statistics dependencies.
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
# %pip install scipy matplotlib statsmodels seaborn

In [5]:
import numpy as np
import pandas as pd
import scipy.stats as ss
import matplotlib.pyplot as plt
import statsmodels.stats.api as sms
import seaborn as sns
from statsmodels.stats.proportion import proportion_confint

## Import data

In [7]:
# Load the homework dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='proxy_metrics_homework_2.csv')

In [8]:
# Preview the loaded frame; pandas elides the middle rows in the rendered table.
df

Unnamed: 0,user_id,views,contacts,last_7_views,last_3_views,last_7_contacts,last_3_contacts,live_days,messages_events,max_views_delta,add_to_favor_events,is_bought
0,151042503,84,2,84,39,2,2,8,0,6,0.0,0
1,151212422,95,0,95,36,0,0,8,0,7,0.0,0
2,151206815,118,3,118,40,3,2,8,0,2,0.0,0
3,151284254,11,0,11,2,0,0,7,0,8,0.0,0
4,151088604,48,2,48,8,2,0,8,0,1,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
1195,142367169,2477,34,319,184,4,2,42,42,0,3.0,1
1196,149003341,469,15,231,168,8,8,41,9,1,0.0,1
1197,150878368,2663,23,2112,1213,23,17,10,24,0,0.0,1
1198,149351655,2673,39,1202,528,20,8,24,26,31,1.0,1


In [9]:
# Print column dtypes and non-null counts to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1200 entries, 0 to 1199
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   user_id              1200 non-null   int64  
 1   views                1200 non-null   int64  
 2   contacts             1200 non-null   int64  
 3   last_7_views         1200 non-null   int64  
 4   last_3_views         1200 non-null   int64  
 5   last_7_contacts      1200 non-null   int64  
 6   last_3_contacts      1200 non-null   int64  
 7   live_days            1200 non-null   int64  
 8   messages_events      1200 non-null   int64  
 9   max_views_delta      1200 non-null   int64  
 10  add_to_favor_events  1200 non-null   float64
 11  is_bought            1200 non-null   int64  
dtypes: float64(1), int64(11)
memory usage: 112.6 KB


## Tasks

### Task 1.

 **Find the average conversion rate into buying a car.**

In [10]:
# is_bought holds 0/1 values in the preview above, so its mean is the share of buyers.
df['is_bought'].mean()

0.3425

### Task 2.  

**What is the average number of contacts for the entire period for those who did not buy a car?**

In [12]:
# Select the `contacts` column for the non-buyer rows in one .loc call, then average.
df.loc[df['is_bought'] == 0, 'contacts'].mean()

13.135614702154626

### Task 3.

**Calculate by what factor the average number of add-to-favorites events (`add_to_favor_events`) differs between the two cohorts (those who bought a car and those who did not buy a car).**

In [15]:
# Mean add_to_favor_events per cohort; as_index=False keeps is_bought as a regular column.
ans_3 = df.groupby('is_bought', as_index=False)['add_to_favor_events'].mean()

ans_3

Unnamed: 0,is_bought,add_to_favor_events
0,0,0.158428
1,1,0.318735


In [18]:
# Positional row 1 is the is_bought == 1 cohort and row 0 is is_bought == 0,
# so this is the buyers' mean divided by the non-buyers' mean.
ans_3['add_to_favor_events'].iloc[1] / ans_3['add_to_favor_events'].iloc[0]

2.01185401459854

### Task 4. 

**Find the percentage of users who have not made any contacts for each of the two cohorts (those who bought a car and those who did not buy a car).**

In [33]:
# Cohort sizes and the number of users with zero contacts inside each cohort.
not_bought = len(df.query('is_bought == 0'))
not_bought_zero_contacts = len(df.query('is_bought == 0 and contacts == 0'))

bought = len(df.query('is_bought == 1'))
bought_zero_contacts = len(df.query('is_bought == 1 and contacts == 0'))

# The ' .2%' format spec renders the share as a percentage with two decimals
# (the leading space in the spec reserves a sign position for positive values).
print(f'Zero contacts percentage among those who did not buy a car: {not_bought_zero_contacts / not_bought: .2%}')
print(f'Zero contacts percentage among those who bought a car: {bought_zero_contacts / bought: .2%}')

Zero contacts percentage among those who did not buy a car:  10.27%
Zero contacts percentage among those who bought a car:  3.65%


### Task 5. 

**Find the average LT (lifetime — the duration from the first day to churn) for each of the two cohorts.**

In [44]:
# Average live_days per cohort, using named aggregation; is_bought stays a regular column.
df.groupby('is_bought', as_index=False).agg(live_days=('live_days', 'mean'))

Unnamed: 0,is_bought,live_days
0,0,12.724968
1,1,10.150852


### Task 6. 

**Find the ratio between views and contacts (computed below as the mean per-user contacts / views) for each of the two cohorts.**

In [48]:
# Non-buyers: filter the cohort once, take each user's contacts / views, then average.
(
    df.loc[df['is_bought'] == 0, ['contacts', 'views']]
    .pipe(lambda cohort: cohort['contacts'] / cohort['views'])
    .mean()
)

0.022867884495711047

In [49]:
# Buyers: filter the cohort once, take each user's contacts / views, then average.
(
    df.loc[df['is_bought'] == 1, ['contacts', 'views']]
    .pipe(lambda cohort: cohort['contacts'] / cohort['views'])
    .mean()
)

0.034810729185530236

### Task 7. 

In [None]:
# TODO: Task 7 — to be continued.
# Kept as a comment so this code cell stays valid Python and does not raise a
# SyntaxError when the notebook is executed with Restart & Run All.