-
Notifications
You must be signed in to change notification settings - Fork 0
/
click_install.py
57 lines (45 loc) · 1.39 KB
/
click_install.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# -*- coding: utf-8 -*-
'''
Plot the distribution of click-to-install times found in an AppsFlyer
"Installation" raw data report (CSV export from the dashboard), together with
how often each IP address appears in that report.
'''
import os
from dateutil.parser import parse
from pandas import DataFrame, Series
import pandas as pd
import matplotlib.pyplot as plt
def time_diff(time1, time2):
    """Return the pairwise differences ``time2 - time1`` as a list.

    Every element of both inputs is passed through ``dateutil.parser.parse``
    before subtracting.  The inputs are walked in lockstep with ``zip``, so
    the result is as long as the shorter of the two.
    """
    return [parse(later) - parse(earlier) for earlier, later in zip(time1, time2)]
# Load the first CSV file found in the current directory.
# - Only regular files whose name ends in '.csv' qualify; a plain substring
#   test would also pick up names such as 'report.csv.bak'.
# - os.listdir() returns entries in arbitrary order, so the candidates are
#   sorted to make the choice reproducible.
# - A missing CSV is reported explicitly instead of surfacing later as a
#   NameError on `path`.
csv_files = sorted(
    name for name in os.listdir('.')
    if name.endswith('.csv') and os.path.isfile(name)
)
if not csv_files:
    raise FileNotFoundError('No .csv file found in the current directory')
path = csv_files[0]
data = pd.read_csv(path)
# --- Click-to-install time distribution ---
clicks = data['Click Time']
installs = data['Install Time']
diff = time_diff(clicks, installs)

# Convert every timedelta to whole elapsed minutes.
# - total_seconds() is used because timedelta.seconds ignores the days
#   component (a gap of 1 day 5 min would otherwise be counted as 5 min).
# - Floor division keeps the buckets integral, so they line up with the
#   integer index built below on both Python 2 and Python 3.
diff_min = Series([int(d.total_seconds() // 60) for d in diff])

# Number of rows observed for each minute bucket.
diff_min_counter = diff_min.value_counts()

# Re-index onto every minute from 0 up to AND including the largest observed
# one, so no bucket is missing; minutes without any install get a count of 0
# rather than NaN.  Negative gaps (install parsed as earlier than click) fall
# outside this range and are therefore not plotted.
click_install_dist = diff_min_counter.reindex(
    range(0, max(diff_min_counter.index) + 1),
    fill_value=0,
)
# --- IP distribution ---
# Count how many rows of the report carry each distinct value of the 'IP'
# column.
ip_counter = data['IP'].value_counts()
# --- Plot ---
# Two stacked panels in one figure: the click-to-install distribution on top,
# the per-IP counts underneath.
panels = (
    (1, click_install_dist, 'Click to Install time distribution'),
    (2, ip_counter, 'IP distribution'),
)
for position, series, title in panels:
    plt.subplot(2, 1, position)
    series.plot()
    plt.title(title)

plt.tight_layout()
plt.show()