-
Notifications
You must be signed in to change notification settings - Fork 0
/
defs.py
88 lines (74 loc) · 3.34 KB
/
defs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import petl as etl
import string,random
# File extensions (lower-case, without the dot) that may be uploaded.
ALLOWED_EXTENSIONS = {'csv'}


def allowed_file(filename):
    """Return True when *filename* has an extension in ALLOWED_EXTENSIONS.

    Only the text after the last dot is considered, and it is compared
    case-insensitively, so ``export.CSV`` is accepted just like
    ``export.csv``. A name without any dot is rejected.
    """
    if '.' not in filename:
        return False
    # rsplit with maxsplit=1 isolates the final extension ("a.b.csv" -> "csv").
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
def randomword(length):
    """Return a random string of *length* lower-case ASCII letters.

    Uses ``string.ascii_lowercase``, which is available on both Python 2
    and Python 3; ``string.lowercase`` only exists on Python 2.
    The letters come from the ``random`` module, so the result is not
    suitable for security-sensitive tokens.
    """
    return ''.join(random.choice(string.ascii_lowercase) for _ in range(length))
def translate(cell):
    """Turn a goal label into its short lower-case identifier.

    'E-Commerce' maps to 'transaction'. Any other label keeps its first
    four characters plus everything from index 8 onwards, lower-cased,
    so 'Goal ID 5' becomes 'goal5'.
    """
    if cell == 'E-Commerce':
        return 'transaction'
    # For 'Goal ID 5': head is 'Goal', tail is '5'; ' ID ' is dropped.
    head = cell[:4]
    tail = cell[8:]
    return str.lower(head + tail)
def prep_file(csvfile):
    """Load a CSV export and reshape it to one row per (View ID, goal).

    The pipeline keeps column 2 and columns 4 through 24 (described by
    the original author as View ID plus all the goal / e-commerce
    columns), melts them on 'View ID' so each kept column becomes a row,
    casts every value to str, rewrites the 'variable' names through
    translate() and finally casts 'View ID' to int.

    Sample output:

    +---------+-------------+--------------------------+
    | View ID | variable    | value                    |
    +=========+=============+==========================+
    | 2405654 | transaction | Conversion + Revenue     |
    +---------+-------------+--------------------------+
    | 2405654 | goal1       | Not Used / Do not Import |
    +---------+-------------+--------------------------+
    | 2405654 | goal2       | Conversion               |
    +---------+-------------+--------------------------+
    | 2405654 | goal3       | Not Used / Do not Import |
    +---------+-------------+--------------------------+
    | 2405654 | goal4       | Not Used / Do not Import |
    +---------+-------------+--------------------------+
    """
    # Positional column indexes handed to cut(): 2, then 4..24 inclusive.
    kept_columns = [2] + list(range(4, 25))

    def rename_goal(header):
        # Strip the first 18 characters and the last one before translating.
        # NOTE(review): presumably a fixed-width prefix and a closing
        # bracket in the export's column headers - confirm against a file.
        return translate(header[18:-1])

    table = etl.fromcsv(csvfile)
    table = table.cut(kept_columns)
    table = table.melt('View ID')
    table = table.convertall(str)
    table = table.convert('variable', rename_goal)
    return table.convert('View ID', int)
def filter_agg(etlt):
    """Drop unused goals and group the rest per view and conversion type.

    Expects a table shaped like the output of prep_file(). Rows whose
    'value' is neither 'Conversion' nor 'Conversion + Revenue' are
    removed; the remaining 'variable' entries are collected into a list
    for each ('View ID', 'value') pair, and the header is replaced by
    profileID / type / goals.

    Sample output:

    +------------+----------------------+-----------------+
    | profileID  | type                 | goals           |
    +============+======================+=================+
    | 2405654    | Conversion           | ['goal2']       |
    +------------+----------------------+-----------------+
    | 2405654    | Conversion + Revenue | ['transaction'] |
    +------------+----------------------+-----------------+
    | 84468465   | Conversion           | ['goal2']       |
    +------------+----------------------+-----------------+
    | 84468465   | Conversion + Revenue | ['transaction'] |
    +------------+----------------------+-----------------+
    | 7885855456 | Conversion + Revenue | ['transaction'] |
    +------------+----------------------+-----------------+
    """
    wanted_types = ('Conversion', 'Conversion + Revenue')
    in_use = etlt.select('value', lambda kind: kind in wanted_types)
    grouped = in_use.aggregate(('View ID', 'value'), list, 'variable')
    return grouped.setheader(['profileID', 'type', 'goals'])
def meta_table(etlt):
    """Group profileIDs that share the same conversion mapping.

    Expects a table shaped like the output of filter_agg(). The
    (type, goals) pairs are first collected per profileID, then the
    profileIDs are collected per identical collection of pairs. Only the
    column at index 1 (the lists of profileIDs) is kept.

    Sample output:

    +---------------------+
    | value               |
    +=====================+
    | [2405654, 84468465] |  > Those 2 profileIDs share the same structure
    +---------------------+
    | [7885855456]        |  > This one has a different one
    +---------------------+
    """
    # Step 1: one row per profileID, holding its list of (type, goals).
    mapping_per_profile = etlt.aggregate('profileID', list, ('type', 'goals'))
    # Step 2: one row per distinct mapping, holding the matching profileIDs.
    profiles_per_mapping = mapping_per_profile.aggregate('value', list, 'profileID')
    return profiles_per_mapping.cut(1)
def get_goals(profileIDs, etlt):
    """Return a {type: goals} dict for the rows matching *profileIDs*.

    Rows of *etlt* whose 'profileID' is in *profileIDs* are selected and
    reduced to the columns at index 1 and 2, which are then read by the
    keys 'type' and 'goals'. When several selected rows share a type,
    the last one wins.

    NOTE(review): assumes *etlt* has the profileID / type / goals layout
    produced by filter_agg() - confirm against callers.
    """
    selected = etlt.selectin('profileID', profileIDs)
    goals_by_type = {}
    for record in selected.cut(1, 2).dicts():
        goals_by_type[record['type']] = record['goals']
    return goals_by_type