In [1]:
import pandas as pd

In [2]:
texts_col_names = ["calling", "receiving", "timestamp"]

In [3]:
texts_df = pd.read_csv("texts.csv", names=texts_col_names, parse_dates=["timestamp"])

In [4]:
texts_df.head()

Unnamed: 0,calling,receiving,timestamp
0,97424 22395,90365 06212,2016-01-09 06:03:22
1,94489 72078,92415 91418,2016-01-09 06:05:35
2,81520 43406,92421 64236,2016-01-09 06:09:34
3,97389 12538,90352 50054,2016-01-09 06:09:39
4,81515 42171,98440 02823,2016-01-09 06:13:30


In [5]:
"First record of texts, 97424 22395, texts 90365 06212 at time 01-09-2016 06:03:22"

'First record of texts, 97424 22395, texts 90365 06212 at time 01-09-2016 06:03:22'

In [6]:
texts_df.dtypes

calling              object
receiving            object
timestamp    datetime64[ns]
dtype: object

In [7]:
texts_df.shape

(9072, 3)

In [8]:
texts_df.describe()

Unnamed: 0,calling,receiving,timestamp
count,9072,9072,9072
unique,237,230,9044
top,92411 96415,92411 96415,2016-09-27 07:34:54
freq,151,135,2
first,,,2016-01-09 06:03:22
last,,,2016-12-09 23:58:09


### Calls

In [9]:
calls_col_names = ["calling", "receiving", "timestamp", "duration"]

In [10]:
calls_df = pd.read_csv("calls.csv", names=calls_col_names, parse_dates=["timestamp"])

In [11]:
calls_df.head()

Unnamed: 0,calling,receiving,timestamp,duration
0,78130 00821,98453 94494,2016-01-09 06:01:12,186
1,78298 91466,(022)28952819,2016-01-09 06:01:59,2093
2,97424 22395,(022)47410783,2016-01-09 06:03:51,1975
3,93427 40118,(080)33118033,2016-01-09 06:11:23,1156
4,90087 42537,(080)35121497,2016-01-09 06:17:26,573


In [12]:
calls_df.shape

(5213, 4)

In [13]:
calls_df.describe()

Unnamed: 0,duration
count,5213.0
mean,935.796087
std,1005.93137
min,9.0
25%,151.0
50%,584.0
75%,1323.0
max,4617.0


In [14]:
call_nums = set(calls_df.calling.unique()).union(set(calls_df.receiving.unique()))

In [15]:
text_nums = set(texts_df.calling.unique()).union(set(texts_df.receiving.unique()))

In [16]:
len(call_nums.union(text_nums))

570

# Task 2

In [23]:
calling_totals = calls_df.groupby("calling").sum().sort_values("duration", ascending=False)

In [24]:
receiving_totals = calls_df.groupby("receiving").sum().sort_values("duration", ascending=False)

In [27]:
(calling_totals.join(receiving_totals, rsuffix="_rcvng", lsuffix="_calling")
 .fillna(0)
 .assign(total_duration=lambda x: x["duration_calling"] + x["duration_rcvng"])
 .sort_values("total_duration", ascending=False)
 .head(1)
 .filter(["total_duration"])
)


Unnamed: 0_level_0,total_duration
calling,Unnamed: 1_level_1
(080)33251027,90456.0


# Task 3A

In [28]:
def find_prefix(num):
    if " " in num:
        if num[0] in {"7", "8", "9"}:
            prefix = num[:4]
    elif num[:3] == "140":
        prefix = 140
    elif num[0] == "(":
        prefix = num.split(")")[0].strip("(")
    return prefix

In [29]:
calls_df.head()

Unnamed: 0,calling,receiving,timestamp,duration
0,78130 00821,98453 94494,2016-01-09 06:01:12,186
1,78298 91466,(022)28952819,2016-01-09 06:01:59,2093
2,97424 22395,(022)47410783,2016-01-09 06:03:51,1975
3,93427 40118,(080)33118033,2016-01-09 06:11:23,1156
4,90087 42537,(080)35121497,2016-01-09 06:17:26,573


In [31]:
prefixes = calls_df.calling.apply(find_prefix).tolist()

In [32]:
calls_df["calling_prefix"] = calls_df.calling.apply(find_prefix)

In [34]:
calls_df["receiving_prefix"] = calls_df.receiving.apply(find_prefix)

In [35]:
calls_df.head()

Unnamed: 0,calling,receiving,timestamp,duration,calling_prefix,receiving_prefix
0,78130 00821,98453 94494,2016-01-09 06:01:12,186,7813,9845
1,78298 91466,(022)28952819,2016-01-09 06:01:59,2093,7829,22
2,97424 22395,(022)47410783,2016-01-09 06:03:51,1975,9742,22
3,93427 40118,(080)33118033,2016-01-09 06:11:23,1156,9342,80
4,90087 42537,(080)35121497,2016-01-09 06:17:26,573,9008,80


In [52]:
part_a = calls_df.query("calling_prefix == '080'").filter(["receiving_prefix"]).sort_values("receiving_prefix").squeeze().unique().tolist()

In [54]:
part_a_check = '''
022
040
04344
044
04546
0471
080
0821
7406
7795
7813
7829
8151
8152
8301
8431
8714
9008
9019
9035
9036
9241
9242
9341
9342
9343
9400
9448
9449
9526
9656
9738
9740
9741
9742
9844
9845
9900
9961
'''

In [59]:
part_a_check = part_a_check.split("\n")[1:-1]

In [60]:
part_a == part_a_check

True

# Task 3B

In [63]:
total_bang_calls = calls_df.query("calling_prefix == '080'")

In [65]:
total_bang_calls.shape

(1080, 6)

In [66]:
total_bang_calls.head()

Unnamed: 0,calling,receiving,timestamp,duration,calling_prefix,receiving_prefix
6,(080)45291968,90365 06212,2016-01-09 06:30:36,9,80,9036
11,(080)62164823,74066 93594,2016-01-09 06:52:07,300,80,7406
17,(080)67362492,(04344)316423,2016-01-09 07:24:45,2258,80,4344
21,(080)69245029,(044)49481100,2016-01-09 07:34:19,9,80,44
26,(080)47459867,98440 65896,2016-01-09 08:08:59,2147,80,9844


In [69]:
total_bang_calls.query("receiving_prefix == '080'").shape

(268, 6)

In [72]:
round((total_bang_calls.query("receiving_prefix == '080'").shape[0] / total_bang_calls.shape[0]) * 100, 2)

24.81

# Task 4

In [73]:
whitelist = set()

In [83]:
whitelist = set(texts_df["calling"].unique()).union(set(texts_df["receiving"].unique())).union(set(calls_df["receiving"].unique()))

In [89]:
task_4_result = sorted(list(set(calls_df["calling"].unique()) - whitelist))

In [90]:
task_4_result

['(022)37572285',
 '(022)65548497',
 '(022)68535788',
 '(022)69042431',
 '(040)30429041',
 '(044)22020822',
 '(0471)2171438',
 '(0471)6579079',
 '(080)20383942',
 '(080)25820765',
 '(080)31606520',
 '(080)40362016',
 '(080)60463379',
 '(080)60998034',
 '(080)62963633',
 '(080)64015211',
 '(080)69887826',
 '(0821)3257740',
 '1400481538',
 '1401747654',
 '1402316533',
 '1403072432',
 '1403579926',
 '1404073047',
 '1404368883',
 '1404787681',
 '1407539117',
 '1408371942',
 '1408409918',
 '1408672243',
 '1409421631',
 '1409668775',
 '1409994233',
 '74064 66270',
 '78291 94593',
 '87144 55014',
 '90351 90193',
 '92414 69419',
 '94495 03761',
 '97404 30456',
 '97407 84573',
 '97442 45192',
 '99617 25274']

In [91]:
task_4_check = '''(022)37572285
(022)65548497
(022)68535788
(022)69042431
(040)30429041
(044)22020822
(0471)2171438
(0471)6579079
(080)20383942
(080)25820765
(080)31606520
(080)40362016
(080)60463379
(080)60998034
(080)62963633
(080)64015211
(080)69887826
(0821)3257740
1400481538
1401747654
1402316533
1403072432
1403579926
1404073047
1404368883
1404787681
1407539117
1408371942
1408409918
1408672243
1409421631
1409668775
1409994233
74064 66270
78291 94593
87144 55014
90351 90193
92414 69419
94495 03761
97404 30456
97407 84573
97442 45192
99617 25274'''

In [94]:
task_4_check = task_4_check.split("\n")

In [95]:
task_4_result == task_4_check

True