In [155]:
""" 
initialisation - imports and data loading
"""
# imports

import pandas
import datetime

# load data files and set columns names

events = pandas.read_csv("/ServersExercise/Data/connectivity_events.csv")
events.columns = ['date','device_id','user_id','server_id','connection_state']

servers = pandas.read_csv("/ServersExercise/Data/servers.csv")
servers.columns = ['id','created_at','destroyed_at']



In [106]:
""" 
configuration
"""

fleet_sizes = {
    1:[1,2],
    2:[3,9],
    3:[10,99],
    4:[100,999]   
}

In [107]:
""" 
select first day from data
"""

# events date to datetype
events['date'] = pandas.to_datetime(events['date'])

# create column with date only
events['date_only'] = events['date'].dt.date

# get first day date beggining and end
current_day = events.loc[0]['date_only']
next_day = current_day + datetime.timedelta(days=1)

# create one day mask - set True to events that are in selected timeframe
one_day_mask = (events['date_only'] >= current_day) & (events['date_only'] < next_day)

# select first day
one_day_events = events.loc[one_day_mask]


In [159]:
""" 
get data frame only with devices first connection
"""

# return list of devices first connection only
def get_first_connections(events_list):
    # list devices
    one_day_devices = events_list['device_id']
    
    # get dictionary of first device connection
    only_first_device_connection_dict = one_day_devices.drop_duplicates()
    
    # deictionary - data frame 
    only_first_device_connection = pandas.DataFrame(list(only_first_device_connection_dict.items()), columns = ['original_id','device'])

    return only_first_device_connection


In [154]:
"""
get data frame with events with devices first connection of 
"""

# return list of events with first connection only
def only_first_con_events(events_list):
    # get devices dictionary
    first_con_device_dict = get_first_connections(events_list)
    
    # build first connection events id's list
    first_con_ids = first_con_device_dict['original_id'].values.tolist()

    return events_list.iloc[first_con_ids]



In [110]:
"""
calculate number of devices for every user
"""

# calculate number of devices used by every user
def calculate_users_devices(events_list):
    # first device connection data set
    first_connections = only_first_con_events(events_list)
    # user:number of devices
    temp_dict = {}
    # build dictionary
    temp_first_connections.apply(add_user_device, 'columns',  users_dict=temp_dict)
    return temp_dict
    


# fucntion used in apply to sum up number of users devices
def add_user_device(row, users_dict):
    user_index = row[2]
    device_index = row[1]
    if user_index in users_dict:
        users_dict[user_index] += 1
    else:
        users_dict[user_index] = 1

     

In [141]:
"""
match results with fleet sizes
"""

# set users to fleet sizes
def check_users_fleet_sizes(events_list):
    devices_dict = calculate_users_devices(events_list)
    # dictionary to df
    devices = pandas.DataFrame.from_dict(devices_dict, orient='index')
    # init dictonary with fleets sizes
    numbers_of_fleets = {
        1:0,
        2:0,
        3:0,
        4:0  
    } 

    
    devices.apply(assign_to_fleet_size, 'columns', fleets_dict=numbers_of_fleets)
    return numbers_of_fleets
    
    
# function used in apply to assign user to matching fleet size
def assign_to_fleet_size(row, fleets_dict):
    number_of_devices = row[0]
    for key in fleet_sizes:
        if check_fleet_size(number_of_devices,fleet_sizes[key]):
            fleets_dict[key] += 1
            break
            
    
# returns bolean if specified number of devices matches selected fleet size
def check_fleet_size(number_of_devices, fleet_description):
    return fleet_description[0] <= number_of_devices <= fleet_description[1]


In [138]:
"""
find number of days
"""
# first and last event date
first_day = events['date'][events.index[0]]
last_day = events['date'][events.index[-1]]
# convert those dates to datetime
first_day_datetime = pandas.to_datetime(first_day)
last_day_datetime = pandas.to_datetime(last_day)
# find timedelta
events_timedelta = last_day_datetime - first_day_datetime
# find number of days
number_of_days = events_timedelta.days


In [160]:
"""
iterate through all days to gather the data
"""

# events date to datetype
events['date'] = pandas.to_datetime(events['date'])
# create column with date only
events['date_only'] = events['date'].dt.date
# get first day
current_day = events.loc[0]['date_only']

# iterate through all days
for day in range(number_of_days):
    # get current day end
    next_day = current_day + datetime.timedelta(days=1)
    # create one day mask - set True to events that are in selected timeframe
    one_day_mask = (events['date_only'] >= current_day) & (events['date_only'] < next_day)
    # select first day
    one_day_events = events.loc[one_day_mask]
    # get fleet sizes
    fleet_size = check_users_fleet_sizes(one_day_events)
    print(fleet_size)
    current_day = next_day
    

"""
BŁĄD:
w get_first_connections dodaje kolumnę do słownika - original id - które jest z dataframe events, a potem po tych id
wyszukuje w one_day_events - a tam indeksy są resetowane

"""
    
    
    
    

0      101
1      102
2      103
3      104
4      105
5      106
6       10
7      107
8      108
9      109
10     110
11      11
12     111
13       1
14     112
15     113
16     114
17     115
18     116
19     117
20     118
21     119
22     120
23     121
24      12
25     122
26     123
27     124
28     125
29     126
      ... 
241     95
242      9
243     96
244     97
245     98
246     99
250    249
251    250
252    251
253    252
254    253
255    254
256    255
257    256
258    257
266    258
267    259
268    260
283    261
284    262
295    263
296    264
306    265
307    266
308    267
320    268
321    269
334    270
335    271
358    272
Name: device_id, Length: 271, dtype: int64
{1: 47, 2: 10, 3: 1, 4: 1}
362    143
363    156
364    232
365    250
366    254
367    255
368    258
369    259
370    263
371    271
372    273
373     35
374     67
375      9
377    243
378    249
382    260
383    262
384    264
386     81
388    103
389    136
397    274
400   

IndexError: positional indexers are out-of-bounds