## Load and prepare data

In [56]:
import pandas as pd

In [57]:
# Raw feed-view log: tab-separated, no header row, so column names are supplied here.
df = pd.read_csv(
    '../data/feed-views.log',
    sep='\t',
    header=None,
    names=['datetime', 'user'],
)

In [58]:
# Parse the timestamp column using an explicit format (date, time, fractional seconds).
df['datetime'] = pd.to_datetime(
    df['datetime'],
    format='%Y-%m-%d %H:%M:%S.%f',
)

In [59]:
# Split the timestamp into one column per calendar/clock component.
# Keyword order fixes the resulting column order.
stamp = df['datetime'].dt
df = df.assign(
    year=stamp.year,
    month=stamp.month,
    day=stamp.day,
    hour=stamp.hour,
    minute=stamp.minute,
    second=stamp.second,
)

## daytime

In [60]:
# Label every view with the part of the day it happened in.
#
# Binning is done on the hour alone, so it does not depend on the current
# date. With right=False each bin is half-open, [start, end):
#   [0, 4)   night          [4, 7)   early morning   [7, 11)  morning
#   [11, 17) afternoon      [17, 20) early evening   [20, 24) evening
# This also makes a view at exactly 00:00:00 count as 'night' instead of
# falling outside every bin and becoming NaN.
daytime_edges = [0, 4, 7, 11, 17, 20, 24]
daytime_labels = ['night', 'early morning', 'morning', 'afternoon', 'early evening', 'evening']
df['daytime'] = pd.cut(
    df['datetime'].dt.hour,
    bins=daytime_edges,
    labels=daytime_labels,
    right=False,
)

In [61]:
# Index the frame by user name; rebinding (rather than inplace=True) keeps
# the step chainable and avoids mutating the frame behind the reader's back.
df = df.set_index('user')

In [55]:
# Sanity check: the earliest and latest time of day that landed in each daytime bin.
df.assign(time=lambda x: x.datetime.dt.time).groupby('daytime').time.agg(['min', 'max'])

Unnamed: 0_level_0,min,max
daytime,Unnamed: 1_level_1,Unnamed: 2_level_1
night,00:00:13.222265,03:33:07.757714
early morning,04:08:53.496691,04:31:41.164007
morning,08:16:03.918402,10:57:37.331258
afternoon,11:02:12.343448,16:57:03.848299
early evening,17:03:01.606846,19:55:52.386379
evening,20:00:22.994929,23:59:38.758438


## Count

In [45]:
# Total number of non-null cells: per-column non-null counts, added together.
non_null_per_column = df.count()
non_null_per_column.sum()

8608

In [46]:
# Number of views in each part of the day.
df['daytime'].value_counts()

evening          509
afternoon        252
early evening    145
night            129
morning           36
early morning      5
Name: daytime, dtype: int64

## Sort

In [47]:
# Order the views by clock time only (hour, then minute, then second); the date is not a sort key.
clock_columns = ['hour', 'minute', 'second']
df.sort_values(by=clock_columns, ascending=True)

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
valentina,2020-05-15 00:00:13.222265,2020,5,15,0,0,13,night
valentina,2020-05-15 00:01:05.153738,2020,5,15,0,1,5,night
pavel,2020-05-12 00:01:27.764025,2020,5,12,0,1,27,night
pavel,2020-05-12 00:01:38.444917,2020,5,12,0,1,38,night
pavel,2020-05-12 00:01:55.395042,2020,5,12,0,1,55,night
...,...,...,...,...,...,...,...,...
artem,2020-05-21 23:49:22.386789,2020,5,21,23,49,22,evening
anatoliy,2020-05-09 23:53:55.599821,2020,5,9,23,53,55,evening
pavel,2020-05-09 23:54:54.260791,2020,5,9,23,54,54,evening
valentina,2020-05-14 23:58:56.754866,2020,5,14,23,58,56,evening


## Min | Max

In [48]:
# Latest hour that was labelled 'night'.
night_views = df.query('daytime == "night"')
max_hour_night = night_views['hour'].max()
max_hour_night

3

In [49]:
# Earliest hour that was labelled 'morning'.
morning_views = df.query('daytime == "morning"')
min_hour_morning = morning_views['hour'].min()
min_hour_morning

8

In [50]:
# Draw one reproducible row (fixed seed) from the views that happened in
# either of the two boundary hours computed above.
boundary_hours = [min_hour_morning, max_hour_night]
in_boundary_hour = df['hour'].isin(boundary_hours)
df[in_boundary_hour].sample(n=1, random_state=21)

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alexander,2020-05-15 08:35:01.471463,2020,5,15,8,35,1,morning


In [51]:
# Most frequent hour and most frequent part of the day (first row of the per-column modes).
column_modes = df[['hour', 'daytime']].mode()
column_modes.loc[0]

hour            22
daytime    evening
Name: 0, dtype: object

## Smallest | Largest

In [52]:
# The three earliest hours among 'morning' views.
morning_rows = df.query('daytime == "morning"')
morning_rows.nsmallest(n=3, columns='hour')['hour']

user
alexander    8
alexander    8
artem        9
Name: hour, dtype: int64

In [53]:
# The three latest hours among 'morning' views.
morning_rows = df.query('daytime == "morning"')
morning_rows.nlargest(n=3, columns='hour')['hour']

user
konstantin    10
maxim         10
konstantin    10
Name: hour, dtype: int64

## Describe

In [54]:
# Interquartile range of the hour column: 75th percentile minus 25th percentile,
# both taken from the describe() summary.
hour_stats = df.describe()['hour']
lower_quartile = hour_stats.loc['25%']
upper_quartile = hour_stats.loc['75%']
iqr = upper_quartile - lower_quartile
iqr

9.0