In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# files stored on Drive can be opened through ordinary paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd

# Location of the feed-views log on the mounted Drive; passed to pd.read_csv below.
path = '/content/drive/My Drive/data/feed-views.log' 

In [7]:
# Read the tab-separated log. Column names are supplied explicitly through
# `names`, so every line of the file is treated as a data row.
column_names = ['datetime', 'user']
df = pd.read_csv(path, sep='\t', names=column_names)
df.head()

Unnamed: 0,datetime,user
0,2020-04-17 12:01:08.463179,artem
1,2020-04-17 12:01:23.743946,artem
2,2020-04-17 12:27:30.646665,artem
3,2020-04-17 12:35:44.884757,artem
4,2020-04-17 12:35:52.735016,artem


In [8]:
# Parse the raw timestamp strings, then expose each calendar/clock component
# as its own column (same names and order as the `.dt` accessors used).
df['datetime'] = pd.to_datetime(df['datetime'])
for part in ('year', 'month', 'day', 'hour', 'minute', 'second'):
    df[part] = getattr(df['datetime'].dt, part)
df.head()


Unnamed: 0,datetime,user,year,month,day,hour,minute,second
0,2020-04-17 12:01:08.463179,artem,2020,4,17,12,1,8
1,2020-04-17 12:01:23.743946,artem,2020,4,17,12,1,23
2,2020-04-17 12:27:30.646665,artem,2020,4,17,12,27,30
3,2020-04-17 12:35:44.884757,artem,2020,4,17,12,35,44
4,2020-04-17 12:35:52.735016,artem,2020,4,17,12,35,52


In [10]:
# Bucket every view into a part of the day based on its hour.
# With right=False each interval is closed on the left and open on the right:
#   [0, 4) night, [4, 7) early morning, [7, 11) morning,
#   [11, 17) afternoon, [17, 20) early evening, [20, 24) evening.
bins = [0, 4, 7, 11, 17, 20, 24]
labels = ['night', 'early morning', 'morning', 'afternoon', 'early evening', 'evening']
df['daytime'] = pd.cut(
    df['hour'],
    bins=bins,
    labels=labels,
    right=False,
    include_lowest=True,
)
# Index the frame by user name; 'user' is no longer a regular column afterwards.
df = df.set_index('user')
df.head()

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
artem,2020-04-17 12:01:08.463179,2020,4,17,12,1,8,afternoon
artem,2020-04-17 12:01:23.743946,2020,4,17,12,1,23,afternoon
artem,2020-04-17 12:27:30.646665,2020,4,17,12,27,30,afternoon
artem,2020-04-17 12:35:44.884757,2020,4,17,12,35,44,afternoon
artem,2020-04-17 12:35:52.735016,2020,4,17,12,35,52,afternoon


In [11]:
# Count of non-null values in each column.
df.count()

datetime    1076
year        1076
month       1076
day         1076
hour        1076
minute      1076
second      1076
daytime     1076
dtype: int64

In [12]:
# Number of log rows that fall into each part-of-day category.
df.value_counts('daytime')

daytime
evening          509
afternoon        252
early evening    145
night            129
morning           36
early morning      5
dtype: int64

In [13]:
# Order the views by time of day only (hour, then minute, then second);
# the calendar date is not part of the sort key.
time_of_day_cols = ['hour', 'minute', 'second']
df.sort_values(by=time_of_day_cols)

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
valentina,2020-05-15 00:00:13.222265,2020,5,15,0,0,13,night
valentina,2020-05-15 00:01:05.153738,2020,5,15,0,1,5,night
pavel,2020-05-12 00:01:27.764025,2020,5,12,0,1,27,night
pavel,2020-05-12 00:01:38.444917,2020,5,12,0,1,38,night
pavel,2020-05-12 00:01:55.395042,2020,5,12,0,1,55,night
...,...,...,...,...,...,...,...,...
artem,2020-05-21 23:49:22.386789,2020,5,21,23,49,22,evening
anatoliy,2020-05-09 23:53:55.599821,2020,5,9,23,53,55,evening
pavel,2020-05-09 23:54:54.260791,2020,5,9,23,54,54,evening
valentina,2020-05-14 23:58:56.754866,2020,5,14,23,58,56,evening


In [14]:
# Latest hour observed in the 'night' bucket and earliest hour observed in the
# 'morning' bucket.
# The results are deliberately NOT stored in variables called `max` / `min`:
# assigning to those names shadows the Python built-ins for the rest of the
# kernel session, so any later `max(...)` / `min(...)` call would fail.
night_hours = df['hour'][df['daytime'] == 'night']
morning_hours = df['hour'][df['daytime'] == 'morning']
night_last_hour = night_hours.max()
morning_first_hour = morning_hours.min()
night_last_hour, morning_first_hour

(3, 8)

In [17]:
# First row (in current frame order) that is labelled 'night' and has hour 3.
is_night = df['daytime'] == 'night'
is_hour_3 = df['hour'] == 3
df[is_night & is_hour_3][:1]


Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
konstantin,2020-04-19 03:23:35.471598,2020,4,19,3,23,35,night


In [18]:
# First row (in current frame order) that is labelled 'morning' and has hour 8.
is_morning = df['daytime'] == 'morning'
is_hour_8 = df['hour'] == 8
df[is_morning & is_hour_8][:1]

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alexander,2020-05-15 08:16:03.918402,2020,5,15,8,16,3,morning


In [19]:
# Most frequent hour of the day across all views.
df['hour'].mode()

0    22
dtype: int64

In [20]:
# Most frequent part-of-day category across all views.
df['daytime'].mode()

0    evening
Name: daytime, dtype: category
Categories (6, object): ['night' < 'early morning' < 'morning' < 'afternoon' < 'early evening' <
                         'evening']

In [21]:
# Three 'morning' views with the smallest time of day, and the three views with
# the largest time of day overall (compared by hour, then minute, then second).
earliest = df[df['daytime'] == 'morning'].nsmallest(3, ['hour', 'minute', 'second'])
latest = df.nlargest(3, ['hour', 'minute', 'second'])
# Separate the two frames with a blank line: the default single-space separator
# glues the second frame's header onto the last row of the first frame.
print(earliest, latest, sep='\n\n')

                            datetime  year  month  day  hour  minute  second  \
user                                                                           
alexander 2020-05-15 08:16:03.918402  2020      5   15     8      16       3   
alexander 2020-05-15 08:35:01.471463  2020      5   15     8      35       1   
alexander 2020-05-15 09:02:24.999438  2020      5   15     9       2      24   

           daytime  
user                
alexander  morning  
alexander  morning  
alexander  morning                               datetime  year  month  day  hour  minute  second  \
user                                                                           
alexander 2020-05-14 23:59:38.758438  2020      5   14    23      59      38   
valentina 2020-05-14 23:58:56.754866  2020      5   14    23      58      56   
pavel     2020-05-09 23:54:54.260791  2020      5    9    23      54      54   

           daytime  
user                
alexander  evening  
valentina  evening  
pavel    

In [22]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,year,month,day,hour,minute,second
count,1076.0,1076.0,1076.0,1076.0,1076.0,1076.0
mean,2020.0,4.870818,13.552974,16.249071,29.629182,29.500929
std,0.0,0.335557,4.906567,6.95549,17.689388,17.405506
min,2020.0,4.0,1.0,0.0,0.0,0.0
25%,2020.0,5.0,11.0,13.0,14.0,14.0
50%,2020.0,5.0,13.0,19.0,29.0,30.0
75%,2020.0,5.0,15.0,22.0,46.0,45.0
max,2020.0,5.0,30.0,23.0,59.0,59.0


In [23]:
# Interquartile range of the viewing hour: 75th percentile minus 25th percentile.
# Both quartiles come from one quantile() call on the 'hour' column instead of
# running describe() over the whole frame twice; the values are the same ones
# describe() reports as '25%' and '75%'.
hour_q1, hour_q3 = df['hour'].quantile([0.25, 0.75])
iqr = hour_q3 - hour_q1
print(iqr)

9.0
