In [82]:
import pandas as pd

### Читаем файл, используя разделитель, задаем заголовки

In [83]:
views = pd.read_csv('feed-views.log', header = None, names=['datetime', 'user'], engine='python', sep = '\t', parse_dates=['datetime'])
views.head()

Unnamed: 0,datetime,user
0,2020-04-17 12:01:08.463179,artem
1,2020-04-17 12:01:23.743946,artem
2,2020-04-17 12:27:30.646665,artem
3,2020-04-17 12:35:44.884757,artem
4,2020-04-17 12:35:52.735016,artem


### Информация о типе данных (datetime64[ns])

In [84]:
views.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1076 entries, 0 to 1075
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   datetime  1076 non-null   datetime64[ns]
 1   user      1076 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 16.9+ KB


### Создаем новые столбцы из datetime

In [85]:
views['year'] = views.datetime.dt.year
views['mouth'] = views.datetime.dt.month
views['day'] = views.datetime.dt.day
views['hour'] = views.datetime.dt.hour
views['minute'] = views.datetime.dt.minute
views['second'] = views.datetime.dt.second
views

Unnamed: 0,datetime,user,year,mouth,day,hour,minute,second
0,2020-04-17 12:01:08.463179,artem,2020,4,17,12,1,8
1,2020-04-17 12:01:23.743946,artem,2020,4,17,12,1,23
2,2020-04-17 12:27:30.646665,artem,2020,4,17,12,27,30
3,2020-04-17 12:35:44.884757,artem,2020,4,17,12,35,44
4,2020-04-17 12:35:52.735016,artem,2020,4,17,12,35,52
...,...,...,...,...,...,...,...,...
1071,2020-05-21 18:45:20.441142,valentina,2020,5,21,18,45,20
1072,2020-05-21 23:03:06.457819,maxim,2020,5,21,23,3,6
1073,2020-05-21 23:23:49.995349,pavel,2020,5,21,23,23,49
1074,2020-05-21 23:49:22.386789,artem,2020,5,21,23,49,22


0.00 – 03.59 night

04.00 – 06.59 early morning 

07.00 – 10.59 morning

11.00– 16.59 afternoon 

17.00 – 19.59 early evening

20.00 – 23.59 evening

In [86]:
interval = ['night', 'early morning', 'morning', 'afternoon', 'early evening', 'evening']
views['daytime'] = pd.cut(views['hour'], [0, 4, 7, 11, 17, 20, 24], labels=interval, include_lowest=True, right=False)
views

Unnamed: 0,datetime,user,year,mouth,day,hour,minute,second,daytime
0,2020-04-17 12:01:08.463179,artem,2020,4,17,12,1,8,afternoon
1,2020-04-17 12:01:23.743946,artem,2020,4,17,12,1,23,afternoon
2,2020-04-17 12:27:30.646665,artem,2020,4,17,12,27,30,afternoon
3,2020-04-17 12:35:44.884757,artem,2020,4,17,12,35,44,afternoon
4,2020-04-17 12:35:52.735016,artem,2020,4,17,12,35,52,afternoon
...,...,...,...,...,...,...,...,...,...
1071,2020-05-21 18:45:20.441142,valentina,2020,5,21,18,45,20,early evening
1072,2020-05-21 23:03:06.457819,maxim,2020,5,21,23,3,6,evening
1073,2020-05-21 23:23:49.995349,pavel,2020,5,21,23,23,49,evening
1074,2020-05-21 23:49:22.386789,artem,2020,5,21,23,49,22,evening


### Количество данных

In [87]:
views.count()

datetime    1076
user        1076
year        1076
mouth       1076
day         1076
hour        1076
minute      1076
second      1076
daytime     1076
dtype: int64

### количество элементов в каждой категории времени суток

In [88]:
views['daytime'].value_counts()

evening          509
afternoon        252
early evening    145
night            129
morning           36
early morning      5
Name: daytime, dtype: int64

#### Cортируем значения по часам, минутам и секундам 
#### в порядке возрастания (одновременно, а не по очереди)

In [89]:
views = views.sort_values(['hour', 'minute', 'second'])
views

Unnamed: 0,datetime,user,year,mouth,day,hour,minute,second,daytime
944,2020-05-15 00:00:13.222265,valentina,2020,5,15,0,0,13,night
945,2020-05-15 00:01:05.153738,valentina,2020,5,15,0,1,5,night
563,2020-05-12 00:01:27.764025,pavel,2020,5,12,0,1,27,night
564,2020-05-12 00:01:38.444917,pavel,2020,5,12,0,1,38,night
565,2020-05-12 00:01:55.395042,pavel,2020,5,12,0,1,55,night
...,...,...,...,...,...,...,...,...,...
1074,2020-05-21 23:49:22.386789,artem,2020,5,21,23,49,22,evening
246,2020-05-09 23:53:55.599821,anatoliy,2020,5,9,23,53,55,evening
247,2020-05-09 23:54:54.260791,pavel,2020,5,9,23,54,54,evening
942,2020-05-14 23:58:56.754866,valentina,2020,5,14,23,58,56,evening


#### Рассчитать минимум и максимум часов

In [90]:
max = views[views.daytime == 'night'].hour.max()
min = views[views.daytime == 'night'].hour.min()

max_m = views[views.daytime == 'morning'].hour.max()
min_m = views[views.daytime == 'morning'].hour.min()
max, min, max_m, min_m

(3, 0, 10, 8)

In [91]:
views[(views.daytime == 'night') & (views.hour == views[views.daytime == 'night'].hour.max())]

Unnamed: 0,datetime,user,year,mouth,day,hour,minute,second,daytime
46,2020-04-19 03:23:35.471598,konstantin,2020,4,19,3,23,35,night
47,2020-04-19 03:23:55.473926,konstantin,2020,4,19,3,23,55,night
48,2020-04-19 03:33:07.757714,konstantin,2020,4,19,3,33,7,night


In [92]:
views[(views.daytime == 'morning') & (views.hour == views[views.daytime == 'morning'].hour.min())]

Unnamed: 0,datetime,user,year,mouth,day,hour,minute,second,daytime
963,2020-05-15 08:16:03.918402,alexander,2020,5,15,8,16,3,morning
964,2020-05-15 08:35:01.471463,alexander,2020,5,15,8,35,1,morning


In [93]:
views.hour.mode()


0    22
Name: hour, dtype: int64

In [94]:
views.daytime.mode()

0    evening
Name: daytime, dtype: category
Categories (6, object): ['night' < 'early morning' < 'morning' < 'afternoon' < 'early evening' < 'evening']

показать 3 самых ранних часа утра и соответствующие имена пользователей и
3 последних часа и имена пользователей, использующие nsmallest() и nlargest()

In [95]:
views[views.daytime == 'morning'].nsmallest(3,['hour', 'minute', 'second'])

Unnamed: 0,datetime,user,year,mouth,day,hour,minute,second,daytime
963,2020-05-15 08:16:03.918402,alexander,2020,5,15,8,16,3,morning
964,2020-05-15 08:35:01.471463,alexander,2020,5,15,8,35,1,morning
965,2020-05-15 09:02:24.999438,alexander,2020,5,15,9,2,24,morning


In [96]:
views[views.daytime == 'morning'].nlargest(3,['hour', 'minute', 'second'])

Unnamed: 0,datetime,user,year,mouth,day,hour,minute,second,daytime
14,2020-04-18 10:57:37.331258,konstantin,2020,4,18,10,57,37,morning
229,2020-05-09 10:56:59.161519,konstantin,2020,5,9,10,56,59,morning
13,2020-04-18 10:56:55.833899,maxim,2020,4,18,10,56,55,morning


#### Получаем базовую статистику 

In [97]:
views.describe()

Unnamed: 0,year,mouth,day,hour,minute,second
count,1076.0,1076.0,1076.0,1076.0,1076.0,1076.0
mean,2020.0,4.870818,13.552974,16.249071,29.629182,29.500929
std,0.0,0.335557,4.906567,6.95549,17.689388,17.405506
min,2020.0,4.0,1.0,0.0,0.0,0.0
25%,2020.0,5.0,11.0,13.0,14.0,14.0
50%,2020.0,5.0,13.0,19.0,29.0,30.0
75%,2020.0,5.0,15.0,22.0,46.0,45.0
max,2020.0,5.0,30.0,23.0,59.0,59.0


### Самый популярный интервал посещения

In [98]:
iqr = views.describe().hour['75%'] - views.describe().hour['25%']
iqr

9.0