In [103]:
import pandas as pd

# Ex01. Основные операции

## 1. Чтение файла feed-views.log

In [104]:
# Tab-separated log; the first line is read as data and the two fields get explicit names.
views = pd.read_csv(
    'data/feed-views.log',
    sep='\t',
    header=None,
    names=['datetime', 'user'],
)

### 1.1. Приведение типа даты к datetime64[ns]

In [105]:
# Cast the 'datetime' column to datetime64[ns]; every other column keeps its dtype.
views = views.astype({'datetime': 'datetime64[ns]'})

### 1.2. Извлечение данных даты в новые столбцы фрейма

In [106]:
# Add one column per timestamp component, in the order listed below.
stamp = views['datetime'].dt
date_parts = ('year', 'month', 'day', 'hour', 'minute', 'second')
views = views.assign(**{part: getattr(stamp, part) for part in date_parts})

views

Unnamed: 0,datetime,user,year,month,day,hour,minute,second
0,2020-04-17 12:01:08.463179,artem,2020,4,17,12,1,8
1,2020-04-17 12:01:23.743946,artem,2020,4,17,12,1,23
2,2020-04-17 12:27:30.646665,artem,2020,4,17,12,27,30
3,2020-04-17 12:35:44.884757,artem,2020,4,17,12,35,44
4,2020-04-17 12:35:52.735016,artem,2020,4,17,12,35,52
...,...,...,...,...,...,...,...,...
1071,2020-05-21 18:45:20.441142,valentina,2020,5,21,18,45,20
1072,2020-05-21 23:03:06.457819,maxim,2020,5,21,23,3,6
1073,2020-05-21 23:23:49.995349,pavel,2020,5,21,23,23,49
1074,2020-05-21 23:49:22.386789,artem,2020,5,21,23,49,22


## 2. Новый столбец со значением времени суток

In [107]:
# Time-of-day buckets as half-open hour ranges [left, right):
#   night 0-3, early morning 4-6, morning 7-10,
#   afternoon 11-16, early evening 17-19, evening 20-23.
# With right=False every hour 0..23 lands in exactly one bucket, so hour 0
# needs no special handling via include_lowest.
bins = [0, 4, 7, 11, 17, 20, 24]
labels = ['night', 'early morning', 'morning', 'afternoon', 'early evening', 'evening']

# Build the new frame in a single chain instead of mutating it with inplace=True:
# add the ordered categorical 'daytime' column, then make 'user' the index.
views = views.assign(
    daytime=pd.cut(views['hour'], bins=bins, labels=labels, right=False)
).set_index('user')

views

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
artem,2020-04-17 12:01:08.463179,2020,4,17,12,1,8,afternoon
artem,2020-04-17 12:01:23.743946,2020,4,17,12,1,23,afternoon
artem,2020-04-17 12:27:30.646665,2020,4,17,12,27,30,afternoon
artem,2020-04-17 12:35:44.884757,2020,4,17,12,35,44,afternoon
artem,2020-04-17 12:35:52.735016,2020,4,17,12,35,52,afternoon
...,...,...,...,...,...,...,...,...
valentina,2020-05-21 18:45:20.441142,2020,5,21,18,45,20,early evening
maxim,2020-05-21 23:03:06.457819,2020,5,21,23,3,6,evening
pavel,2020-05-21 23:23:49.995349,2020,5,21,23,23,49,evening
artem,2020-05-21 23:49:22.386789,2020,5,21,23,49,22,evening


## 3. Количество элементов в фрейме данных

In [108]:
# Number of non-null values in each column.
views.count(axis='index')

datetime    1076
year        1076
month       1076
day         1076
hour        1076
minute      1076
second      1076
daytime     1076
dtype: int64

### 3.1. Количество элементов в каждой категории времени суток

In [109]:
# How many rows fall into each time-of-day category, most frequent first.
views['daytime'].value_counts()

daytime
evening          509
afternoon        252
early evening    145
night            129
morning           36
early morning      5
Name: count, dtype: int64

## 4. Сортировка данных по часам, минутам и секундам

In [110]:
# Order rows by clock time only; the calendar date does not take part in the sort.
time_of_day = ['hour', 'minute', 'second']
views = views.sort_values(time_of_day)
views

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
valentina,2020-05-15 00:00:13.222265,2020,5,15,0,0,13,night
valentina,2020-05-15 00:01:05.153738,2020,5,15,0,1,5,night
pavel,2020-05-12 00:01:27.764025,2020,5,12,0,1,27,night
pavel,2020-05-12 00:01:38.444917,2020,5,12,0,1,38,night
pavel,2020-05-12 00:01:55.395042,2020,5,12,0,1,55,night
...,...,...,...,...,...,...,...,...
artem,2020-05-21 23:49:22.386789,2020,5,21,23,49,22,evening
anatoliy,2020-05-09 23:53:55.599821,2020,5,9,23,53,55,evening
pavel,2020-05-09 23:54:54.260791,2020,5,9,23,54,54,evening
valentina,2020-05-14 23:58:56.754866,2020,5,14,23,58,56,evening


## 5. Расчет значений по времени

### 5.1. Максимальный час для строк, где время суток — ночь

In [111]:
# Largest hour among the rows labelled 'night'.
is_night = views['daytime'] == 'night'
max_night = views.loc[is_night, 'hour'].max()
int(max_night)

3

### 5.2. Минимальный час для строк, где время суток — утро

In [112]:
# Smallest hour among the rows labelled 'morning'.
is_morning = views['daytime'] == 'morning'
min_morning = views.loc[is_morning, 'hour'].min()
int(min_morning)

8

### 5.3. Пользователи, посетившие страницу в эти часы

In [113]:
# Index label of the first 'night' row that holds the maximum hour
# (idxmax reports only the first occurrence, i.e. one example).
views.loc[views['daytime'] == 'night', 'hour'].idxmax()

'konstantin'

In [114]:
# Index label of the first 'morning' row that holds the minimum hour
# (idxmin reports only the first occurrence, i.e. one example).
views.loc[views['daytime'] == 'morning', 'hour'].idxmin()

'alexander'

### 5.4. Мода для часа и времени суток

In [115]:
# Most frequent hour; mode() returns a Series because several values can tie.
views['hour'].mode()

0    22
Name: hour, dtype: int32

In [116]:
# Most frequent time-of-day category; mode() returns a Series because several values can tie.
views['daytime'].mode()

0    evening
Name: daytime, dtype: category
Categories (6, object): ['night' < 'early morning' < 'morning' < 'afternoon' < 'early evening' < 'evening']

## 6. Самые ранние и поздние часы посещений

In [121]:
# 'hour' values of the three rows with the smallest hour (ties resolved by first occurrence).
views.nsmallest(n=3, columns='hour')['hour']

user
valentina    0
valentina    0
pavel        0
Name: hour, dtype: int32

In [118]:
# 'hour' values of the three rows with the largest hour (ties resolved by first occurrence).
views.nlargest(n=3, columns='hour')['hour']

user
ekaterina    23
ekaterina    23
ekaterina    23
Name: hour, dtype: int32

## 7. Самый популярный интервал посещений

In [119]:
# Interquartile range of the visit hour: distance between the 25th and 75th percentiles.
lower_q, upper_q = views['hour'].quantile([0.25, 0.75])
iqr = upper_q - lower_q
int(iqr)

9