##### Подготовка к работе с данными

In [74]:
import requests
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as matcol
import numpy as np
import seaborn as sns
from dotenv import load_dotenv
from pathlib import Path

##### Получаем данные по API

In [75]:
load_dotenv()

DATE_BEGIN = os.getenv('DATE_BEGIN')
DATE_END = os.getenv('DATE_END')
API_URL = os.getenv('API_URL')

##### Данные по визитам

In [76]:
visits_json = requests.get('https://data-charts-api.hexlet.app/visits?begin=2022-03-01&end=2023-09-01').json()
#visits_json=requests.get(f'{API_URL}/visits?begin={DATE_BEGIN}&end={DATE_END}')
visits_df = pd.DataFrame(visits_json)
visits_df['datetime'] = pd.to_datetime(visits_df['datetime'])
visits_df

Unnamed: 0,datetime,platform,user_agent,visit_id
0,2023-03-01 10:36:22,web,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,1de9ea66-70d3-4a1f-8735-df5ef7697fb9
1,2023-02-26 19:48:19,web,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,1de9ea66-70d3-4a1f-8735-df5ef7697fb9
2,2023-03-01 06:25:00,web,Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7...,f149f542-e935-4870-9734-6b4501eaf614
3,2023-02-26 03:58:26,web,Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7...,f149f542-e935-4870-9734-6b4501eaf614
4,2023-02-26 06:55:23,web,Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7...,f149f542-e935-4870-9734-6b4501eaf614
...,...,...,...,...
264545,2023-08-29 10:43:29,web,Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...,6d2e2f5b-970b-4dfe-8f57-25711a5b2a5d
264546,2023-08-29 19:29:16,web,Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109...,1d91535e-d984-4f76-bbaa-c14c0fd4f2e2
264547,2023-08-29 12:51:11,web,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,f2496721-7126-430d-976e-777a6cdccb4f
264548,2023-08-29 21:45:08,web,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,f2496721-7126-430d-976e-777a6cdccb4f


##### Данные по регистрациям

In [77]:
reg_json = requests.get('https://data-charts-api.hexlet.app/registrations?begin=2022-03-01&end=2023-09-01').json()
#reg_json=requests.get(f'{API_URL}/registrations?begin={DATE_BEGIN}&end={DATE_END}')
regestrations_df = pd.DataFrame(reg_json)
regestrations_df['datetime'] = pd.to_datetime(regestrations_df['datetime'])
regestrations_df

Unnamed: 0,datetime,email,platform,registration_type,user_id
0,2023-03-01 07:40:13,ebyrd@example.org,web,google,2e0f6bb8-b029-4f45-a786-2b53990d37f1
1,2023-03-01 13:14:00,knightgerald@example.org,web,email,f007f97c-9d8b-48b5-af08-119bb8f6d9b6
2,2023-03-01 03:05:50,cherylthompson@example.com,web,apple,24ff46ae-32b3-4a74-8f27-7cf0b8f32f15
3,2023-03-01 00:04:47,halldavid@example.org,web,email,3e9914e1-5d73-4c23-b25d-b59a3aeb2b60
4,2023-03-01 18:31:52,denise86@example.net,web,google,27f875fc-f8ce-4aeb-8722-0ecb283d0760
...,...,...,...,...,...
21831,2023-08-31 07:32:08,ikelley@example.net,ios,apple,a94b4a28-f25c-433c-b4ef-85af7d5b0c11
21832,2023-08-31 23:25:50,brittanycox@example.net,ios,apple,fc709c6b-528b-40d8-8980-c2c278e5a628
21833,2023-08-31 19:29:18,aryan@example.net,ios,apple,11e32805-7571-4108-8e50-e211d051bf7e
21834,2023-08-31 19:38:54,emartinez@example.net,ios,email,bce7b818-768b-434a-b6af-573cd60bff7f


##### Очистка данных визитов от ботов и группировка по датам и платформам

In [78]:
def get_visits(df_v):
    prep_df = df_v.copy()
    prep_df['user_agent'] = prep_df['user_agent'].astype('str').replace('AdsBot-Google (+http://www.google.com/adsbot.html)', np.NaN)
    prep_df['user_agent'] = prep_df['user_agent'].astype('str').replace('Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)', np.NaN)
    prep_df['user_agent'] = prep_df['user_agent'].astype('str').replace('Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', np.NaN)
    prep_df['user_agent'] = prep_df['user_agent'].astype('str').replace('Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.5359.130 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', np.NaN)
    prep_df = prep_df.dropna()
    # оставляем только последние посещения
    prep_df = prep_df.sort_values(by='datetime', ascending=False).drop_duplicates(subset='visit_id')
    prep_df['datetime'] = prep_df['datetime'].dt.to_period('D').dt.start_time.dt.date
    prep_df = prep_df.groupby(['datetime', 'platform']).agg({'visit_id': 'count'}).reset_index()
    # Переименуем столбцы
    prep_df = prep_df.rename(columns={'datetime': 'date_group','visit_id': 'visits'})
    return prep_df

visits = get_visits(visits_df)
visits

Unnamed: 0,date_group,platform,visits
0,2023-03-01,android,75
1,2023-03-01,ios,22
2,2023-03-01,web,279
3,2023-03-02,android,67
4,2023-03-02,ios,31
...,...,...,...
589,2023-08-30,ios,66
590,2023-08-30,web,1227
591,2023-08-31,android,57
592,2023-08-31,ios,50


##### Очистка данных регистраций от ботов и группировка по датам и платформам

In [79]:
def get_regs(df_r):
    prep_df = df_r.copy()
    prep_df['datetime'] = prep_df['datetime'].dt.to_period('D').dt.start_time.dt.date
    prep_df = prep_df.sort_values('datetime').groupby(['datetime', 'platform']).agg({'user_id': 'count'}).reset_index()
    # Переименуем столбцы
    prep_df = prep_df.rename(columns={'datetime': 'date_group', 'user_id': 'registrations'})
    return prep_df

registrations = get_regs(regestrations_df)
registrations
    

Unnamed: 0,date_group,platform,registrations
0,2023-03-01,android,61
1,2023-03-01,ios,18
2,2023-03-01,web,8
3,2023-03-02,android,59
4,2023-03-02,ios,24
...,...,...,...
547,2023-08-30,ios,40
548,2023-08-30,web,34
549,2023-08-31,android,42
550,2023-08-31,ios,36


##### Соединение датасетов и расчет конверсий

In [80]:
def merged_df(visits, registrations):
    merged_df = pd.merge(visits, registrations, left_on=['date_group', 'platform'], right_on=['date_group', 'platform'])
    merged_df['conversion'] = (merged_df['registrations'] / merged_df['visits'] * 100)
    merged_df.to_json('./conversion.json', orient='columns')
    return merged_df

conversions = merged_df(visits, registrations)

##### Загружаем и обрабатываем данные из файла ads.csv

In [81]:
#ads_df = pd.read_csv(path)
ads_df = pd.read_csv('./ads.csv')

##### Обработка колонки date

In [82]:
def get_ads(df):
    prep_ads = df.copy()
    prep_ads['date'] = pd.to_datetime(prep_ads['date'], format='mixed')
    prep_ads['date'] = prep_ads['date'].dt.to_period('D').dt.start_time.dt.date
    prep_ads = prep_ads.rename(columns={'date': 'date_group'})
    return prep_ads

prep_ads = get_ads(ads_df)

##### Объединим датафремы conversion и ads

In [83]:

merged_ads = pd.merge(
    conversions, prep_ads,
    left_on='date_group',
    right_on='date_group',
    how='inner'
)

##### Удаляем лишние столбцы и заполняем пропуски

In [84]:
clear_ads = merged_ads.drop(['platform', 'conversion', 'utm_source', 'utm_medium'], axis=1)
filled_ads = clear_ads.fillna({'utm_campaign': 'none', 'cost': 0})

##### Группирем и сортируем данные 

In [85]:
ads = filled_ads.sort_values('date_group').groupby(['date_group', 'utm_campaign']).sum().reset_index()
# Изменяем порядок столбцов
ads = ads[['date_group', 'visits', 'registrations', 'cost', 'utm_campaign']]
ads['utm_campaign'].unique()

array(['advanced_algorithms_series', 'virtual_reality_workshop',
       'ui_ux_design_drive', 'women_in_tech_symposium',
       'intro_to_python_course'], dtype=object)

In [86]:
ads

Unnamed: 0,date_group,visits,registrations,cost,utm_campaign
0,2023-03-01,376,87,636,advanced_algorithms_series
1,2023-03-02,613,106,756,advanced_algorithms_series
2,2023-03-03,683,107,606,advanced_algorithms_series
3,2023-03-04,647,159,669,advanced_algorithms_series
4,2023-03-05,707,115,795,advanced_algorithms_series
...,...,...,...,...,...
137,2023-08-27,795,88,666,intro_to_python_course
138,2023-08-28,635,83,669,intro_to_python_course
139,2023-08-29,1268,143,570,intro_to_python_course
140,2023-08-30,1328,101,327,intro_to_python_course


##### Конверсия датафрейма в JSON

In [87]:
ads.to_json('./ads.json', orient='columns')

### Построение графиков

##### Подготовка данных для визуализации

In [88]:
def get_weekly_data(df1, df2):
    temp_weekly_conv1 = df1.copy()
    temp_weekly_conv2 = df2.copy()

    temp_weekly_conv1['date_week'] = pd.to_datetime(temp_weekly_conv1['date_group'])
    temp_weekly_conv1['date_week'] = temp_weekly_conv1['date_week'].dt.to_period('W').dt.start_time.dt.date

    temp_weekly_conv2['date_week'] = pd.to_datetime(temp_weekly_conv2['date_group'])
    temp_weekly_conv2['date_week'] = temp_weekly_conv2['date_week'].dt.to_period('W').dt.start_time.dt.date

    weekly_conv_platform = temp_weekly_conv2.groupby(['date_week', 'platform']).sum(['visits', 'registrations']).reset_index()
    weekly_conv = temp_weekly_conv1.groupby(['date_week']).sum(['visits', 'registrations']).reset_index()

    weekly_conv_platform['conversion'] = (weekly_conv_platform['registrations']/weekly_conv_platform['visits']) * 100
    weekly_conv = (weekly_conv['registrations']/weekly_conv['visits']) * 100
    return weekly_conv, weekly_conv_platform

weekly_conv, weekly_conv_platform = get_weekly_data(ads, conversions)

##### Создание директории charts

In [89]:
p = Path.cwd()
d = p / 'charts'
d.mkdir(exist_ok=True)

##### Построение графика Total visits

In [92]:
def total_visits(df):
    fig, ax = plt.subplots(figsize=(18, 9), tight_layout=True)
    bars = plt.bar(x='date_week', height='visits', data=weekly_conv_platform, width=5)
    #ax.bar_label(bars)
    plt.title('Total visits', fontsize=16)
    plt.xlabel('Date_group', fontsize=14)
    plt.ylabel('Visits', fontsize=14)
    plt.xticks(df['date_week'], rotation=45)
    plt.grid(axis='y')
    plt.tight_layout()
    plt.savefig(f"./charts/total_visits.png")
    plt.close(fig)

total_visits(weekly_conv_platform)

##### Построение графика Visits by Platform

In [93]:
def total_visits_by_platform(df):
    fig, ax = plt.subplots(figsize=(18, 9), tight_layout=True)
    df = df.pivot_table(index='date_week', columns='platform', values='visits')
    df.plot(kind='bar', stacked=True, ax=ax)
    plt.title('Visits by Platform', fontsize=16)
    plt.xlabel('Date_group',fontsize=14)
    plt.ylabel('Visits',fontsize=14)
    plt.xticks(rotation=45)
    plt.grid(axis='y')
    plt.savefig(f"./charts/Total_visits_platform.png")
    plt.close(fig)

total_visits_by_platform(weekly_conv_platform)


##### Построение графика Total registrations

In [94]:
def total_registrations(df):
    fig, ax = plt.subplots(figsize=(18,9), tight_layout=True)
    bars = plt.bar(x='date_week', height='registrations', data=weekly_conv_platform, width=5)
    #ax.bar_label(bars)
    plt.title('Total registrations', fontsize=16)
    plt.xlabel('Date_group', fontsize=14)
    plt.ylabel('Registrations', fontsize=14)
    plt.xticks(df['date_week'], rotation=45)
    plt.grid(axis='y')
    plt.savefig(f"{d}/Total_registrations.png")
    plt.close(fig)

total_registrations(weekly_conv_platform)

##### Построение графика Total registrations by platform