In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import requests as req
import plotly.express as px
import os
from dotenv import load_dotenv

## ШАГ 1

In [2]:
# Load the local sample CSV files with registrations and visits.
regist = pd.read_csv('./regs_1k.csv')
visits = pd.read_csv('./visits_1k.csv')

# Print summary statistics for each sample, registrations first.
regist_summary = regist.describe()
print(f'Registration\n {regist_summary}')

visits_summary = visits.describe()
print(f'\nVisits\n {visits_summary}')

Registration
             user_id
count  1.000000e+03
mean   4.488623e+06
std    2.620568e+06
min    2.236800e+04
25%    2.235489e+06
50%    4.473044e+06
75%    6.779707e+06
max    8.881772e+06

Visits
                                         uuid platform  \
count                                   1000     1000   
unique                                   519        3   
top     251a0926-ece3-4d77-aa42-ab569fdf9fe2      web   
freq                                       4      954   

                                               user_agent                 date  
count                                                1000                 1000  
unique                                                 28                  996  
top     Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...  2023-03-01T08:01:45  
freq                                                   71                    2  


## ШАГ 2

In [4]:
# Read the API endpoint and the reporting period from the environment
# (load_dotenv() also picks up values from a local .env file).
load_dotenv()
API_URL = os.getenv('API_URL')
START = os.getenv('DATE_BEGIN')
END = os.getenv('DATE_END')

# Fail early with a readable message instead of requesting a URL such as
# "None/visits?begin=None&end=None" when a variable is not configured.
missing_vars = [
    var_name
    for var_name, var_value in (('API_URL', API_URL), ('DATE_BEGIN', START), ('DATE_END', END))
    if not var_value
]
if missing_vars:
    raise RuntimeError(f'Missing environment variables: {", ".join(missing_vars)}')

print(START)
visits_url = f'{API_URL}/visits?begin={START}&end={END}'
reg_url = f'{API_URL}/registrations?begin={START}&end={END}'
print(visits_url)
print(reg_url)

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error body from being parsed as data.
visitors_req = req.get(visits_url, timeout=60)
visitors_req.raise_for_status()
visits_df = pd.DataFrame(visitors_req.json())

regs_req = req.get(reg_url, timeout=60)
regs_req.raise_for_status()
regs_df = pd.DataFrame(regs_req.json())


2023-03-01
https://data-charts-api.hexlet.app/visits?begin=2023-03-01&end=2023-09-01
https://data-charts-api.hexlet.app/registrations?begin=2023-03-01&end=2023-09-01


In [5]:
# Preview the first rows of both API frames, separated by a ruler line.
separator = "\n __________________________________________________________"
print(visits_df.head(), separator, regs_df.head(), sep="\n")

                               visit_id platform  \
0  1de9ea66-70d3-4a1f-8735-df5ef7697fb9      web   
1  f149f542-e935-4870-9734-6b4501eaf614      web   
2  08f0ebd4-950c-4dd9-8e97-b5bdf073eed1      web   
3  19322fed-157c-49c6-b16e-2d5cabeb9592      web   
4  04762a22-3c9f-40c9-9ac9-6628c4381836      web   

                                          user_agent             datetime  
0  Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...  2023-03-01T10:36:22  
1  Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7...  2023-03-01T06:25:00  
2  Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2...  2023-03-01T10:26:13  
3  Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...  2023-03-01T12:33:06  
4  Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...  2023-03-01T01:38:35  

 __________________________________________________________
              datetime                               user_id  \
0  2023-03-01T07:40:13  2e0f6bb8-b029-4f45-a786-2b53990d37f1   
1  2023-03-01T13:14:00  f007f97c-9d8b-48b5

## ШАГ 3

In [14]:
# Use a common name for the timestamp column in both frames. rename() returns
# a new frame here, so the frames built in the loading cell are not mutated
# in place.
visits_df = visits_df.rename(columns={'datetime': 'date'})
regs_df = regs_df.rename(columns={'datetime': 'date'})

# Drop visits whose user agent mentions "bot" (case-insensitive).
# na=False makes a missing user agent count as "not a bot" instead of producing
# NaN in the mask, which would break the `~` negation. .copy() detaches the
# filtered frame so the column assignments below do not raise
# SettingWithCopyWarning.
bot_mask = visits_df['user_agent'].str.lower().str.contains('bot', na=False)
visits_df = visits_df[~bot_mask].copy()

visits_df['date'] = pd.to_datetime(visits_df['date'])
regs_df['date'] = pd.to_datetime(regs_df['date'])

# Keep only the latest record for every visit_id. A stable sort makes the
# choice deterministic when two records of a visit share the same timestamp.
visits_df = (
    visits_df
    .sort_values('date', kind='mergesort')
    .drop_duplicates(subset='visit_id', keep='last')
)
regs_df = regs_df.sort_values('date')

# Truncate timestamps to calendar days (as 'YYYY-MM-DD' strings) for grouping.
regs_df['date'] = regs_df['date'].dt.strftime('%Y-%m-%d')
visits_df['date'] = visits_df['date'].dt.strftime('%Y-%m-%d')

# Daily counts per platform.
visits_grouped = (
    visits_df
    .groupby(['date', 'platform'])['visit_id']
    .count()
    .reset_index()
    .rename(columns={'visit_id': 'visits'})
)
registrations_grouped = (
    regs_df
    .groupby(['date', 'platform'])['user_id']
    .count()
    .reset_index()
    .rename(columns={'user_id': 'registrations'})
)



In [15]:
# Show the first rows of the daily visit and registration counts.
print(visits_grouped.head(), registrations_grouped.head(), sep="\n")

         date platform  visits
0  2023-03-01  android      75
1  2023-03-01      ios      22
2  2023-03-01      web     279
3  2023-03-02  android      67
4  2023-03-02      ios      31
         date platform  registrations
0  2023-03-01  android             61
1  2023-03-01      ios             18
2  2023-03-01      web              8
3  2023-03-02  android             59
4  2023-03-02      ios             24


In [16]:
# Combine daily visits and registrations; the outer join keeps date/platform
# pairs that appear in only one of the two frames.
conversion_df = pd.merge(visits_grouped, registrations_grouped,
                         on=['date', 'platform'], how='outer')

# Pairs missing on one side get a count of 0. The cast restores integer
# counts, because NaN introduced by the join turns the columns into floats.
count_cols = ['visits', 'registrations']
conversion_df[count_cols] = conversion_df[count_cols].fillna(0).astype('int64')

# Conversion in percent. Rows with zero visits have no defined conversion:
# the denominator is masked to NaN there, so the result is NaN, not inf.
visits_nonzero = conversion_df['visits'].where(conversion_df['visits'] > 0)
conversion_df['conversion'] = (
    conversion_df['registrations'] / visits_nonzero * 100
).round(6)

conversion_df = conversion_df.rename(columns={'date': 'date_group'})

# Sort by platform as well, so the row order within a day is deterministic.
conversion_df = conversion_df.sort_values(['date_group', 'platform'])

conversion_df.to_json('conversion.json')

print(conversion_df.head())

   date_group platform  visits  registrations  conversion
0  2023-03-01  android      75             61   81.333333
1  2023-03-01      ios      22             18   81.818182
2  2023-03-01      web     279              8    2.867384
3  2023-03-02  android      67             59   88.059701
4  2023-03-02      ios      31             24   77.419355


## ШАГ 4