# TrackAd data prediction

## Imports

In [1]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tatoken import token, client

class BearerAuth(requests.auth.AuthBase):
    """Requests auth hook that attaches a bearer token to outgoing requests."""

    def __init__(self, token):
        """Remember the token that will be sent with every request."""
        self.token = token

    def __call__(self, r):
        """Set the ``authorization`` header on the prepared request and return it."""
        header_value = "Bearer " + self.token
        r.headers["authorization"] = header_value
        return r

## Settings

In [2]:
# Training window (inclusive dates, ISO format).
date_from = '2023-01-01'
date_to = '2024-06-30'
# Evaluation window that follows the training window.
test_date_from = '2024-07-01'
test_date_to = '2024-07-31'

# Single template so both URLs are guaranteed to share the same shape.
STATISTICS_URL_TEMPLATE = (
    'https://trackad-common.trackad.ai/api/source-statistics'
    '/client/{client}/date-from/{date_from}/date-to/{date_to}'
)
url = STATISTICS_URL_TEMPLATE.format(client=client, date_from=date_from, date_to=date_to)
# The test URL must use the test window; previously it repeated the training dates.
test_url = STATISTICS_URL_TEMPLATE.format(client=client, date_from=test_date_from, date_to=test_date_to)

## Request

In [3]:
# Without a timeout, requests can wait forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 120

response = requests.get(url, auth=BearerAuth(token), timeout=REQUEST_TIMEOUT_SECONDS)
# Fail loudly on 4xx/5xx (e.g. an expired token) instead of hitting a
# confusing KeyError / JSON decode error on the lines below.
response.raise_for_status()

# Show every column when displaying the wide statistics frame.
pd.set_option('display.max_columns', None)
df = pd.json_normalize(response.json()['sourceStatistics'])
df.head()

Unnamed: 0,dateFrom,date,source_name,sourceTypeName,agency,business_model,deduplication,visits,newVisits,click,impression,audience,conversions,conversionsPostView,conversionsPostClick,conversionsRevenue,conversionsPostViewRevenue,conversionsPostClickRevenue,pageviews,timeOnSite,timePerVisit,bounces,orderCount,acquisitionCost,adAccountCost,revenue,doneRevenue,waitRevenue,canceledRevenue,firstClickRevenue,firstClickDoneRevenue,firstClickOrderCount,canceledOrderCount,newOrderCount,campaignOrderCount,margin,visitsUseful,orderCountDesktop,orderCountTablet,orderCountMobile,installs,orderCountByPaymentDate,commissionByPaymentDate,revenueByPaymentDate,participation,configurable_kpi_1,configurable_kpi_2,configurable_kpi_3,configurable_kpi_4,configurable_kpi_5
0,2023-01-01,2023-01-01,Yandex EPK,SEM Not Brand,TrackAd outsourcing,CPC,Последний платный клик,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0,0,0.0,0,0,0,0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0
1,2023-01-01,2023-01-01,VKR epromo,Display - RTB,TrackAd outsourcing,CPC,Последний платный клик,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0,0,0.0,0,0,0,0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0
2,2023-01-01,2023-01-01,Flocktory Besplanto,Affiliate,TrackAd outsourcing,CPO,Последний платный клик,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0,0,0.0,0,0,0,0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0
3,2023-01-01,2023-01-01,Yandex Display,Display - RTB,TrackAd outsourcing,CPM,Последний платный клик,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0,0,0.0,0,0,0,0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0
4,2023-01-01,2023-01-01,Telegram,Social media,Internal,Free,Последний клик,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0,0,0.0,0,0,0,0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0


## KPI selection

In [4]:
# Columns kept for modelling: the date, the traffic source and the raw KPIs.
KPI_COLUMNS = [
    'date', 'source_name', 'visits', 'newVisits', 'bounces',
    'orderCount', 'acquisitionCost', 'revenue', 'newOrderCount',
]

# .copy() makes the subset an independent frame, so the column assignment
# below does not raise SettingWithCopyWarning or write into a view.
df = df[KPI_COLUMNS].copy()
df['date'] = pd.to_datetime(df['date'])
df.head()

Unnamed: 0,date,source_name,visits,newVisits,bounces,orderCount,acquisitionCost,revenue,newOrderCount
0,2023-01-01,Yandex EPK,0,0,0,0,0.0,0.0,0
1,2023-01-01,VKR epromo,0,0,0,0,0.0,0.0,0
2,2023-01-01,Flocktory Besplanto,0,0,0,0,0.0,0.0,0
3,2023-01-01,Yandex Display,0,0,0,0,0.0,0.0,0
4,2023-01-01,Telegram,0,0,0,0,0.0,0.0,0


## Dates formatting

In [5]:
# Idempotent: guarantees a datetime dtype so the .dt accessor below is available.
df['date'] = pd.to_datetime(df['date'])

# Vectorised calendar features (replaces per-row apply/lambda calls).
date_parts = df['date'].dt
df['Year'] = date_parts.year
df['Month'] = date_parts.month
df['Day'] = date_parts.day
# Monday == 0 ... Sunday == 6, the same convention as datetime.weekday().
df['Weekday'] = date_parts.weekday
df.head()

Unnamed: 0,date,source_name,visits,newVisits,bounces,orderCount,acquisitionCost,revenue,newOrderCount,Year,Month,Day,Weekday
0,2023-01-01,Yandex EPK,0,0,0,0,0.0,0.0,0,2023,1,1,6
1,2023-01-01,VKR epromo,0,0,0,0,0.0,0.0,0,2023,1,1,6
2,2023-01-01,Flocktory Besplanto,0,0,0,0,0.0,0.0,0,2023,1,1,6
3,2023-01-01,Yandex Display,0,0,0,0,0.0,0.0,0,2023,1,1,6
4,2023-01-01,Telegram,0,0,0,0,0.0,0.0,0,2023,1,1,6


## Source encoding

In [6]:
# Distinct source names in first-appearance order, captured before the column
# is label-encoded so the readable names stay available afterwards.
source_names = pd.unique(df['source_name'])

In [7]:
# One fitted encoder per column, keyed by column name, so every mapping can be
# inverted later (refitting a single shared encoder keeps only the last one).
label_encoders = {}

# Kept for backward compatibility: after the loop this name refers to the
# encoder fitted on the last encoded column, as before.
label_encoder = LabelEncoder()

columns = df.select_dtypes(include='object').columns

for column in columns:
    label_encoder = LabelEncoder()
    df[column] = label_encoder.fit_transform(df[column])
    label_encoders[column] = label_encoder

df.head()

Unnamed: 0,date,source_name,visits,newVisits,bounces,orderCount,acquisitionCost,revenue,newOrderCount,Year,Month,Day,Weekday
0,2023-01-01,197,0,0,0,0,0.0,0.0,0,2023,1,1,6
1,2023-01-01,179,0,0,0,0,0.0,0.0,0,2023,1,1,6
2,2023-01-01,79,0,0,0,0,0.0,0.0,0,2023,1,1,6
3,2023-01-01,196,0,0,0,0,0.0,0.0,0,2023,1,1,6
4,2023-01-01,159,0,0,0,0,0.0,0.0,0,2023,1,1,6


In [8]:
# Encoded values in first-appearance order. `source_names` was taken from the
# same column in the same order before encoding, so pairing the two arrays
# positionally maps each name to its code.
source_name_codes = df['source_name'].unique()
name_code_pairs = {
    'source_names': source_names,
    'source_name_codes': source_name_codes,
}
sources_name_codes_table = pd.DataFrame(name_code_pairs)
sources_name_codes_table.head()

Unnamed: 0,source_names,source_name_codes
0,Yandex EPK,197
1,VKR epromo,179
2,Flocktory Besplanto,79
3,Yandex Display,196
4,Telegram,159


In [9]:
# Name/code lookup table ordered by code; the index still holds each source's
# first-appearance position from the raw data.
codes = dict(name=source_names, code=source_name_codes)
codes_df = pd.DataFrame(codes).sort_values(by='code')
codes_df

Unnamed: 0,name,code
173,ActionPay LPC,0
178,Admitad LPC,1
5,Admitad LPC Cashback,2
24,Admitad LPC coupon,3
112,Banner Design,4
...,...,...
9,Yandex Unknown UTMs,209
206,Yandex organic,210
193,Yandex.market,211
30,Youtube Blog,212


## Model creation

In [10]:
# Fixed seed so the split, the forest and the reported score are reproducible
# under Restart & Run All.
RANDOM_STATE = 42

TARGET_COLUMN = 'orderCount'
# Everything that is either the target, a same-day KPI, or the raw date;
# what remains are the source code and the calendar features.
NON_FEATURE_COLUMNS = [
    'orderCount', 'visits', 'newVisits', 'bounces',
    'acquisitionCost', 'revenue', 'newOrderCount', 'date',
]

X = df.drop(columns=NON_FEATURE_COLUMNS)
y = df[TARGET_COLUMN]

# NOTE(review): a classifier treats every distinct orderCount value as a
# separate class; a regressor may suit a count target better — confirm intent.
model = RandomForestClassifier(random_state=RANDOM_STATE)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=RANDOM_STATE
)

model.fit(X_train, y_train)

predictions = model.predict(X_test)

# NOTE(review): if most rows have orderCount == 0, accuracy is dominated by
# that class; consider an additional metric before trusting this number.
model_score = accuracy_score(y_test, predictions)
model_score

0.9236289082521784

In [11]:
# Display the held-out target values (pandas truncates the long Series).
y_test

36065     0
30401     0
22615     0
79213     0
57611     0
         ..
34763     0
99116     0
106858    0
11520     1
109742    0
Name: orderCount, Length: 11706, dtype: int64

## Output creation

In [12]:
# Zero-row slice: shows only the feature column names, in the order the model
# was trained on.
X.head(0).to_dict()

{'source_name': {}, 'Year': {}, 'Month': {}, 'Day': {}, 'Weekday': {}}

In [13]:
# codes_df is sorted by code but keeps its original first-appearance index.
# pd.concat(axis=1) aligns on index, so without the reset each name would be
# paired with the predictions of a different source. After the reset, row i
# holds the name whose code is i, matching prediction frames built from
# range(sources_number).
resulting_df = codes_df['name'].reset_index(drop=True)

# First forecast day: 2024-07-01, a Monday (weekday 0).
day = 1
month = 7
weekday = 0
year = 2024

# Derived from the lookup table instead of hard-coding 213, so it follows the
# data when the set of sources changes.
sources_number = len(codes_df)

In [14]:
# Last valid day of the forecast month (31 for July), so the loop never
# produces a non-existent calendar date if `month` is changed.
last_day = pd.Timestamp(year=year, month=month, day=1).days_in_month

daily_predictions = []
for offset, current_day in enumerate(range(day, last_day + 1)):
    # Weekday must stay within 0..6, the range produced by datetime.weekday()
    # for the training feature; the previous wrap-around let it reach 7.
    current_weekday = (weekday + offset) % 7

    # One row per source code, all sharing the same calendar features.
    test_inputs = {
        'source_name': range(sources_number),
        'Year': np.full(sources_number, year),
        'Month': np.full(sources_number, month),
        'Day': np.full(sources_number, current_day),
        'Weekday': np.full(sources_number, current_weekday),
    }
    test_df = pd.DataFrame(test_inputs)

    # Row i of the prediction Series belongs to source code i.
    daily_predictions.append(
        pd.Series(model.predict(test_df), name=f'{year}-{month}-{current_day}')
    )

# Concatenate once instead of growing the frame inside the loop.
resulting_df = pd.concat([resulting_df, *daily_predictions], axis=1)

In [15]:
# Write the per-source, per-day predictions to disk, with a header row and
# without the index column.
resulting_df.to_csv(
    'out.csv',
    index=False,
    header=True,
    encoding='utf-8',
)