# TrackAd data prediction

## Imports

In [1]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tatoken import token, client

class BearerAuth(requests.auth.AuthBase):
    """Auth hook for ``requests`` that sends a bearer token with each request."""

    def __init__(self, token):
        # Raw token; the "Bearer " scheme prefix is added when a request is signed.
        self.token = token

    def __call__(self, r):
        """Set the ``authorization`` header on request ``r`` and return it."""
        header_value = "Bearer " + self.token
        r.headers["authorization"] = header_value
        return r

## Settings

In [2]:
# Training window: date range of the statistics fetched through `url`.
date_from = '2023-01-01'
date_to = '2024-06-30'

# Test window: date range covered by `test_url`.
test_date_from = '2024-07-01'
test_date_to = '2024-07-31'

url = f'https://trackad-common.trackad.ai/api/source-statistics/client/{client}/date-from/{date_from}/date-to/{date_to}'
# Built from the test_* dates so it addresses the test window rather than
# repeating the training window.
test_url = f'https://trackad-common.trackad.ai/api/source-statistics/client/{client}/date-from/{test_date_from}/date-to/{test_date_to}'

## Request

In [None]:
# Fetch the training-window statistics. The timeout keeps the cell from hanging
# forever on an unresponsive server.
response = requests.get(url, auth=BearerAuth(token), timeout=60)
# Fail here with the HTTP status instead of a confusing KeyError / JSON error
# further down when the API answers with an error page.
response.raise_for_status()

# Show every column when a frame is displayed.
pd.set_option('display.max_columns', None)
# Flatten the records under the 'sourceStatistics' key into a flat table.
df = pd.json_normalize(response.json()['sourceStatistics'])
df.head()

## KPI selection

In [None]:
# Keep only the columns used in the rest of the notebook. The explicit copy
# makes the result an independent frame, so the column assignment below does
# not raise pandas' SettingWithCopyWarning.
df = df[['date', 'source_name', 'visits', 'newVisits', 'bounces', 'orderCount', 'acquisitionCost', 'revenue', 'newOrderCount']].copy()
# Parse the date column into datetime values.
df['date'] = pd.to_datetime(df['date'])
df.head()

## Date formatting

In [None]:
# Make sure the column is datetime (a no-op when it already is), then derive
# the calendar features with the vectorized .dt accessors instead of a
# per-row Python lambda.
df['date'] = pd.to_datetime(df['date'])
df['Year'] = df['date'].dt.year
df['Month'] = df['date'].dt.month
df['Day'] = df['date'].dt.day
# Monday=0 ... Sunday=6, the same convention as datetime.weekday().
df['Weekday'] = df['date'].dt.weekday
df.head()

## Source encoding

In [6]:
# Distinct values of the source_name column, in order of first appearance.
source_names = pd.unique(df['source_name'])

In [None]:
# Replace every object-typed column with integer codes. Each column gets its
# own fitted encoder, stored in `label_encoders`, so a column's codes can be
# mapped back to the original values later (re-fitting one shared encoder
# would only remember the last column).
columns = df.select_dtypes(include='object').columns

label_encoders = {}
for column in columns:
    label_encoder = LabelEncoder()
    df[column] = label_encoder.fit_transform(df[column])
    label_encoders[column] = label_encoder

df.head()

In [None]:
# Distinct values of the (now encoded) source_name column, in order of first
# appearance; paired by position with `source_names`.
# NOTE(review): the pairing assumes the rows were not reordered since
# `source_names` was captured - confirm.
source_name_codes = pd.unique(df['source_name'])
sources_name_codes_table = pd.DataFrame(
    {
        'source_names': source_names,
        'source_name_codes': source_name_codes,
    }
)
sources_name_codes_table.head()

In [None]:
# Name/code lookup table, ordered by ascending code.
codes = dict(name=source_names, code=source_name_codes)
codes_df = pd.DataFrame(codes).sort_values(by='code')
codes_df

## Model creation

In [10]:
# Features: every column that is left after removing the KPI columns and the
# raw date. Target: the number of orders.
X = df.drop(columns=['orderCount', 'visits', 'newVisits', 'bounces', 'acquisitionCost', 'revenue', 'newOrderCount', 'date'])
y = df['orderCount']

# Fixed seeds make the split, the fitted forest and the reported score
# reproducible from one run of the notebook to the next.
# NOTE(review): a classifier treats each distinct orderCount value as a
# separate class; a regressor may suit a count target better - confirm.
model = RandomForestClassifier(random_state=42)

# Hold out 10% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

model.fit(X_train, y_train)

predictions = model.predict(X_test)

# Share of held-out rows whose orderCount was predicted exactly.
model_score = accuracy_score(y_test, predictions)
model_score

0.9236289082521784

## Output creation

In [None]:
# Zero-row view of the feature frame as a dict: shows only the feature column
# names, in order.
X.head(0).to_dict()

In [13]:
# codes_df is sorted by code but still carries its pre-sort row labels.
# Resetting them makes label i hold the name whose code is i, so frames built
# with a default 0..n-1 index (one row per code) line up correctly when they
# are concatenated column-wise onto this series.
resulting_df = codes_df['name'].reset_index(drop=True)

# First day to predict.
day = 1
month = 7
year = 2024
# Weekday of that first day, derived from the date itself (Monday=0, the same
# convention as datetime.weekday()) rather than typed in by hand.
weekday = pd.Timestamp(year=year, month=month, day=day).weekday()
# One prediction row per encoded source.
sources_number = len(codes_df)

In [14]:
# Predict orderCount for every source on each remaining day of the month and
# append one column per day to resulting_df.

# Number of days in the target month (31 for July), so other months work too.
last_day = pd.Timestamp(year=year, month=month, day=1).days_in_month

daily_predictions = []
while day <= last_day:
    # One row per source code, all sharing the same calendar features; the
    # column order matches the feature frame the model was fitted on.
    test_inputs = {
        'source_name': range(sources_number),
        'Year': np.full(sources_number, year),
        'Month': np.full(sources_number, month),
        'Day': np.full(sources_number, day),
        'Weekday': np.full(sources_number, weekday),
    }
    test_df = pd.DataFrame(test_inputs)
    daily_predictions.append(
        pd.Series(model.predict(test_df), name=f'{year}-{month}-{day}')
    )
    day += 1
    # Weekdays run 0..6; wrap with modulo so the value never reaches 7.
    weekday = (weekday + 1) % 7

# Concatenate once after the loop instead of re-copying the growing frame on
# every iteration. Skipped when the loop produced nothing (e.g. cell re-run).
if daily_predictions:
    resulting_df = pd.concat([resulting_df, *daily_predictions], axis=1)

In [15]:
# Write the result table to out.csv with a header row and without the index.
resulting_df.to_csv(
    'out.csv',
    encoding='utf-8',
    index=False,
    header=True,
)