In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
import seaborn as sns

In [40]:
df = pd.read_csv('~/Old_Projects/Alerting_Bot/spend_data.csv')

In [41]:
def metrics(df: pd.DataFrame) -> pd.DataFrame :
    df['CPM'] = (df.spend_usd / df.impressions * 1000).round(2)
    df['CPI'] = (df.spend_usd / df.installs).round(2)
    df['CPC'] = (df.spend_usd / df.clicks).round(2)
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.fillna(0, inplace=True)
    return df

In [42]:
df = df[df.offer == 'yaeda_restaurant']

In [43]:
df = df.groupby(['business_dt', 'offer', 'network']).sum(numeric_only=True).reset_index()

In [44]:
df = metrics(df)

In [45]:
df.shape

(210, 10)

In [49]:
df = df[(df.CPM > 0) & (df.CPM > 0) & (df.CPC > 0)]

In [62]:
X = df[['network', 'offer', 'CPM', 'CPC', 'CPI']]

In [63]:
X = pd.get_dummies(X)

In [64]:
model = IsolationForest(n_estimators=1000, contamination=0.1, random_state=42, bootstrap=True)
model.fit(X)

In [65]:
outliers = model.predict(X)

In [66]:
scores = model.decision_function(X)

In [67]:
df['is_anomaly'] = outliers

In [68]:
df[df.is_anomaly == -1].shape

(15, 12)

In [79]:
threshold = np.percentile(scores, 7)

In [80]:
outliers_custom = np.where(scores < threshold, -1, 1) 

In [81]:
df['is_anom_c'] = outliers_custom

In [82]:
df[df.is_anom_c == -1].shape

(11, 12)

In [83]:
df[df.is_anomaly == -1]

Unnamed: 0,business_dt,offer,network,spend_usd,impressions,clicks,installs,CPM,CPI,CPC,is_anomaly,is_anom_c
4,2024-09-01,yaeda_restaurant,VKADS,776.080059,729889.0,915,25.0,1.06,31.04,0.85,-1,1
11,2024-09-02,yaeda_restaurant,VKADS,778.752074,746964.0,891,34.0,1.04,22.9,0.87,-1,-1
18,2024-09-03,yaeda_restaurant,VKADS,781.697999,738103.0,870,15.0,1.06,52.11,0.9,-1,-1
23,2024-09-04,yaeda_restaurant,Mintegral,102.7418,477030.0,65,110.0,0.22,0.93,1.58,-1,-1
60,2024-09-09,yaeda_restaurant,VKADS,904.546695,917556.0,1073,26.0,0.99,34.79,0.84,-1,-1
67,2024-09-10,yaeda_restaurant,VKADS,814.979705,821472.0,976,18.0,0.99,45.28,0.84,-1,-1
137,2024-09-20,yaeda_restaurant,VKADS,909.743523,657661.0,786,33.0,1.38,27.57,1.16,-1,-1
144,2024-09-21,yaeda_restaurant,VKADS,909.743523,659002.0,838,15.0,1.38,60.65,1.09,-1,-1
165,2024-09-24,yaeda_restaurant,VKADS,951.780141,779686.0,975,38.0,1.22,25.05,0.98,-1,-1
175,2024-09-26,yaeda_restaurant,Bigo,573.2874,1099836.0,4378,0.0,0.52,0.0,0.13,-1,1
