<a href="https://colab.research.google.com/github/teakrcmar/KPI-predictions/blob/main/prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
!git clone https://github.com/teakrcmar/KPI-predictions.git

Cloning into 'KPI-predictions'...
remote: Enumerating objects: 6, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (5/5), done.[K
remote: Total 6 (delta 0), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (6/6), 9.12 KiB | 9.12 MiB/s, done.


In [22]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense

In [8]:
df = pd.read_csv("KPI-predictions/Marketing.csv")

In [9]:
df.head()

Unnamed: 0,id,c_date,campaign_name,category,campaign_id,impressions,mark_spent,clicks,leads,orders,revenue
0,1,2021-02-01,facebook_tier1,social,349043,148263,7307.37,1210,13,1,4981.0
1,2,2021-02-01,facebOOK_tier2,social,348934,220688,16300.2,1640,48,3,14962.0
2,3,2021-02-01,google_hot,search,89459845,22850,5221.6,457,9,1,7981.0
3,4,2021-02-01,google_wide,search,127823,147038,6037.0,1196,24,1,2114.0
4,5,2021-02-01,youtube_blogger,influencer,10934,225800,29962.2,2258,49,10,84490.0


In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 308 entries, 0 to 307
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             308 non-null    int64  
 1   c_date         308 non-null    object 
 2   campaign_name  308 non-null    object 
 3   category       308 non-null    object 
 4   campaign_id    308 non-null    int64  
 5   impressions    308 non-null    int64  
 6   mark_spent     308 non-null    float64
 7   clicks         308 non-null    int64  
 8   leads          308 non-null    int64  
 9   orders         308 non-null    int64  
 10  revenue        308 non-null    float64
dtypes: float64(2), int64(6), object(3)
memory usage: 26.6+ KB


In [11]:
df.isnull().sum()
df.duplicated().sum()

0

In [12]:
# Get the total number of entries
total_entries = len(df)

# Calculate the indices for splitting
train_end = int(total_entries * 0.8)  # 80% for training
val_end = int(total_entries * 0.9)   # 10% for validation

# Split the dataset
train_set = df.iloc[:train_end]  # First 80%
val_set = df.iloc[train_end:val_end]  # Next 10%
test_set = df.iloc[val_end:]  # Last 10%

# Print the sizes of the splits
print(f"Training set size: {len(train_set)}")
print(f"Validation set size: {len(val_set)}")
print(f"Test set size: {len(test_set)}")

Training set size: 246
Validation set size: 31
Test set size: 31


In [16]:
# Calculate ROI
df['ROI'] = ((df['revenue'] - df['mark_spent']) / df['mark_spent']) * 100

# Calculate CPA_leads
df['CPA_leads'] = df['mark_spent'] / df['leads']

# Calculate CTR
df['CTR'] = (df['clicks'] / df['impressions']) * 100

# Calculate Conversion Rate from Clicks to Leads
df['Conversion_Rate_Clicks_Leads'] = (df['leads'] / df['clicks']) * 100

df['c_date'] = pd.to_datetime(df['c_date'])
df.set_index('c_date', inplace=True)

In [19]:
#Efficiency and Cost Analysis

# Cost Per Click (CPC)
df['CPC'] = df['mark_spent'] / df['clicks']
# Cost Per Thousand Impressions (CPM)
df['CPM'] = (df['mark_spent'] / df['impressions']) * 1000
# Average Order Value (AOV)
df['AOV'] = df['revenue'] / df['orders']
# Revenue Per Click (RPC)
df['RPC'] = df['revenue'] / df['clicks']

# Encode 'campaign_name' using LabelEncoder
label_encoder = LabelEncoder()
df['campaign_name'] = label_encoder.fit_transform(df['campaign_name'])

# Select relevant features
features = ['campaign_name', 'impressions', 'mark_spent', 'clicks', 'leads', 'orders']
target = 'revenue'

# Scale the data (features and target)
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df[features + [target]])

In [23]:
def create_sequences(data, target_index, seq_length=10):
    """
    Create input-output pairs for time-series forecasting.
    - data: Scaled dataset (numpy array)
    - target_index: Index of the target variable in the data
    - seq_length: Number of time steps in each sequence
    """
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length, :-1])  # Features
        y.append(data[i+seq_length, target_index])  # Target
    return np.array(X), np.array(y)

seq_length = 10  # Example: Use the last 10 days to predict the next day's revenue
X, y = create_sequences(scaled_data, target_index=-1, seq_length=seq_length)

X_train, y_train = X[:train_end], y[:train_end]
X_val, y_val = X[train_end:val_end], y[train_end:val_end]
X_test, y_test = X[val_end:], y[val_end:]

In [24]:
# Define the GRU model
model = Sequential([
    GRU(64, return_sequences=True, input_shape=(seq_length, X_train.shape[2])),
    GRU(32),
    Dense(1)  # Single output for revenue prediction
])

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Display the model architecture
model.summary()

  super().__init__(**kwargs)


In [40]:
# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=300,
    batch_size=32,
    verbose=1
)

Epoch 1/300
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.0034 - val_loss: 2.5866e-04
Epoch 2/300
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.0036 - val_loss: 1.3814e-04
Epoch 3/300
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.0034 - val_loss: 1.9000e-04
Epoch 4/300
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.0038 - val_loss: 1.7709e-04
Epoch 5/300
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.0034 - val_loss: 1.5254e-04
Epoch 6/300
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0026 - val_loss: 1.3927e-04
Epoch 7/300
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.0028 - val_loss: 1.8992e-04
Epoch 8/300
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.0034 - val_loss: 1.7143e-04
Epoch 9/300
[1m8/8[0m 

In [41]:
# Save the trained model
model.save("gru_model.h5")



In [42]:
# Evaluate the model on the test data
test_loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - loss: 1.1449e-04
Test Loss: 0.00011449104204075411


In [43]:
# Print a summary of the model
model.summary()


In [44]:
# Evaluate the model on the training data
train_loss = model.evaluate(X_train, y_train, verbose=1)
print(f"Training Loss: {train_loss}")


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 6.5172e-04 
Training Loss: 0.0006184969679452479


In [45]:
# Add a placeholder column (zeros) to match the scaler's input shape
padding = np.zeros((y_train_pred.shape[0], 1))  # Placeholder for one missing column

# Combine features and predictions
y_train_pred_combined = np.hstack((X_train[:, -1, :-1], y_train_pred, padding))

# Reverse scale the predictions
y_train_pred_rescaled = scaler.inverse_transform(y_train_pred_combined)[:, -2]  # Extract the 'revenue' column
# Combine features and actual revenue
y_train_actual_combined = np.hstack((X_train[:, -1, :-1], y_train.reshape(-1, 1), padding))

# Reverse scale the actual revenue
y_train_actual_rescaled = scaler.inverse_transform(y_train_actual_combined)[:, -2]

In [46]:
# Display comparison
for i in range(10):  # First 10 samples
    print(f"Predicted Revenue: {y_train_pred_rescaled[i]:.2f}, Actual Revenue: {y_train_actual_rescaled[i]:.2f}")


Predicted Revenue: -0.48, Actual Revenue: 0.00
Predicted Revenue: 0.78, Actual Revenue: 2.34
Predicted Revenue: -0.71, Actual Revenue: 0.00
Predicted Revenue: 2.25, Actual Revenue: 3.14
Predicted Revenue: 7.12, Actual Revenue: 1.65
Predicted Revenue: 11.38, Actual Revenue: 19.57
Predicted Revenue: 6.74, Actual Revenue: 5.98
Predicted Revenue: 0.13, Actual Revenue: 0.00
Predicted Revenue: -2.32, Actual Revenue: 0.00
Predicted Revenue: 0.91, Actual Revenue: 0.15
