In [1]:
# for Mathematical and Dataframe Operations
import numpy as np
import pandas as pd

# for Data Visualizations
import seaborn as sns
import matplotlib.pyplot as plt

### Reading the Dataset

In [2]:
# Load the CSV (path is relative to the notebook's working directory) into `data`.
# NOTE(review): the preview of this file shows columns such as `diagnosis` and
# `radius_mean`, while later cells index ad-campaign columns such as `ad_id` --
# confirm this path points at the intended dataset.
data = pd.read_csv(filepath_or_buffer='input/data.csv')

# Cell output: (number of rows, number of columns)
data.shape

(569, 33)

In [3]:
# Preview the first five rows of the dataset
data.head(n=5)

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [4]:
## lets analyze the Campaigns

# Each entry pairs the label to print with the column whose distinct values are counted.
# The labels are kept exactly as before so the printed report is unchanged.
campaign_summary = [
    ("Number of Unique Ads :", 'ad_id'),
    ("Number of Campaigns :", 'xyz_campaign_id'),
    ("Number of Facebook Campaigns :", 'fb_campaign_id'),
    ("Number of Interest Groups :", 'interest'),
    ("Number of Age Groups :", 'age'),
]

# Fail fast with one readable message that names every missing column, instead of
# a bare KeyError on whichever column happens to be looked up first. The exception
# type stays KeyError so anything catching the old error still works.
missing_columns = [column for _, column in campaign_summary if column not in data.columns]
if missing_columns:
    raise KeyError(
        f"Campaign columns missing from `data`: {missing_columns}. "
        f"Columns actually present start with: {list(data.columns[:5])}. "
        "Check that the CSV read into `data` is the ad-campaign dataset."
    )

# One line of output per column: label followed by the number of distinct values
for label, column in campaign_summary:
    print(label, data[column].nunique())

KeyError: 'ad_id'

In [None]:
# Author's rationale: every ad has its own unique ID, so `ad_id` carries no signal,
# and the Facebook campaign ID is considered unrelated to conversions.
# Both identifier columns are therefore discarded.
identifier_columns = ['ad_id', 'fb_campaign_id']
data = data.drop(columns=identifier_columns)

# Show the remaining column names to confirm the two columns are gone
data.columns

### Feature Engineering

* ```Click-through-rate (CTR)```: This is the percentage of our impressions that became clicks. A high CTR is often seen as a sign of good creative being presented to a relevant audience. A low click-through rate is suggestive of less-than-engaging adverts (design and / or messaging) and / or presentation of adverts to an inappropriate audience. What is seen as a good CTR will depend on the type of advert (website banner, Google Shopping ad, search network text ad, etc.) and can vary across sectors, but 2% would be a reasonable benchmark.

--------------
* ```Conversion Rate (CR)```: This is the percentage of clicks that result in a 'conversion'. What a conversion is will be determined by the objectives of the campaign. It could be a sale, someone completing a contact form on a landing page, downloading an e-book, watching a video, or simply spending more than a particular amount of time or viewing over a target number of pages on a website.

-------------
* ```Cost Per Click (CPC)```: Self-explanatory this one: how much (on average) did each click cost. While it can often be seen as desirable to reduce the cost per click, the CPC needs to be considered along with other variables. For example, a campaign with an average CPC of £0.5 and a CR of 5% is likely achieving more with its budget than one with a CPC of £0.2 and a CR of 1% (assuming the conversion value is the same).

-----------------
* ```Cost Per Conversion```: Another simple metric, this figure is often more relevant than the CPC, as it combines the CPC and CR metrics, giving us an easy way to quickly get a feel for campaign effectiveness.

In [None]:
# Derived per-row campaign metrics.
# Rows with a zero denominator produce inf or NaN in these columns.

# Click-through rate: clicks as a percentage of impressions
data['ClickThroughRate'] = data['Clicks'].div(data['Impressions']).mul(100)

# Cost per click: amount spent for each click received
data['CostPerClick'] = data['Spent'].div(data['Clicks'])

# Conversion rate: approved conversions as a fraction of total conversions
# NOTE(review): the accompanying text describes CR as the percentage of clicks that
# convert; this column is approved/total conversions and is not scaled by 100 --
# confirm which definition is intended.
data['ConversionRate'] = data['Approved_Conversion'].div(data['Total_Conversion'])

# Cost per conversion: amount spent for each approved conversion
data['CostPerConversion'] = data['Spent'].div(data['Approved_Conversion'])

* ```Conversion Value```: A conversion value is a numerical value that you assign to specific conversions in order to represent their impact on your business. The major benefit of assigning conversion values is that it helps you track, optimize, and report on your return on ad spend (ROAS).

----------------
* ```ROAS```: ROAS stands for return on ad spend—a marketing metric that measures the amount of revenue your business earns for each dollar it spends on advertising. For all intents and purposes, ROAS is practically the same as another metric you're probably familiar with: return on investment, or ROI.

-----------------
* ```Cost Per Mille```: This number is the cost of one thousand impressions. If your objective is ad exposure to increase brand awareness, this might be an important KPI for you to measure.

**We don't have the actual figures needed to work out the profit from each conversion, but for the purposes of illustration, let's assume that an approved conversion is worth 100 dollars.**

In [None]:
# Return-oriented features: conversion value, ROAS (return on ad spend) and CPM.

# Conversion value: each approved conversion is assumed to be worth 100 dollars
data['ConversionValue'] = data['Approved_Conversion'].mul(100)

# ROAS (treated as the target variable): conversion value earned per unit spent, to 2 decimals
data['ROAS'] = data['ConversionValue'].div(data['Spent']).round(2)

# Cost per mille: spend per one thousand impressions, to 2 decimals
data['CostPerMille'] = data['Spent'].div(data['Impressions']).mul(1000).round(2)

In [None]:
# Step 1: turn +/- infinity into NaN so both kinds of bad value are handled the same way
infinities_as_nan = data.replace([np.inf, -np.inf], np.nan)

# Step 2: discard every row that has a NaN in any column
data = infinities_as_nan.dropna(axis='index')

# Report how many rows and columns remain after the cleanup
print("Shape of the Data After Removing Nans and Infs :", data.shape)

In [None]:
# Per-campaign averages of the engineered metrics, shown as a colour-graded table.

# Grouping key plus the metrics to average
metric_columns = ['xyz_campaign_id', 'ClickThroughRate', 'CostPerClick', 'ConversionRate',
                  'ConversionValue', 'CostPerMille', 'ROAS']

# Friendlier headers for the displayed table; columns not listed keep their names
display_names = {
    'xyz_campaign_id': 'Campaign',
    'ClickThroughRate': 'Average CTR',
    'CostPerClick': 'Average Cost/Click',
    'ConversionRate': 'Average ConversionRate',
}

campaign_means = (
    data[metric_columns]
    .groupby(['xyz_campaign_id'], as_index=False)
    .agg('mean')
    .rename(columns=display_names)
)

# Last expression of the cell: the styled table is rendered as the output
campaign_means.style.background_gradient(cmap='Wistia')