## Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from sklearn.metrics import f1_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

## Disable warnings

In [2]:
import warnings
warnings.simplefilter("ignore")
warnings.filterwarnings("ignore")

## Loading the Data

In [4]:
data = pd.read_csv("advertisement.csv")

In [5]:
data

Unnamed: 0,Daily Time Spent on Site,Age,Area Income,Daily Internet Usage,Ad Topic Line,City,Gender,Country,Timestamp,Clicked on Ad
0,62.26,32.0,69481.85,172.83,Decentralized real-time circuit,Lisafort,Male,Svalbard & Jan Mayen Islands,2016-06-09 21:43:05,0
1,41.73,31.0,61840.26,207.17,Optional full-range projection,West Angelabury,Male,Singapore,2016-01-16 17:56:05,0
2,44.40,30.0,57877.15,172.83,Total 5thgeneration standardization,Reyesfurt,Female,Guadeloupe,2016-06-29 10:50:45,0
3,59.88,28.0,56180.93,207.17,Balanced empowering success,New Michael,Female,Zambia,2016-06-21 14:32:32,0
4,49.21,30.0,54324.73,201.58,Total 5thgeneration standardization,West Richard,Female,Qatar,2016-07-21 10:54:35,1
...,...,...,...,...,...,...,...,...,...,...
9995,41.73,31.0,61840.26,207.17,Profound executive flexibility,West Angelabury,Male,Singapore,2016-01-03 03:22:15,1
9996,41.73,28.0,51501.38,120.49,Managed zero tolerance concept,Kennedyfurt,Male,Luxembourg,2016-05-28 12:20:15,0
9997,55.60,39.0,38067.08,124.44,Intuitive exuding service-desk,North Randy,Female,Egypt,2016-01-05 11:53:17,0
9998,46.61,50.0,43974.49,123.13,Realigned content-based leverage,North Samantha,Female,Malawi,2016-04-04 07:07:46,1


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 10 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Daily Time Spent on Site  10000 non-null  float64
 1   Age                       10000 non-null  float64
 2   Area Income               10000 non-null  float64
 3   Daily Internet Usage      10000 non-null  float64
 4   Ad Topic Line             10000 non-null  object 
 5   City                      10000 non-null  object 
 6   Gender                    10000 non-null  object 
 7   Country                   10000 non-null  object 
 8   Timestamp                 10000 non-null  object 
 9   Clicked on Ad             10000 non-null  int64  
dtypes: float64(4), int64(1), object(5)
memory usage: 781.4+ KB


In [7]:
data.describe()

Unnamed: 0,Daily Time Spent on Site,Age,Area Income,Daily Internet Usage,Clicked on Ad
count,10000.0,10000.0,10000.0,10000.0,10000.0
mean,61.660757,35.9401,53840.047721,177.759831,0.4917
std,15.704142,8.572973,13343.708718,40.820951,0.499956
min,32.6,19.0,13996.5,105.22,0.0
25%,48.86,29.0,44052.3025,140.15,0.0
50%,59.59,35.0,56180.93,178.92,0.0
75%,76.58,42.0,61840.26,212.67,1.0
max,90.97,60.0,79332.33,269.96,1.0


In [9]:
data.isna().sum

<bound method NDFrame._add_numeric_operations.<locals>.sum of       Daily Time Spent on Site    Age  Area Income  Daily Internet Usage  \
0                        False  False        False                 False   
1                        False  False        False                 False   
2                        False  False        False                 False   
3                        False  False        False                 False   
4                        False  False        False                 False   
...                        ...    ...          ...                   ...   
9995                     False  False        False                 False   
9996                     False  False        False                 False   
9997                     False  False        False                 False   
9998                     False  False        False                 False   
9999                     False  False        False                 False   

      Ad Topic Line   Cit

In [10]:
data.isnull().sum()

Daily Time Spent on Site    0
Age                         0
Area Income                 0
Daily Internet Usage        0
Ad Topic Line               0
City                        0
Gender                      0
Country                     0
Timestamp                   0
Clicked on Ad               0
dtype: int64

In [11]:
data.columns

Index(['Daily Time Spent on Site', 'Age', 'Area Income',
       'Daily Internet Usage', 'Ad Topic Line', 'City', 'Gender', 'Country',
       'Timestamp', 'Clicked on Ad'],
      dtype='object')

In [12]:
data["Gender"] = data["Gender"].map({"Female":0, "Male":1})

In [None]:
## Splitting the data

In [13]:
x = data.iloc[:,0:7]
x = x.drop(['Ad Topic Line','City'],axis=1)
x

Unnamed: 0,Daily Time Spent on Site,Age,Area Income,Daily Internet Usage,Gender
0,62.26,32.0,69481.85,172.83,1
1,41.73,31.0,61840.26,207.17,1
2,44.40,30.0,57877.15,172.83,0
3,59.88,28.0,56180.93,207.17,0
4,49.21,30.0,54324.73,201.58,0
...,...,...,...,...,...
9995,41.73,31.0,61840.26,207.17,1
9996,41.73,28.0,51501.38,120.49,1
9997,55.60,39.0,38067.08,124.44,0
9998,46.61,50.0,43974.49,123.13,0


In [14]:
y = data.iloc[:, 9]
y

0       0
1       0
2       0
3       0
4       1
       ..
9995    1
9996    0
9997    0
9998    1
9999    1
Name: Clicked on Ad, Length: 10000, dtype: int64

In [15]:
xtrain, xtest, ytrain, ytest = train_test_split(x,
                                                y,
                                                test_size = 0.1,
                                                random_state = 42)

## Training the Click-Through Rate (CTR) Prediction Model

In [17]:
model = LogisticRegression(C=0.01,random_state=0)
model.fit(xtrain, ytrain)
model.score(xtrain, ytrain)

0.6906666666666667

In [18]:
ypred = model.predict(xtest)
ypred

array([1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1,
       0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1,
       1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0,
       1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0,
       0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1,
       1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,

In [19]:
ypred_prob = model.predict_proba(xtest)
ypred_prob

array([[0.22221294, 0.77778706],
       [0.27398993, 0.72601007],
       [0.364856  , 0.635144  ],
       ...,
       [0.38194242, 0.61805758],
       [0.1892669 , 0.8107331 ],
       [0.0678078 , 0.9321922 ]])

In [20]:
print(f"Accuracy Score: {accuracy_score(ytest, ypred)}")
print(f"F1 Score: {f1_score(ytest, ypred)}")

Accuracy Score: 0.688
F1 Score: 0.6722689075630253
