In [None]:
import numpy as np                   
import pandas as pd                   
import matplotlib.pyplot as plt  
from sklearn.linear_model import LogisticRegression     
import seaborn as sns  
import io
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

In [None]:
# Load "advertising.csv" into a pandas DataFrame using pandas' built-in CSV reader.
dataframe = pd.read_csv(filepath_or_buffer="advertising.csv")

In [None]:
# Declare which column names are treated as numerical and which as categorical.
numeric_columns = [
    'Daily Time Spent on Site',
    'Age',
    'Area Income',
    'Daily Internet Usage',
]
categorical_columns = [
    'Ad Topic Line',
    'City',
    'Male',
    'Country',
    'Clicked on Ad',
]

In [None]:
# Encode the 'City' and 'Country' columns as integer category codes.
# Reference: https://pandas.pydata.org/docs/user_guide/categorical.html
city_categories = dataframe['City'].astype('category')
country_categories = dataframe['Country'].astype('category')
dataframe['City Codes'] = city_categories.cat.codes
dataframe['Country Codes'] = country_categories.cat.codes

# Preview the first five rows of the two newly encoded columns.
dataframe[['City Codes', 'Country Codes']].head(5)

Unnamed: 0,City Codes,Country Codes
0,961,215
1,903,147
2,111,184
3,939,103
4,805,96


In [None]:
# Derive 'Month' and 'Hour' features from the 'Timestamp' column.
#
# The timestamp is parsed once with pandas' datetime parser and the parts are
# read through the vectorised `.dt` accessor, so both new columns are integers
# rather than substrings cut out by hand with str.split.
# NOTE(review): the previous split-based code implies a "YYYY-MM-DD HH:MM..."
# layout (month is the 2nd '-' field, hour sits between ' ' and ':') - confirm
# the CSV really uses that layout, otherwise pass an explicit `format=`.
timestamps = pd.to_datetime(dataframe['Timestamp'])
dataframe['Month'] = timestamps.dt.month
dataframe['Hour'] = timestamps.dt.hour

In [None]:
# Preview the first five rows of the derived 'Month' and 'Hour' columns.
derived_columns = ['Month', 'Hour']
dataframe[derived_columns].head(5)

Unnamed: 0,Month,Hour
0,3,0
1,4,1
2,3,20
3,1,2
4,6,3


In [None]:
# Feature matrix: every column of the frame except the ones listed below
# (which include the target column 'Clicked on Ad').
X = dataframe.drop(
    columns=['Ad Topic Line', 'City', 'Country', 'Timestamp', 'Clicked on Ad'],
)

# Target vector: the 'Clicked on Ad' column.
Y = dataframe['Clicked on Ad']

In [None]:
# Hold out 30% of the rows as the testing set; the remainder is the training set.
# The fixed random_state makes the split repeatable across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

In [None]:

# Fit three classifiers on the training split and predict labels for the test split.

# Logistic regression.
# NOTE(review): the features are fed in unscaled and the default iteration limit
# is used; if a ConvergenceWarning shows up, consider scaling or raising max_iter.
log_reg_model = LogisticRegression()
log_reg_model.fit(X_train, Y_train)
log_reg_pred = log_reg_model.predict(X_test)

# Gaussian Naive Bayes.
nav_bayes_model = GaussianNB()
nav_bayes_model.fit(X_train, Y_train)
nav_bayes_pred = nav_bayes_model.predict(X_test)

# Decision tree.
# The tree builder is randomised, so it is seeded (same value as the train/test
# split) to make the fitted tree and its scores reproducible on a fresh kernel.
dec_tree_model = DecisionTreeClassifier(random_state=42)
dec_tree_model.fit(X_train, Y_train)
dec_tree_pred = dec_tree_model.predict(X_test)



In [None]:

# Evaluate each model on the held-out test split.
#
# scikit-learn's metric functions take the ground truth first and the
# predictions second. Accuracy is symmetric, but classification_report is not:
# with the arguments swapped, precision and recall trade places and the
# "support" column counts predicted labels instead of true ones. The arguments
# are therefore passed by keyword.

# Accuracy using Logistic Regression
log_reg_accuracy = metrics.accuracy_score(y_true=Y_test, y_pred=log_reg_pred)
print("Accuarcy of this LG model is: \n\n", log_reg_accuracy*100)
# Classification report (precision, recall, F1 score, support) for Logistic Regression
print('\n Classification Report for LG: \n',
      metrics.classification_report(y_true=Y_test, y_pred=log_reg_pred))

# Accuracy using Gaussian Naive Bayes (the fitted model is GaussianNB, not Bernoulli)
nav_bayes_accuracy = metrics.accuracy_score(y_true=Y_test, y_pred=nav_bayes_pred)
print("Accuarcy of this NB-Gaussian model is: \n\n", nav_bayes_accuracy*100)
# Classification report (precision, recall, F1 score, support) for Naive Bayes
print('\n Classification Report for NB: \n',
      metrics.classification_report(y_true=Y_test, y_pred=nav_bayes_pred))

# Accuracy using Decision Tree
dec_tree_accuracy = metrics.accuracy_score(y_true=Y_test, y_pred=dec_tree_pred)
print("Accuarcy of this DT model is: \n\n", dec_tree_accuracy*100)
# Classification report (precision, recall, F1 score, support) for Decision Tree
print('\n Classification Report for DT: \n',
      metrics.classification_report(y_true=Y_test, y_pred=dec_tree_pred))

Accuarcy of this LG model is: 

 90.66666666666666

 Classification Report for LG: 
               precision    recall  f1-score   support

           0       0.96      0.86      0.91       162
           1       0.86      0.96      0.90       138

    accuracy                           0.91       300
   macro avg       0.91      0.91      0.91       300
weighted avg       0.91      0.91      0.91       300

Accuarcy of this NB-Bernoulli model is: 

 96.0

 Classification Report for NB: 
               precision    recall  f1-score   support

           0       0.96      0.96      0.96       146
           1       0.96      0.96      0.96       154

    accuracy                           0.96       300
   macro avg       0.96      0.96      0.96       300
weighted avg       0.96      0.96      0.96       300

Accuarcy of this DT model is: 

 92.66666666666666

 Classification Report for DT: 
               precision    recall  f1-score   support

           0       0.92      0.92      

[2]: https://pandas.pydata.org/docs/getting_started/install.html 

[3]: https://numpy.org/install/ 


[4]: https://matplotlib.org/stable/users/installing/index.html 


[5]: https://seaborn.pydata.org/installing.html 


[6]: https://scikit-learn.org/stable/install.html 


[7]: https://github.com/PacktPublishing/Hands-On-Data-Science-and-Python-Machine-Learning/blob/master/DecisionTree.ipynb 


[8]: https://github.com/PacktPublishing/Hands-On-Data-Science-and-Python-Machine-Learning/blob/master/MeanMedianExercise.ipynb 

[9]: https://github.com/PacktPublishing/Hands-On-Data-Science-and-Python-Machine-Learning/blob/master/Python101.ipynb 


[10]: https://github.com/PacktPublishing/Hands-On-Data-Science-and-Python-Machine-Learning/blob/master/TrainTest.ipynb 



[11]: https://github.com/PacktPublishing/Hands-On-Data-Science-and-Python-Machine-Learning/blob/master/MatPlotLib.ipynb 



[12]: https://github.com/PacktPublishing/Hands-On-Data-Science-and-Python-Machine-Learning/blob/master/Outliers.ipynb 


[13]: https://github.com/asavinov/machine-learning-and-data-processing#analysis-of-different-types-of-data 


[14]: https://github.com/asavinov/machine-learning-and-data-processing#libraries-utilities-tools 


[15]: https://github.com/stefmolin/Hands-On-Data-Analysis-with-Pandas-2nd-edition 