In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
data=pd.read_csv("/kaggle/input/advertising/advertising.csv")

In [None]:
data.head(20)

In [None]:
data.info()

In [None]:
data.describe()

In [None]:
data.nunique()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Joint density of Age vs. daily time on site, with a rug plot on each axis.
# The plotted variables are passed as keywords: seaborn >= 0.12 makes x/y
# keyword-only, so handing two Series positionally raises TypeError there.
# `vertical=True` is deprecated; assigning the variable to `y` draws the rug
# along the y axis instead.
f, ax = plt.subplots(figsize=(10, 10))
sns.kdeplot(x=data["Age"], y=data["Daily Time Spent on Site"], color="b", ax=ax)
sns.rugplot(x=data["Age"], color="r", ax=ax)
sns.rugplot(y=data["Daily Time Spent on Site"], ax=ax)

In [None]:
# Filled 2-D density of time on site vs. daily internet usage.
# - x/y are passed as keywords (keyword-only in seaborn >= 0.12).
# - `levels` / `fill` replace the deprecated `n_levels` / `shade`.
# - `light` is documented as a float in [0, 1]; the previous value of 3 was
#   outside that range, so it is set to the maximum valid intensity.
# - `ax=ax` draws explicitly on the axes created here.
f, ax = plt.subplots(figsize=(8, 8))
cmap = sns.cubehelix_palette(as_cmap=True, start=0, dark=0, light=1, reverse=True)
sns.kdeplot(
    x=data["Daily Time Spent on Site"],
    y=data["Daily Internet Usage"],
    cmap=cmap, levels=100, fill=True, ax=ax,
);

In [None]:
from pandas.plotting import scatter_matrix
# Pairwise scatter plots (histograms on the diagonal) of four numeric columns.
numeric_cols = ['Daily Time Spent on Site', 'Age', 'Area Income', 'Daily Internet Usage']
scatter_matrix(data[numeric_cols], alpha=0.3, figsize=(10, 10))

In [None]:
# Frequency table of rows per country, largest first, limited to the top 30.
country_counts = pd.crosstab(index=data["Country"], columns='count')
country_counts.sort_values(by=['count'], ascending=False).head(30)

In [None]:
data=data.drop(["Ad Topic Line","City","Country"],axis=1)

In [None]:
data.head()

In [None]:
data["Timestamp"]=pd.to_datetime(data["Timestamp"])

In [None]:
data["Month"]=data['Timestamp'].dt.month

In [None]:
data["day"]=data['Timestamp'].dt.day

In [None]:
data["Day of the week"] = data['Timestamp'].dt.dayofweek

In [None]:
data['Hour'] = data['Timestamp'].dt.hour

In [None]:
data = data.drop(['Timestamp'], axis=1)

In [None]:
data.head()

In [None]:
# Pairwise scatter plots of the engineered date/time features.
# Selecting several columns needs a list of labels (double brackets);
# data["Month", "day", ...] is looked up as one tuple key and raises KeyError.
scatter_matrix(data[["Month", "day", "Day of the week", "Hour"]],
    alpha=0.3, figsize=(10,10))

In [None]:
# Scatter matrix of every column, with points colour-mapped by the click label.
# Matplotlib's `color=` only accepts colour specifications; values that should
# be mapped through a colormap have to be passed as `c=` instead.
# NOTE(review): assumes "Clicked on Ad" holds numeric labels and that no rows
# contain missing values (scatter_matrix drops those per panel) — confirm.
scatter_matrix(data, c=data["Clicked on Ad"])

In [None]:
# Seaborn pair plot of all columns, coloured by the click label.
sns.pairplot(data=data, hue="Clicked on Ad")

In [None]:
from sklearn.model_selection import train_test_split

# Model inputs. The engineered date features ('Month', 'day',
# 'Day of the week') are currently excluded from the feature set.
feature_cols = [
    'Daily Time Spent on Site',
    'Age',
    'Area Income',
    'Daily Internet Usage',
    'Male',
]
X = data[feature_cols]
y = data['Clicked on Ad']

# Hold out 33% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Tune the maximum tree depth of a random forest with a cross-validated grid search.
# The forest is seeded (same seed as the train/test split) so the selected
# depth and the reported scores are reproducible on Restart & Run All.
param_grid_rf={'max_depth':[2,3,4,5,6,7]}
rf_grid=GridSearchCV(RandomForestClassifier(random_state=42),param_grid_rf)
rf_grid.fit(X_train,y_train)

In [None]:
print(rf_grid.best_estimator_,"\n",
rf_grid.best_score_)

In [None]:
pre_rf=rf_grid.predict(X_test)

In [None]:
accuracy_score(y_test,pre_rf)

In [None]:
param_grid_lc={"lc__C":np.logspace(-2,2,50)}
pip=Pipeline([('scale',StandardScaler()),("lc",LogisticRegression())])
lc_grid=GridSearchCV(pip,param_grid_lc)

In [None]:
lc_grid.fit(X_train,y_train)

In [None]:
print(lc_grid.best_estimator_,lc_grid.best_score_)

In [None]:
# Grid-search C and gamma for an SVM on standardised features.
# - The scaling step was registered under the misspelled name "sclaer"; it is
#   now "scaler" so the printed pipeline and any "scaler__*" parameter names
#   read correctly.
# - probability=True is kept because the soft-voting ensemble later in the
#   notebook needs predict_proba; the internal probability calibration
#   shuffles the data, so the SVC is seeded to make it repeatable.
param_grid_svc={"svc__C":[.0001,.001,.01,.1,1,10,100],"svc__gamma":[.0001,.001,.01,.1,1,10,100]}
pipe=Pipeline([("scaler",StandardScaler()),("svc",SVC(probability=True,random_state=42))])
svc_grid=GridSearchCV(pipe,param_grid_svc)
svc_grid.fit(X_train,y_train)
print(svc_grid.best_estimator_,svc_grid.best_score_)

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
# Bag 7 random forests, each trained on a bootstrap sample of 500 training rows,
# and keep the out-of-bag score for a validation estimate.
# - random_state on the bagger seeds both the bootstrap sampling and the base
#   forests, so oob_score_ and the predictions are reproducible.
# - n_jobs=-1 uses the cores that are actually available instead of a
#   hard-coded worker count.
# NOTE(review): max_samples=500 must not exceed the number of training rows.
bag_clf=BaggingClassifier(RandomForestClassifier(),n_estimators=7,max_samples=500,bootstrap=True,n_jobs=-1,oob_score=True,random_state=42)
bag_clf.fit(X_train,y_train)

In [None]:
bag_clf.oob_score_

In [None]:
 predictions=bag_clf.predict(X_test)

In [None]:
accuracy_score(y_test,predictions)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
gbrt=GridSearchCV(GradientBoostingClassifier(),param_grid_rf)
gbrt.fit(X_train,y_train)

In [None]:
print(gbrt.best_estimator_,gbrt.best_score_)

In [None]:
accuracy_score(y_test,gbrt.predict(X_test))

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
mlp=MLPClassifier(solver='adam',activation='relu',hidden_layer_sizes=[10,10],max_iter=5000)
mlp.fit(X_train,y_train)

In [None]:
accuracy_score(y_test,mlp.predict(X_test))

In [None]:
from sklearn.ensemble import VotingClassifier

In [None]:
# Soft-voting ensemble over the tuned forest, logistic regression, SVM,
# bagging and gradient-boosting models (class probabilities are averaged).
# The ensemble is fitted on the training split only: it was previously fitted
# on the full X, y, which includes the rows in X_test, so the hold-out accuracy
# computed afterwards was inflated by test-set leakage.
voting_clf=VotingClassifier(estimators=[('rf',rf_grid),('lc',lc_grid),('svc',svc_grid),('bagging',bag_clf),('gbrt',gbrt)],voting='soft')
voting_clf.fit(X_train,y_train)

In [None]:
pred=voting_clf.predict(X_test)

In [None]:
accuracy_score(y_test,pred)