## Email Marketing Effectiveness Prediction

Aim: to predict how effective an email marketing campaign is.

In [113]:
import pandas as pd # import pandas library for loading and describing dataset

In [114]:
df = pd.read_csv('email_marketing_data.csv') # loading dataset

In [115]:
df.head() # viewing first 5 rows of the data

In [65]:
df.info # overview of data

In [66]:
df.isnull().sum() # checking null records

In [67]:
# there are no null records

In [68]:
df = pd.get_dummies(data=df,columns=['Email_Type','Email_Source_Type','Customer_Location','Email_Campaign_Type','Time_Email_sent_Category']) # encoding all categorical data

In [69]:
df.head()

In [70]:
from sklearn.preprocessing import LabelEncoder

In [71]:
labelencoder = LabelEncoder()

In [72]:
df['Email_Status'] = labelencoder.fit_transform(df['Email_Status']) # encoding email_status to numerical data for visualization

In [73]:
df['Email_Status'] # view the numerical Email_Status

In [74]:
df.head(120)
# 0 = clicked
# 1 = not_opened
# 2 = opened

In [75]:
import seaborn as sns # importing seaborn to visualize the data

In [76]:
sns.barplot(x='Email_Status',y='Subject_Hotness_Score',data=df)

In [77]:
sns.barplot(x='Email_Status',y='Total_Past_Communications',data=df)

In [78]:
# feature scaling

from sklearn.preprocessing import StandardScaler

In [79]:
df.columns

In [80]:
X = df.drop(['Email_ID',
       'Word_Count', 'Total_Links', 'Total_Images', 'Email_Status',
       'Email_Type_Promotional', 'Email_Type_Transactional',
       'Email_Source_Type_App', 'Email_Source_Type_Website',
       'Customer_Location_Apex', 'Customer_Location_Bay',
       'Customer_Location_Central', 'Customer_Location_Downtown',
       'Customer_Location_East', 'Customer_Location_Forest',
       'Customer_Location_Green', 'Email_Campaign_Type_Campaign_1',
       'Email_Campaign_Type_Campaign_2', 'Email_Campaign_Type_Campaign_3',], axis=1)
y = df['Email_Status'] # assigning target column to y

In [81]:
X

In [82]:
y

In [83]:
# Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X) # standardizes the features in X

In [84]:
df.head()

In [85]:
# splitting training and testing data

from sklearn.model_selection import train_test_split

In [86]:
X_train,X_test,y_train,y_test = train_test_split(X_scaled,y,test_size = 0.3,random_state=42)

### Logistic Regression

In [87]:
# training logistic regression

from sklearn.linear_model import LogisticRegression

In [88]:
model = LogisticRegression() # creating instance of LogisticRegression

In [89]:
model.fit(X_train,y_train) # training the model using training data

In [90]:
accuracy_score(y_train,model.predict(X_train)) # accuracy of training

In [91]:
# model evaluation

from sklearn.metrics import accuracy_score

In [92]:
prediction = model.predict(X_test) # making predictions using the testing data

In [93]:
accuracy_score(y_test,prediction) # calculating accuracy of prediction

In [94]:
df.head()

### Random Forest

In [95]:
from sklearn.ensemble import RandomForestClassifier # importing RandomClassifier class

In [96]:
rfc = RandomForestClassifier(n_estimators=100,random_state=42) # creating instance of the class

In [97]:
rfc.fit(X_train,y_train) # training the model

In [98]:
accuracy_score(y_train,rfc.predict(X_train)) # accuracy of training

In [99]:
prediction_rfc = rfc.predict(X_test) #making the predictions

In [100]:
accuracy_score(y_test,prediction_rfc) #calculating accuracy of prediction

### K-Nearest Neighbors

In [101]:
from sklearn.neighbors import KNeighborsClassifier # importing KNeighborsClassifier class

In [102]:
knn = KNeighborsClassifier(n_neighbors=5) # creating instance of KNeighborsClassifier

In [103]:
knn.fit(X_train,y_train) # training the model

In [104]:
accuracy_score(y_train,knn.predict(X_train)) # accuracy of training

In [105]:
prediction_knn = knn.predict(X_test) # making predictions

In [106]:
accuracy_score(y_test,prediction_knn) # calculating accuracy of prediciton

### Decision Tree

In [107]:
from sklearn.tree import DecisionTreeClassifier # importing DecisionTreeClassifier class

In [108]:
dt = DecisionTreeClassifier() # creating instance of DecisionTreeClassifier

In [109]:
dt.fit(X_train,y_train) # training the model

In [110]:
accuracy_score(y_train,dt.predict(X_train)) # accuracy of training

In [111]:
prediction_dt = dt.predict(X_test) # making predictions

In [112]:
accuracy_score(y_test,prediction_dt) # calculating accuracy of prediction