# PART 4 : RECOMMENDATION TO IMPROVE THE NEWSLETTER'S CONVERSION RATE

STEP 1 : GETTING INFORMATION ABOUT THE DATASET

In [19]:
# Loading the required libraries
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import  OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, ConfusionMatrixDisplay

import matplotlib.pyplot as plt
import plotly.express as px

# To hide warning log
import warnings
warnings.filterwarnings('ignore')

In [20]:
# Loading dataset
dataset = pd.read_csv(filepath_or_buffer="src/conversion_best_model.csv")

In [21]:
# Dimensions of the loaded frame as (rows, columns)
dataset.shape

(31620, 6)

In [22]:
# Preview the first rows to check the column names and how values are encoded
dataset.head()

Unnamed: 0,country,age,new_user,source,total_pages_visited,converted
0,UK,28,No,Seo,16,1
1,UK,22,Yes,Direct,5,0
2,China,32,Yes,Seo,1,0
3,US,32,Yes,Ads,6,0
4,China,25,No,Seo,3,0


STEP 2 : REPLACING THE NUMERIC FLAG WITH ITS CATEGORY NAME

In [23]:
# Turn the numeric new_user flag (1 / 0) into a readable "Yes" / "No" label.
# Only the numeric codes are replaced: values that are already labelled
# "Yes"/"No" are left untouched. Testing `x == 1` on an already-labelled column
# would send every row (including "Yes") to "No", so this form keeps the column
# intact and makes the cell safe to re-run.
dataset['new_user'] = dataset["new_user"].replace({1: "Yes", 0: "No"})

STEP 3 : EXPLORATORY DATA ANALYSIS OF CONVERTED USERS

In [24]:
# Keep only the rows of users who converted (converted == 1)
mask = dataset['converted'].eq(1)
data_converted = dataset[mask]

In [25]:
# Share of converted users per country (in percent), largest bar first
fig1 = px.histogram(
    data_converted["country"],
    title="Percent country",
    histnorm="percent",
    text_auto=True,
)
fig1.update_xaxes(categoryorder="total descending")
fig1.update_layout(autosize=True, width=600, height=500)
fig1.show()

In [26]:
# Age distribution of converted users, as raw counts
fig2 = px.histogram(
    data_converted["age"],
    nbins=10,
    title="Age repartition",
    text_auto=True,
)
fig2.update_layout(autosize=True, width=500, height=500)
fig2.show()
    

In [27]:
# Split of converted users by new_user label, in percent
fig3 = px.histogram(
    data_converted["new_user"],
    nbins=10,
    histnorm="percent",
    title="New user repartition",
    text_auto=True,
)
fig3.update_layout(autosize=True, width=500, height=500)
fig3.show()

In [28]:
# Share of converted users per acquisition source (in percent), largest bar first
fig4 = px.histogram(
    data_converted["source"],
    title="Percent source",
    histnorm="percent",
    text_auto=True,
)
fig4.update_xaxes(categoryorder="total descending")
fig4.update_layout(autosize=True, width=500, height=500)
fig4.show()

In [29]:
# Distribution of the number of pages visited by converted users, as raw counts.
# total_pages_visited is numeric, so the x axis keeps its natural ascending
# order: a `categoryorder` setting only applies to categorical axes and is
# deliberately not set here.
fig5 = px.histogram(
    data_converted["total_pages_visited"],
    title="Number of visited pages by users",
    nbins=10,
    text_auto=True,
)
fig5.update_layout(autosize=True, width=600, height=500)
fig5.show()

# END PART 4