<a href="https://colab.research.google.com/github/oumaimaoubaha/partie_2_TP3/blob/main/conversion_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score


In [3]:


np.random.seed(42)

df = pd.DataFrame({
    "time_on_site": np.random.randint(30, 600, 200),
    "pages_viewed": np.random.randint(1, 15, 200),
    "source": np.random.choice(["organic", "paid", "email", "social"], 200),
    "device": np.random.choice(["mobile", "desktop"], 200),
    "converted": np.random.choice([0, 1], 200, p=[0.7, 0.3])
})

df.to_csv("user_behavior.csv", index=False)

df.head()


Unnamed: 0,time_on_site,pages_viewed,source,device,converted
0,132,2,paid,mobile,0
1,465,7,organic,mobile,0
2,300,14,social,desktop,0
3,136,10,social,mobile,0
4,101,3,paid,mobile,0


In [4]:
X = df[['time_on_site', 'pages_viewed', 'source', 'device']]
y = df['converted']

X.head()


Unnamed: 0,time_on_site,pages_viewed,source,device
0,132,2,paid,mobile
1,465,7,organic,mobile
2,300,14,social,desktop
3,136,10,social,mobile
4,101,3,paid,mobile


In [5]:
X = pd.get_dummies(X, columns=['source', 'device'])
X.head()


Unnamed: 0,time_on_site,pages_viewed,source_email,source_organic,source_paid,source_social,device_desktop,device_mobile
0,132,2,False,False,True,False,False,True
1,465,7,False,True,False,False,False,True
2,300,14,False,False,False,True,True,False
3,136,10,False,False,False,True,False,True
4,101,3,False,False,True,False,False,True


In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

X_train.shape, X_test.shape


((160, 8), (40, 8))

In [7]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(
    n_estimators=100,
    random_state=42
)

model.fit(X_train, y_train)


In [8]:
from sklearn.metrics import classification_report, roc_auc_score

y_pred = model.predict(X_test)

print(classification_report(y_test, y_pred))
print("AUC-ROC :", roc_auc_score(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.66      0.75      0.70        28
           1       0.12      0.08      0.10        12

    accuracy                           0.55        40
   macro avg       0.39      0.42      0.40        40
weighted avg       0.50      0.55      0.52        40

AUC-ROC : 0.4166666666666667


In [9]:
importances = model.feature_importances_
features = X.columns

importance_df = pd.DataFrame({
    "feature": features,
    "importance": importances
}).sort_values(by="importance", ascending=False)

importance_df

Unnamed: 0,feature,importance
0,time_on_site,0.49544
1,pages_viewed,0.356066
5,source_social,0.029201
2,source_email,0.025394
3,source_organic,0.024869
4,source_paid,0.024338
7,device_mobile,0.023027
6,device_desktop,0.021664
