## Creating a simple model

In [1]:
import pandas as pd
# Read the two CSV inputs into DataFrames: `feats` first, then `target`.
feats, target = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/OSI_feats_e3.csv', '../data/OSI_target_e2.csv')
)

In [2]:
from sklearn.model_selection import train_test_split
# Split settings: fraction of rows held out for testing, and the seed that
# makes the shuffle reproducible.
test_size = 0.2
random_state = 42

# Partition features and target together so their rows stay aligned.
X_train, X_test, y_train, y_test = train_test_split(
    feats,
    target,
    test_size=test_size,
    random_state=random_state,
)

In [4]:
# Sanity-check the split: within each partition the feature frame and the
# target frame should have the same number of rows.
print(f'Shape of X_train: {X_train.shape}')
print(f'Shape of y_train: {y_train.shape}')
print(f'Shape of X_test: {X_test.shape}')
# This line reports y_test, so it is labelled as such.
print(f'Shape of y_test: {y_test.shape}')

Shape of X_train: (9864, 68)
Shape of y_train: (9864, 1)
Shape of X_test: (2466, 68)
Shape of y_train: (2466, 1)


In [5]:
from sklearn.linear_model import LogisticRegression
# Baseline classifier: logistic regression fitted on the training partition.
# The seed comes from the notebook-level `random_state` (defined with the
# train/test split) so it is set in exactly one place.
# max_iter=10000 raises the solver's iteration cap well above the library
# default — presumably to let the solver converge on these features; confirm.
model = LogisticRegression(random_state=random_state, max_iter=10000)
# Pass the target as the 1-D 'Revenue' column rather than the whole frame.
model.fit(X_train, y_train['Revenue'])

In [6]:
# Predicted labels for the held-out test features.
y_pred = model.predict(X=X_test)

In [8]:
from sklearn import metrics
# Accuracy on the test partition; arguments are given positionally in the
# (y_true, y_pred) order.
accuracy = metrics.accuracy_score(y_test, y_pred)
print(f'Accuracy of the model is {accuracy*100:.4f}%')

Accuracy of the model is 86.9830%


In [9]:
# Precision, recall and F-score computed with average='binary'. The fourth
# return value (support) is not needed here and is discarded.
precision, recall, fscore, _ = metrics.precision_recall_fscore_support(
    y_true=y_test,
    y_pred=y_pred,
    average='binary',
)
# All three metrics use the same '.4f' spec; the previous ' .4f' on fscore
# inserted an extra leading space in the printed value.
print(f'Precision: {precision:.4f}\nRecall: {recall:.4f}\nfscore: {fscore:.4f}')

Precision: 0.7296
Recall: 0.3479
fscore:  0.4712


In [11]:
# Pair each fitted coefficient (first row of model.coef_) with its column
# name, then order the pairs ascending — most negative coefficient first.
feature_names = X_train.columns.values.tolist()
ranked_pairs = sorted(zip(model.coef_[0], feature_names))

# Render each pair as "<feature>: <coefficient>" and print one per line.
coef_list = [f'{feature}: {coef}' for coef, feature in ranked_pairs]
for line in coef_list:
    print(line)

VisitorType_Returning_Visitor: -0.7290073763869693
Month_Dec: -0.6725609746462208
TrafficType_13: -0.63176889721717
OperatingSystems_3: -0.5785536758185749
TrafficType_3: -0.5472348597679381
Month_Mar: -0.5289268014533274
Month_May: -0.47154785586402886
Region_9: -0.43399032494557155
ExitRates: -0.36625078806089423
SpecialDay: -0.3373455366679398
VisitorType_New_Visitor: -0.31030988504623097
Browser_1: -0.2814308479453697
Month_June: -0.2765872758471352
BounceRates: -0.26758545501936254
Region_4: -0.24635164720658567
TrafficType_1: -0.24065942161090542
OperatingSystems_8: -0.22589583119664866
Browser_4: -0.21333616706551212
Browser_6: -0.205507917853827
Region_7: -0.1946388986058375
Browser_2: -0.18666589440064796
TrafficType_6: -0.17765155576962768
OperatingSystems_1: -0.17688234870845188
OperatingSystems_4: -0.15582206060002327
OperatingSystems_2: -0.15542346958823364
Browser_3: -0.1519736117838351
Browser_13: -0.15073527742476953
Region_3: -0.14382539593792232
Browser_5: -0.08709519