In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.preprocessing import OneHotEncoder

In [3]:
# Read the raw shopper-session records from the CSV in the working directory.
data = pd.read_csv(filepath_or_buffer='online_shoppers_intention.csv')

In [4]:
# Pull out the three prediction targets, then build the feature matrix from
# every remaining column so that no target leaks into the features.
y_revenue, y_weekend, y_informational_duration = (
    data['Revenue'],
    data['Weekend'],
    data['Informational_Duration'],
)
X = data.drop(columns=['Revenue', 'Weekend', 'Informational_Duration'])

In [5]:

# Expand the string-valued columns into indicator (dummy) columns; every other
# column of X is passed through unchanged.
categorical_features = ['Month', 'VisitorType']
X_encoded = pd.get_dummies(data=X, columns=categorical_features)

In [6]:
# Hold out 20% of the rows for testing. The features and all three targets are
# split in a single call so every row stays aligned across the three tasks.
(
    X_train, X_test,
    y_revenue_train, y_revenue_test,
    y_weekend_train, y_weekend_test,
    y_informational_duration_train, y_informational_duration_test,
) = train_test_split(
    X_encoded,
    y_revenue,
    y_weekend,
    y_informational_duration,
    test_size=0.2,
    random_state=42,
)


In [14]:
# Train the Random Forest classifier for revenue prediction.
# random_state is pinned (same seed as the train/test split) so the bootstrap
# samples and per-split feature subsets are repeatable; without it the
# predictions and the reported accuracy change on every run.
revenue_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
revenue_classifier.fit(X_train, y_revenue_train)

# Make predictions on the test set for revenue
y_revenue_pred = revenue_classifier.predict(X_test)
print(y_revenue_pred)




[False False  True ... False False False]


In [13]:
# Train the Random Forest classifier for weekend prediction.
# random_state is pinned (same seed as the train/test split) so the bootstrap
# samples and per-split feature subsets are repeatable; without it the
# predictions and the reported accuracy change on every run.
weekend_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
weekend_classifier.fit(X_train, y_weekend_train)

# Make predictions on the test set for weekend
y_weekend_pred = weekend_classifier.predict(X_test)
print(y_weekend_pred)

[False False False ... False False False]


In [12]:

# Train the Random Forest regressor for informational duration prediction.
# random_state is pinned (same seed as the train/test split) so the bootstrap
# samples are repeatable; without it the predictions and the reported RMSE
# change on every run.
informational_duration_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
informational_duration_regressor.fit(X_train, y_informational_duration_train)

# Make predictions on the test set for informational duration
y_informational_duration_pred = informational_duration_regressor.predict(X_test)
print(y_informational_duration_pred)

[  0.         248.96926667   0.         ...  84.18396429 407.28262501
   0.        ]


In [10]:
# Evaluate the models
# The two classification targets are scored by accuracy on the held-out set.
revenue_accuracy = accuracy_score(y_revenue_test, y_revenue_pred)
weekend_accuracy = accuracy_score(y_weekend_test, y_weekend_pred)
# RMSE is computed as the square root of the MSE instead of passing
# `squared=False`: that keyword was deprecated in scikit-learn 1.4 and removed
# in 1.6, where it raises a TypeError. This form works on every version.
informational_duration_rmse = mean_squared_error(
    y_informational_duration_test, y_informational_duration_pred
) ** 0.5


In [11]:
# Report each metric on its own line as "<label> <value>".
for _label, _value in (
    ("Revenue Accuracy:", revenue_accuracy),
    ("Weekend Accuracy:", weekend_accuracy),
    ("Informational Duration RMSE:", informational_duration_rmse),
):
    print(_label, _value)


Revenue Accuracy: 0.8925385239253852
Weekend Accuracy: 0.7631792376317924
Informational Duration RMSE: 118.30237849592244
