In [5]:
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# NOTE: the previous blanket `try/except Exception` around this cell has been
# removed on purpose. It printed only the error message, which hid the
# traceback and let later cells run against undefined variables. Any failure
# (missing CSV, unexpected column, ...) now surfaces as a normal exception.

# Configuration shared by both experiments.
DATA_PATH = 'online_shoppers_intention.csv'
TEST_SIZE = 0.2
SEED = 42  # seeds both the train/test split and the model so reruns reproduce

# Columns whose missing values are replaced by the training-set median.
IMPUTE_COLUMNS = [
    'Administrative', 'Administrative_Duration',
    'Informational', 'Informational_Duration',
    'ProductRelated', 'ProductRelated_Duration',
]
# Columns converted to integer codes with LabelEncoder.
# 'Weekend' and 'Revenue' are also used as prediction targets below.
ENCODE_COLUMNS = ['Month', 'VisitorType', 'Weekend', 'Revenue']


def train_and_evaluate(features, target, title):
    """Train a GradientBoostingClassifier for one binary target and print test metrics.

    The data is split first; the median imputer and the standard scaler are
    then fitted on the training portion only and merely applied to the test
    portion, so no statistic of the held-out rows leaks into preprocessing.

    Parameters
    ----------
    features : pandas.DataFrame
        Numeric feature matrix; must contain every column in IMPUTE_COLUMNS.
    target : pandas.Series
        Binary labels aligned with ``features``.
    title : str
        Heading printed above the metrics.

    Returns
    -------
    dict
        Keys: 'model', 'imputer', 'scaler', 'X_train', 'X_test' (scaled
        arrays), 'y_train', 'y_test', 'y_pred' and 'metrics' (an ordered
        mapping Accuracy / Precision / Recall / F1-score).
    """
    X_tr, X_te, y_tr, y_te = train_test_split(
        features, target, test_size=TEST_SIZE, random_state=SEED
    )
    # Work on copies so the caller's frame is never modified.
    X_tr = X_tr.copy()
    X_te = X_te.copy()

    # Fill gaps with medians learned from the training rows only.
    imputer = SimpleImputer(strategy='median')
    X_tr[IMPUTE_COLUMNS] = imputer.fit_transform(X_tr[IMPUTE_COLUMNS])
    X_te[IMPUTE_COLUMNS] = imputer.transform(X_te[IMPUTE_COLUMNS])

    # Scale numerical features (again fitted on the training rows only).
    scaler = StandardScaler()
    X_tr_scaled = scaler.fit_transform(X_tr)
    X_te_scaled = scaler.transform(X_te)

    model = GradientBoostingClassifier(random_state=SEED)
    model.fit(X_tr_scaled, y_tr)
    y_pred = model.predict(X_te_scaled)

    metrics = {
        'Accuracy': accuracy_score(y_te, y_pred),
        'Precision': precision_score(y_te, y_pred),
        'Recall': recall_score(y_te, y_pred),
        'F1-score': f1_score(y_te, y_pred),
    }

    print(title)
    for metric_name, value in metrics.items():
        print(f"{metric_name}:", value)

    return {
        'model': model,
        'imputer': imputer,
        'scaler': scaler,
        'X_train': X_tr_scaled,
        'X_test': X_te_scaled,
        'y_train': y_tr,
        'y_test': y_te,
        'y_pred': y_pred,
        'metrics': metrics,
    }


# Step 1: Load the dataset
df = pd.read_csv(DATA_PATH)

# Step 2: Encode categorical variables and the targets.
# One encoder per column, so each fitted mapping stays available afterwards.
# Missing values are NOT filled here: imputation happens after the split,
# inside train_and_evaluate, so `df` keeps any NaNs it was loaded with.
label_encoders = {}
for column in ENCODE_COLUMNS:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])
# Kept under its original name; as before, it is the encoder fitted on Revenue.
label_encoder = label_encoders['Revenue']

# Step 3: Split the data into features and targets
X = df.drop(['Revenue', 'Weekend'], axis=1)
y_revenue = df['Revenue']
y_weekend = df['Weekend']

# Step 4: Train and evaluate the model for Revenue prediction
revenue_run = train_and_evaluate(X, y_revenue, "Revenue Prediction:")
gb_revenue = revenue_run['model']
y_train_revenue, y_test_revenue = revenue_run['y_train'], revenue_run['y_test']
y_pred_revenue = revenue_run['y_pred']
# dicts preserve insertion order: Accuracy, Precision, Recall, F1-score.
accuracy_revenue, precision_revenue, recall_revenue, f1_revenue = (
    revenue_run['metrics'].values()
)

# Step 5: Train and evaluate the model for Weekend prediction
weekend_run = train_and_evaluate(X, y_weekend, "Weekend Prediction:")
gb_weekend = weekend_run['model']
y_train_weekend, y_test_weekend = weekend_run['y_train'], weekend_run['y_test']
y_pred_weekend = weekend_run['y_pred']
accuracy_weekend, precision_weekend, recall_weekend, f1_weekend = (
    weekend_run['metrics'].values()
)

# Names the original cell left behind after its last (Weekend) run.
X_train, X_test = weekend_run['X_train'], weekend_run['X_test']
imputer, scaler = weekend_run['imputer'], weekend_run['scaler']


Revenue Prediction:
Accuracy: 0.8909164639091647
Precision: 0.7290322580645161
Recall: 0.5498783454987834
F1-score: 0.6269070735090153
Weekend Prediction:
Accuracy: 0.7643957826439578
Precision: 0.8979591836734694
Recall: 0.07096774193548387
F1-score: 0.13153961136023917
