In [57]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv('online_shoppers_intention.csv')
df.dropna(inplace=True)
df = df.drop_duplicates()
df = df.drop('Month', axis=1)
df['Purchased'] = df['Revenue'] > 0

for column in df.select_dtypes(include='object'):
    encoder = LabelEncoder()
    df[column] = encoder.fit_transform(df[column])

X_train, X_test, y_train, y_test = train_test_split(df.drop('Purchased', axis=1), df['Purchased'], test_size=0.25)
model = DecisionTreeClassifier(random_state=42)
param_grid = {
    'max_depth': [3, 5, 7, 9],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 5]
}

grid_search = GridSearchCV(model, param_grid, scoring='accuracy', cv=5)
grid_search.fit(X_train, y_train)

print(grid_search.best_params_)

accuracy = accuracy_score(y_test, grid_search.predict(X_test))
print("Accuracy score:", accuracy)

confusion_matrix = confusion_matrix(y_test, grid_search.predict(X_test))
print("Confusion matrix:\n", confusion_matrix)


{'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 2}
Accuracy score: 1.0
Confusion matrix:
 [[2592    0]
 [   0  460]]


In [58]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

df = pd.DataFrame({'dteday': ['01-01-2018', '02-01-2018', '29-08-2018', '09-08-2018', '10-08-2018', '11-08-2018', '12-08-2018', '13-08-2018', '14-08-2018'], 'cnt': [1, 2, 3, 4, 5, 6, 7, 8, 9]})
df['dteday'] = pd.to_datetime(df['dteday'], format='%d-%m-%Y')
df['dteday'] = pd.to_numeric(df['dteday'], errors='coerce')
df = df.dropna()

X_train, X_test, y_train, y_test = train_test_split(df.drop('cnt', axis=1), df['cnt'], test_size=0.2)
model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
rmse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('RMSE:', rmse)
print('R-squared:', r2)


RMSE: 3.862285016847045
R-squared: 0.034428745788238735
