In [4]:
import pandas as pd

# load the dataset
df = pd.read_csv("https://raw.githubusercontent.com/HasanRoknabady/dataset-popularity-/main/OnlineNewsPopularity.csv", delimiter=", ", engine="python")

# remove the trailing whitespace from column names
df.columns = df.columns.str.strip()

# select only the relevant columns
df = df[["n_tokens_title", "n_tokens_content", "n_unique_tokens", "num_hrefs", "num_self_hrefs", "num_imgs", "num_videos", "average_token_length", "num_keywords", "data_channel_is_lifestyle", "data_channel_is_entertainment", "data_channel_is_bus", "data_channel_is_socmed", "data_channel_is_tech", "data_channel_is_world", "weekday_is_monday", "weekday_is_tuesday", "weekday_is_wednesday", "weekday_is_thursday", "weekday_is_friday", "weekday_is_saturday", "weekday_is_sunday", "is_weekend", "shares"]]

# split the data into input and output variables
X = df.drop(columns=["shares"])
y = df["shares"]

# split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# fit a linear regression model
from sklearn.linear_model import LinearRegression
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

# calculate the performance metrics
from sklearn.metrics import mean_absolute_error
lr_mae = mean_absolute_error(y_test, lr_model.predict(X_test))
print("Linear Regression MAE:", lr_mae)

# absolute error loss
def abs_error(y_true, y_pred):
    return abs(y_true - y_pred)

# epsilon-sensitive loss
def epsilon_sensitive_error(y_true, y_pred, epsilon=1.0):
    loss = abs(y_true - y_pred)
    mask = loss > epsilon
    loss[mask] = loss[mask] - 0.5 * epsilon
    loss[~mask] = 0.5 * loss[~mask]**2 / epsilon
    return loss

# Huber loss
def huber_loss(y_true, y_pred, delta=1.0):
    loss = abs(y_true - y_pred)
    mask = loss > delta
    loss[mask] = delta * loss[mask] - 0.5 * delta**2
    loss[~mask] = 0.5 * loss[~mask]**2
    return loss

# evaluate the absolute error loss
ae_mae = mean_absolute_error(y_test, lr_model.predict(X_test), multioutput='raw_values')
ae_mae = abs_error(y_test, lr_model.predict(X_test)).mean()
print("Absolute Error MAE:", ae_mae)

# evaluate the epsilon-sensitive loss
es_mae = epsilon_sensitive_error(y_test, lr_model.predict(X_test)).mean()
print("Epsilon-Sensitive Error MAE:", es_mae)

# evaluate the Huber loss
h_mae = huber_loss(y_test, lr_model.predict(X_test)).mean()
print("Huber Loss MAE:", h_mae)


Linear Regression MAE: 3059.717817837649
Absolute Error MAE: 3059.717817837649
Epsilon-Sensitive Error MAE: 3059.217871565656
Huber Loss MAE: 3059.217871565656
