## Megatutorial 04: Regression

In [9]:
from pandas import read_csv

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

from sklearn.metrics import r2_score, mean_squared_error
from sklearn.metrics import root_mean_squared_error, mean_absolute_error

## Daten laden

In [10]:
# Read the bike-sharing CSV; its first column becomes the DataFrame index.
data = read_csv(
    "../../data/bikesharing.csv",
    index_col=0,
)

## Data Preprocessing

### Imputing

In [11]:
# Fill missing values in the `hum` column with that column's median.
# NOTE(review): the median is fitted on all rows, i.e. before the train/test
# split further down, so test rows contribute to the imputed value.
imputer = SimpleImputer(strategy="median")
data["hum"] = imputer.fit_transform(data[["hum"]])

### Encoding

In [12]:
# Integer-encode each listed column in place. A single loop replaces five
# copy-pasted stanzas; every fitted encoder is kept so the integer codes can
# be mapped back to the original labels with `inverse_transform`.
# NOTE(review): scikit-learn documents LabelEncoder for target values (y);
# OrdinalEncoder / OneHotEncoder are the feature-side equivalents. The codes
# follow sorted label order, which a linear model treats as a numeric scale.
columns_to_encode = ["season", "holiday", "weekday", "workingday", "weathersit"]

encoders = {}
for column in columns_to_encode:
    encoders[column] = LabelEncoder()
    data[column] = encoders[column].fit_transform(data[column])

# Keep the original per-column names available for any cell that uses them.
season_encoder = encoders["season"]
holiday_encoder = encoders["holiday"]
weekday_encoder = encoders["weekday"]
workingday_encoder = encoders["workingday"]
weathersit_encoder = encoders["weathersit"]

### Feature/Target Split

In [13]:
# Columns used as model inputs.
# NOTE(review): "workingday" is label-encoded in the Encoding cell but is not
# listed here — confirm the omission is intentional.
features = [
    "season",
    "holiday",
    "weekday",
    "weathersit",
    "temp",
    "atemp",
    "hum",
    "windspeed",
    "month",
]

# Column to predict. It is kept as a list, so `y` below is a single-column
# DataFrame rather than a Series.
target = ["cnt"]

X = data[features]
y = data[target]

### Train/Test Split

In [14]:
# Hold out 40 % of the rows for evaluation; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=875,
)

## Modelling

### Lineare Regression

In [17]:
# Fit a linear regression on the training rows and score it on the held-out rows.
linear_model = LinearRegression().fit(X_train, y_train)
predictions = linear_model.predict(X_test)

# Evaluate once per metric, then report all three on a single line.
linear_r2 = r2_score(y_test, predictions)
linear_rmse = root_mean_squared_error(y_test, predictions)
linear_mae = mean_absolute_error(y_test, predictions)

print("R²", linear_r2, ", RMSE", linear_rmse, ", MAE", linear_mae)

R² 0.5405958921616068 , RMSE 1373.1227620068096 , MAE 1163.1284232192193


### Decision Tree Regressor

In [22]:
# Fit a regression tree limited to depth 5 and score it on the held-out rows.
# random_state is pinned (same seed value as the train/test split) because
# scikit-learn permutes the features at every split, so ties between equally
# good splits could otherwise be resolved differently from run to run.
tree_model = DecisionTreeRegressor(max_depth=5, random_state=875)
tree_model.fit(X_train, y_train)

predictions = tree_model.predict(X_test)

print(
    "R²", r2_score(y_test, predictions),
    ", RMSE", root_mean_squared_error(y_test, predictions),
    ", MAE", mean_absolute_error(y_test, predictions)
)

R² 0.544991012461358 , RMSE 1366.5386420626091 , MAE 1134.0577092648987
