In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt

from sklearn import linear_model, datasets
from sklearn.metrics import mean_squared_error, mean_absolute_error

### Dasar

Implementasi dasar regresi linier menggunakan pustaka scikit-learn.

#### 1. Ordinary Least Squares

Source: https://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html#sphx-glr-auto-examples-linear-model-plot-ols-py

In [2]:
# Load the diabetes dataset as pandas objects: features into `data_x`, target into `data_y`.
data_x, data_y = datasets.load_diabetes(
    as_frame=True,
    return_X_y=True,
)

In [3]:
# Peek at the last five feature rows.
data_x.tail(5)

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
437,0.041708,0.05068,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207
438,-0.005515,0.05068,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018118,0.044485
439,0.041708,0.05068,-0.015906,0.017282,-0.037344,-0.01384,-0.024993,-0.01108,-0.046879,0.015491
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.02656,0.044528,-0.02593
441,-0.045472,-0.044642,-0.07303,-0.081414,0.08374,0.027809,0.173816,-0.039493,-0.00422,0.003064


In [4]:
# Peek at the last five target values.
data_y.tail(5)

437    178.0
438    104.0
439    132.0
440    220.0
441     57.0
Name: target, dtype: float64

In [5]:
# Keep only the BMI column, selected with a list so the result stays a 2-D DataFrame.
data_bmi = data_x.loc[:, ['bmi']]

# Hold out the last 20 rows for testing; every row before them is used for training.
data_bmi_train, data_bmi_test = data_bmi.iloc[:-20], data_bmi.iloc[-20:]
data_y_train, data_y_test = data_y.iloc[:-20], data_y.iloc[-20:]

In [6]:
# Fit an ordinary least squares model on the BMI training split.
# `fit` returns the estimator itself, so construction and fitting can be chained.
reg = linear_model.LinearRegression().fit(X=data_bmi_train, y=data_y_train)
reg

LinearRegression()

In [7]:
# Predict the target for the held-out BMI rows and wrap the result in a one-column
# DataFrame. No index is passed, so the frame gets a fresh default index.
predictions = pd.DataFrame(
    reg.predict(data_bmi_test),
    columns=['predictions'],
)

In [8]:
# Print the prediction results: the fitted coefficient and the test-set error metrics.
print(f'Coefficients: {reg.coef_}')
print(f'Mean Squared Error (MSE): {mean_squared_error(data_y_test, predictions)}')
print(f'Mean Absolute Error (MAE): {mean_absolute_error(data_y_test, predictions)}')

Coefficients: [938.23786125]
Mean Squared Error (MSE): 2548.0723987259694
Mean Absolute Error (MAE): 41.227091289761454


In [9]:
# Visualize the prediction results.
#
# Assemble the plotting frame from raw values in a single constructor call so the three
# columns are matched by position rather than by index label. The test split keeps the
# row labels of the full dataset while `predictions` carries a fresh default index, so
# label-aligned column assignment would only line up after an in-place index reset.
df_results = pd.DataFrame({
    'Feature Test': data_bmi_test['bmi'].to_numpy(),
    'Target Actual': data_y_test.to_numpy(),
    'Target Predicted': predictions.to_numpy().ravel(),
})

# A 'lines' trace connects points in the order they are supplied, so order the rows by
# the feature value to draw the fitted line left to right instead of retracing itself.
# `df_results` itself is left in its original row order.
fitted_line = df_results.sort_values('Feature Test')

# Scatter of the actual targets with the fitted regression line overlaid.
fig = px.scatter(
    df_results,
    x='Feature Test',
    y='Target Actual',
    title='Actual vs. predicted target (test set)',
)
fig.add_scatter(
    x=fitted_line['Feature Test'],
    y=fitted_line['Target Predicted'],
    mode='lines',
    name='Predicted',
)
fig.show()

## Case Studies

In [None]:
# Melbourne housing CSV, read directly from its raw GitHub URL.
data = 'https://raw.githubusercontent.com/rudyhendrawn/data-course/main/datasets/melbourne_housing_extra_data.csv'
df = pd.read_csv(filepath_or_buffer=data)

# Show the first five rows as a quick sanity check of the load.
df.head(5)

In [None]:
# Count the missing values in each column.
df.isnull().sum()

In [None]:
# Number of rows in the dataset.
df.shape[0]