# Simple Linear Regression

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

## Dataset

In [None]:
# Read only the price and sqft_living columns from the CSV file
data = pd.read_csv(
    'kc_house_data.csv',
    usecols=['price', 'sqft_living'],
)
data  # trailing expression: a notebook renders the DataFrame as the cell output

In [None]:
# Convert the DataFrame to a NumPy array
data_np = np.array(data)

# Column 0 is price (the target), column 1 is sqft_living (the feature)
y, x = data_np[:, 0], data_np[:, 1]

# Scatter plot of the raw dataset
fig = plt.figure(figsize=(8, 6))
plt.scatter(x, y, s=30, marker='o', color='b')
plt.show()

In [None]:
# Standardize the dataset: subtract the mean and divide by the standard
# deviation. The statistics are kept in x_mean/x_std/y_mean/y_std.
# NOTE(review): the statistics are computed over the full dataset, i.e. before
# the train/test split below, so the test samples contribute to them.
x_mean, x_std = np.mean(x), np.std(x)
y_mean, y_std = np.mean(y), np.std(y)

x = (x - x_mean) / x_std
y = (y - y_mean) / y_std

# Split into train and test datasets (20% held out for testing, fixed seed)
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1234,
)

## Model


In [None]:
class LinearRegression():
    """Linear regression fitted with batch gradient descent on the MSE loss.

    The model is ``y_hat = x @ weights + bias``. A 1-D input is treated as a
    single feature column, so the class works for simple regression (one
    feature) as well as for several features.

    Attributes:
        learning_rate: Step size applied to every gradient update.
        n_iters: Number of full-batch gradient steps performed by ``fit``.
        weights: 1-D array with one coefficient per feature; ``None`` until
            ``fit`` has been called.
        bias: Intercept term; ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.001, n_iters=1000):
        """Store the hyper-parameters; the parameters are created by ``fit``."""
        self.learning_rate = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    @staticmethod
    def _as_matrix(x):
        """Return ``x`` as a float64 array of shape (n_samples, n_features)."""
        matrix = np.asarray(x, dtype=np.float64)
        if matrix.ndim == 1:
            # A flat vector is one feature observed n_samples times.
            matrix = matrix.reshape(-1, 1)
        if matrix.ndim != 2:
            raise ValueError(
                "x must be 1-D or 2-D, got %d dimension(s)" % matrix.ndim
            )
        return matrix

    def fit(self, x, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Args:
            x: Features, shape (n_samples,) or (n_samples, n_features).
            y: Targets, n_samples values.

        Raises:
            ValueError: If the dataset is empty, ``x`` has more than two
                dimensions, or ``x`` and ``y`` disagree on the sample count.
        """
        features = self._as_matrix(x)
        targets = np.asarray(y, dtype=np.float64).ravel()
        n_samples, n_features = features.shape

        if n_samples == 0:
            raise ValueError("cannot fit on an empty dataset")
        if targets.shape[0] != n_samples:
            raise ValueError(
                "x has %d samples but y has %d" % (n_samples, targets.shape[0])
            )

        # Start from the zero model and restart on every call to fit.
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.n_iters):
            residual = features @ self.weights + self.bias - targets
            # Gradients of mean((y_hat - y) ** 2) w.r.t. weights and bias.
            grad_weights = (2.0 / n_samples) * (features.T @ residual)
            grad_bias = (2.0 / n_samples) * residual.sum()
            self.weights -= self.learning_rate * grad_weights
            self.bias -= self.learning_rate * grad_bias

    def predict(self, x):
        """Return predictions for ``x`` as a 1-D array of length n_samples.

        Args:
            x: Features, shape (n_samples,) or (n_samples, n_features).

        Raises:
            RuntimeError: If called before ``fit``.
            ValueError: If ``x`` has more than two dimensions.
        """
        if self.weights is None:
            raise RuntimeError("fit must be called before predict")
        return self._as_matrix(x) @ self.weights + self.bias


## Prediction

In [None]:
# Train on the training split, then predict on the test split
model = LinearRegression()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

# Mean squared error between the test targets and the predictions
loss = np.sum(np.square(y_test - y_pred)) / len(y_test)
print(loss)

In [None]:
# Plot the test samples (blue dots) with the model's predictions (red line)
fig = plt.figure(figsize=(8, 6))
plt.scatter(x_test, y_test, s=30, marker='o', color='b')
plt.plot(x_test, y_pred, 'r')
plt.show()

In [None]:
# Show the fitted parameters: weights on the first line, bias on the second
print(model.weights, model.bias, sep='\n')

In [None]:
# Predict on the first 10 test samples (still normalized), then map the
# predictions back with the stored target mean/std (inverse normalization)
y_pred = model.predict(x_test[:10]) * y_std + y_mean

# Apply the same inverse normalization to the sampled inputs and targets
x_sampling = x_test[:10] * x_std + x_mean
y_sampling = y_test[:10] * y_std + y_mean

# Actual values in blue, predicted values in red
fig = plt.figure(figsize=(8, 6))
plt.scatter(x_sampling, y_sampling, s=30, marker='o', color='b')
plt.scatter(x_sampling, y_pred, s=30, marker='o', color='r')
plt.show()

# Multiple Linear Regression

## Dataset

In [None]:
# Read the price, bedrooms and sqft_living columns from the CSV file
data = pd.read_csv(
    'kc_house_data.csv',
    usecols=['price', 'bedrooms', 'sqft_living'],
)

# Convert the DataFrame to a float64 NumPy array
data_np = np.array(data).astype(np.float64)

# Column 0 is price (the target); the remaining columns are the features
# (bedrooms, sqft_living)
y = data_np[:, 0]
x = data_np[:, 1:]

# 3-D scatter plot of the raw dataset: both features against the target
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x[:, 0], x[:, 1], y, s=30, marker='o', color='b')
plt.show()

In [None]:
# Standardize the two feature columns one at a time, writing the result back
# into x in place; then standardize the target.
# NOTE(review): the statistics are computed over the full dataset, i.e. before
# the train/test split below, so the test samples contribute to them.
for col in (0, 1):
    x[:, col] = (x[:, col] - np.mean(x[:, col])) / np.std(x[:, col])
y = (y - np.mean(y)) / np.std(y)


# Split into train and test datasets (20% held out for testing, fixed seed)
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1234,
)

## Model

In [None]:
class LinearRegression():
    """Multiple linear regression fitted with batch gradient descent (MSE).

    The model is ``y_hat = x @ weights + bias`` with one weight per feature
    column. A 1-D input is also accepted and treated as a single feature.

    Attributes:
        learning_rate: Step size applied to every gradient update.
        n_iters: Number of full-batch gradient steps performed by ``fit``.
        weights: 1-D array with one coefficient per feature; ``None`` until
            ``fit`` has been called.
        bias: Intercept term; ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.001, n_iters=1000):
        """Store the hyper-parameters; the parameters are created by ``fit``."""
        self.learning_rate = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    @staticmethod
    def _as_matrix(x):
        """Return ``x`` as a float64 array of shape (n_samples, n_features)."""
        matrix = np.asarray(x, dtype=np.float64)
        if matrix.ndim == 1:
            # A flat vector is one feature observed n_samples times.
            matrix = matrix.reshape(-1, 1)
        if matrix.ndim != 2:
            raise ValueError(
                "x must be 1-D or 2-D, got %d dimension(s)" % matrix.ndim
            )
        return matrix

    def fit(self, x, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Args:
            x: Features, shape (n_samples, n_features) or (n_samples,).
            y: Targets, n_samples values.

        Raises:
            ValueError: If the dataset is empty, ``x`` has more than two
                dimensions, or ``x`` and ``y`` disagree on the sample count.
        """
        features = self._as_matrix(x)
        targets = np.asarray(y, dtype=np.float64).ravel()
        n_samples, n_features = features.shape

        if n_samples == 0:
            raise ValueError("cannot fit on an empty dataset")
        if targets.shape[0] != n_samples:
            raise ValueError(
                "x has %d samples but y has %d" % (n_samples, targets.shape[0])
            )

        # Start from the zero model and restart on every call to fit.
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.n_iters):
            residual = features @ self.weights + self.bias - targets
            # Gradients of mean((y_hat - y) ** 2) w.r.t. weights and bias.
            grad_weights = (2.0 / n_samples) * (features.T @ residual)
            grad_bias = (2.0 / n_samples) * residual.sum()
            self.weights -= self.learning_rate * grad_weights
            self.bias -= self.learning_rate * grad_bias

    def predict(self, x):
        """Return predictions for ``x`` as a 1-D array of length n_samples.

        Args:
            x: Features, shape (n_samples, n_features) or (n_samples,).

        Raises:
            RuntimeError: If called before ``fit``.
            ValueError: If ``x`` has more than two dimensions.
        """
        if self.weights is None:
            raise RuntimeError("fit must be called before predict")
        return self._as_matrix(x) @ self.weights + self.bias


## Prediction

In [None]:
# Train on the training split, then predict on the test split
model = LinearRegression()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

# Mean squared error between the test targets and the predictions
loss = np.sum(np.square(y_test - y_pred)) / len(y_test)
print(loss)

In [None]:
# 3-D view of the test samples (blue dots) and a triangulated surface drawn
# through the model's predictions (red)
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x_test[:, 0], x_test[:, 1], y_test, s=30, marker='o', color='b')
ax.plot_trisurf(x_test[:, 0], x_test[:, 1], y_pred, color='r')
plt.show()
plt.show()