# Data

In [32]:
import pandas as pd
import numpy as np
import seaborn as sns
import scipy

from sklearn.model_selection import train_test_split

In [8]:
df_rwanda = pd.read_csv('./data/rwanda')

X = df_rwanda['mean_light']
y = df_rwanda['wealth_index']

X = np.resize(X, (X_train.shape[0], 1))
y = np.resize(y, (y_train.shape[0], 1))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Parametric Regression

## Linear Regression

In [122]:
class LinearRegression:
    
    def __init__(self):
        self.coeff = None
        
    def train(self, X_train, y_train):
        intercept = np.ones((X_train.shape[0], 1))
        X = np.append(intercept, X_train, axis=1)
        beta = scipy.linalg.solve(np.dot(X.T, X), np.dot(X.T, y_train))
        self.coeff = beta
        print("MSE =", np.linalg.norm(y_train - np.dot(X, self.coeff)))
    
    def predict(self, x_test):
        intercept = [1.]
        x = np.append(intercept, x_test, axis=0)
        return np.dot(x.T, self.coeff)

In [126]:
estimator = LinearRegression()
estimator.train(X_train, y_train)
print("Coefficients :", estimator.coeff)
print("Prediction :", estimator.predict(X_test[11]))

MSE = 8.469370903109246
Coefficients : [[-0.33056575]
 [ 0.06853734]]
Prediction : [0.11150012]


## Ridge Regression

In [119]:
class RidgeRegression:
    
    def __init__(self, gamma):
        self.coeff = None
        self.gamma = gamma
        
    def train(self, X_train, y_train):        
        intercept = np.ones((X_train.shape[0], 1))
        X = np.append(intercept, X_train, axis=1)
        beta = scipy.linalg.solve(np.dot(X.T, X) + self.gamma * np.identity(X.shape[1]), np.dot(X.T, y_train))
        #beta = np.dot(X.T, np.linalg.inv(np.dot(X, X.T) + gamma * np.identity(X.shape[0])).dot(y_train))
        self.coeff = beta
        print("MSE =", np.linalg.norm(y_train - np.dot(X, self.coeff)))
    
    def predict(self, x_test):
        intercept = [1.]
        x = np.append(intercept, x_test, axis=0)
        return np.dot(x.T, self.coeff)

In [136]:
estimator = RidgeRegression(gamma=0)
estimator.train(X_train, y_train)
print("Coefficients :", estimator.coeff)
print("Prediction :", estimator.predict(X_test[11]))

MSE = 8.469370903109246
Coefficients : [[-0.33056575]
 [ 0.06853734]]
Prediction : [0.11150012]


## Lasso Regression

## Bayesian Ridge Regression

## Gaussian Process Regression

# Nonparametric Regression

## Geometric approaches

### k-Nearest-Neighbors Regression

In [94]:
def distance(x, y):
    return np.linalg.norm(x - y)

def predict(x_test, X_train, y_train, k):
    distance_matrix = [distance(x_test, X_train[i]) for i in range(X_train.shape[0])]
    idx = np.argsort(distance_matrix)[:k]
    return np.mean(y_train[idx])
    
y = predict(X_test[11], X_train, y_train, 50)
print(y, y_test[11])

0.0035071999999999937 [0.01353]


### SVR

## Projection on a functional basis

### Polynomial basis

### Gaussian basis

## Kernel methods

### Locally weighted linear regression

### Kernel Ridge Regresion

# Model selection