In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as opt
import pandas as pd
from numpy.linalg import inv

In [2]:
# Load the housing table from CSV.
# NOTE(review): the absolute path looks environment-specific (Colab-style sample data) — confirm before running elsewhere.
data = pd.read_csv('/content/sample_data/california_housing_test.csv')

In [3]:
# Preview the first rows to check that the columns loaded as expected.
data.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-122.05,37.37,27.0,3885.0,661.0,1537.0,606.0,6.6085,344700.0
1,-118.3,34.26,43.0,1510.0,310.0,809.0,277.0,3.599,176500.0
2,-117.81,33.78,27.0,3589.0,507.0,1484.0,495.0,5.7934,270500.0
3,-118.36,33.82,28.0,67.0,15.0,49.0,11.0,6.1359,330000.0
4,-119.67,36.33,19.0,1241.0,244.0,850.0,237.0,2.9375,81700.0


In [4]:
# Two input features and the regression target, pulled out as plain NumPy arrays.
x1 = data["housing_median_age"].to_numpy()
x2 = data["population"].to_numpy()

y = data["median_house_value"].to_numpy()

# <font color='blue'>Linear Regression</font>

#### Renormalize variables

In [5]:
# Per-feature mean and standard deviation used for z-scoring.
# np.mean replaces the Python-level sum()/len(), which walks the array one
# element at a time in the interpreter.
mean_x1, mean_x2 = np.mean(x1), np.mean(x2)

sd_x1, sd_x2 = np.std(x1), np.std(x2)



In [6]:
# Standardise each feature: subtract its mean, divide by its standard deviation.
x1_r, x2_r = (x1 - mean_x1) / sd_x1, (x2 - mean_x2) / sd_x2

#### Plot Data

In [7]:
%matplotlib notebook

plt.plot(x1_r,y, 'rx')
plt.plot(x2_r, y, 'bx')
plt.show()


<IPython.core.display.Javascript object>

In [8]:
m = len(y)  # number of training examples
n = 3       # number of parameters: intercept + two features

# Seed the generator so the random starting point (and every number derived
# from it) is the same on each Restart & Run All.
theta = np.random.default_rng(0).random(n)

# Stack the bias row and the two standardised features into an (n, m) design matrix.
X = np.vstack((np.ones(m), x1_r, x2_r))

#### Define Cost function

In [9]:
def J(X, y, theta):
  """Halved mean-squared-error cost for linear regression.

  Args:
    X: (n, m) design matrix, one column per training example.
    y: the m target values.
    theta: the n model parameters.

  Returns:
    sum((theta . X - y) ** 2) / (2 * m), with m taken from X.
  """
  # Read the example count from the data instead of the notebook-level `m`,
  # so the result stays correct if that global is stale or has been rebound.
  num_examples = np.shape(X)[1]
  h = np.dot(theta, X)
  cost = np.sum(np.square(h - y), axis=0) / (2 * num_examples)
  return cost


In [10]:
# Cost at the randomly initialised theta, before any training.
J(X,y,theta)

27582148620.381084

#### Gradient Descent

In [11]:
def gradD(X,y, theta_init, alpha, iterations):
  """Batch gradient descent on the linear-regression cost J.

  Args:
    X: (n, m) design matrix, one column per training example.
    y: the m target values.
    theta_init: starting parameter vector of length n (not modified).
    alpha: learning rate.
    iterations: number of update steps to perform.

  Returns:
    (theta_res, theta_history, cost_history): the final parameters, the
    parameters after every step with shape (iterations, n), and the cost
    after every step with shape (iterations,).
  """
  # Take both dimensions from X rather than the notebook-level `n` and `m`,
  # which may be stale or rebound by a later section.
  num_params, num_examples = np.shape(X)
  cost_history = np.zeros(iterations)
  theta_history = np.zeros((iterations, num_params))
  theta = theta_init
  for it in range(iterations):
    h = np.dot(theta, X)
    # Simultaneous update of all parameters from the averaged gradient.
    theta = theta - (alpha / num_examples) * np.dot(h - y, X.T)
    theta_history[it, :] = theta
    cost_history[it] = J(X, y, theta)
  theta_res = theta
  return theta_res, theta_history, cost_history

In [12]:
# Run batch gradient descent from the initial theta with a small step size.
theta_result, theta_hist, cost_hist = gradD(
    X, y, theta_init=theta, alpha=0.001, iterations=15000
)

In [13]:
# Fitted parameters, ordered like the rows of X: intercept, housing_median_age weight, population weight.
theta_result

array([205846.21250209,  11315.48701414,   3258.45293726])

In [14]:
%matplotlib notebook

plt.plot(cost_hist)
plt.show()

<IPython.core.display.Javascript object>

#### Normal Equation

In [15]:
# Closed-form least-squares solution of (X X^T) Theta = X y.
# Solving the linear system directly avoids forming the explicit inverse,
# which is slower and less accurate when X X^T is poorly conditioned.
Theta = np.linalg.solve(X.dot(X.T), X.dot(y))
Theta

array([205846.275     ,  11315.68660675,   3258.65250279])

# <font color='blue'>Logistic Regression</font>

In [16]:
import numpy as np
import matplotlib.pyplot as plt


In [17]:
data = pd.read_csv('/content/sample_data/california_housing_test.csv')

x1 = data.housing_median_age.to_numpy()
x2 = data.population.to_numpy()

# The cross-entropy cost used below is only meaningful for labels in {0, 1};
# feeding it the raw dollar values makes (h - y) negative for every example,
# so the parameters run away and log(1 - h) diverges.
# Turn the target into a binary class: 1.0 when the house value is above the
# sample median, 0.0 otherwise.
# NOTE(review): the median threshold is an assumption — confirm the intended classes.
house_value = data.median_house_value.to_numpy()
y = (house_value > np.median(house_value)).astype(float)

In [18]:
# n: parameter count (intercept + two features); m: number of examples.
n, m = 3, len(y)

In [19]:
# Per-feature mean and standard deviation. np.mean replaces the Python-level
# sum()/len(), which walks the array one element at a time in the interpreter.
mean_x1, mean_x2 = np.mean(x1), np.mean(x2)

sd_x1, sd_x2 = np.std(x1), np.std(x2)

# Standardise each feature (z-score): subtract the mean, divide by the std.
x1_r = (x1 - mean_x1) / sd_x1
x2_r = (x2 - mean_x2) / sd_x2

In [20]:
# Start from all-zero parameters; the intercept and the feature weights are
# also kept separately for the split-parameter form of the hypothesis.
theta = np.zeros(n)
theta0, thetax = theta[0], theta[1:n]
print(theta)

# X: (n, m) design matrix with a leading row of ones.
# x: the (n-1, m) feature rows only, without the bias row.
X = np.vstack((np.ones(m), x1_r, x2_r))
x = np.vstack((x1_r, x2_r))

[0. 0. 0.]


In [21]:
# Sanity check: y should be a 1-D vector with one entry per example.
y.shape

(3000,)

#### Sigmoid Function

In [22]:
# sigmoid function

def sigmoid_h(X, theta):
  """Logistic hypothesis: the sigmoid of theta . X, applied element-wise.

  X carries the bias row itself, so theta holds the intercept together with
  the feature weights.
  """
  logits = np.dot(theta, X)
  return 1.0 / (1.0 + np.exp(-logits))

def sigmoid_h2(x, theta0, thetax):
  """Logistic hypothesis with the intercept passed separately.

  Computes the sigmoid of theta0 + thetax . x, where x holds only the
  feature rows (no bias row).
  """
  logits = theta0 + np.dot(thetax, x)
  return 1.0 / (1.0 + np.exp(-logits))


In [23]:
# Overlay both formulations of the hypothesis on a fresh figure. They are fed
# the same parameters, so the dashed curve should sit on top of the solid one.
fig, ax = plt.subplots()
ax.plot(sigmoid_h2(x, theta0, thetax), label='sigmoid_h2(x, theta0, thetax)')
ax.plot(sigmoid_h(X, theta), linestyle='--', label='sigmoid_h(X, theta)')
ax.set_xlabel('example index')
ax.set_ylabel('hypothesis h')
ax.legend()
plt.show()

In [24]:
# Plot the split-parameter hypothesis on its own.
plt.plot(sigmoid_h2(x, theta0, thetax))
plt.show()

#### Cost Function

In [25]:
def J_log(X, y, theta):
  """Cross-entropy cost for logistic regression.

  Args:
    X: (n, m) design matrix, one column per training example.
    y: the m labels, expected to be 0 or 1.
    theta: the n model parameters.

  Returns:
    -sum(y * log(h) + (1 - y) * log(1 - h)) / m, with h = sigmoid_h(X, theta).
  """
  # Evaluate the hypothesis once and reuse it for both log terms.
  h = sigmoid_h(X, theta)
  # Keep h strictly inside (0, 1) so a saturated sigmoid yields a large finite
  # cost instead of inf/nan from log(0).
  eps = np.finfo(float).eps
  h = np.clip(h, eps, 1.0 - eps)
  # Example count comes from the data, not the notebook-level `m`.
  num_examples = np.shape(X)[1]
  cost = -np.sum(y * np.log(h) + (1.0 - y) * np.log(1.0 - h)) / num_examples
  return cost

def J_log_grad(X, y, theta):
  """Gradient of the cross-entropy cost with respect to theta.

  Returns (sigmoid_h(X, theta) - y) . X^T / m, one entry per parameter,
  where m is the number of columns (examples) in X.
  """
  # Example count comes from the data, not the notebook-level `m`.
  num_examples = np.shape(X)[1]
  grad = np.dot(sigmoid_h(X, theta) - y, X.T) / num_examples
  return grad

#### Gradient Descent

In [26]:
def gradD(X, y, theta_init, alpha, iterations):
  """Batch gradient descent on the logistic-regression cost J_log.

  Args:
    X: (n, m) design matrix, one column per training example.
    y: the m labels.
    theta_init: starting parameter vector of length n (not modified).
    alpha: learning rate.
    iterations: number of update steps to perform.

  Returns:
    (theta, theta_history, cost_history): the final parameters, the
    parameters after every step with shape (iterations, n), and the cost
    after every step with shape (iterations,).
  """
  # Size the history from X rather than the notebook-level `n`.
  num_params = np.shape(X)[0]
  cost_history = np.zeros(iterations)
  theta_history = np.zeros((iterations, num_params))
  theta = theta_init
  for it in range(iterations):
    theta = theta - alpha * J_log_grad(X, y, theta)
    theta_history[it, :] = theta
    cost_history[it] = J_log(X, y, theta)
  return theta[:], theta_history, cost_history

In [27]:
# Fit the logistic model by batch gradient descent from the zero vector.
theta_result, theta_hist, cost_hist = gradD(
    X, y, theta_init=theta, alpha=0.01, iterations=15000
)

  


# K-Nearest Neighbors

In [1]:
# calculate euclidean Distance
import math
def euclideanDistance(instance1, instance2, length=None):
  """Euclidean distance between the leading components of two sequences.

  Args:
    instance1: first indexable sequence of numbers.
    instance2: second indexable sequence of numbers.
    length: how many leading components to compare. Trailing entries beyond
      this index are ignored. When omitted, every component is compared and
      the two sequences must have the same length.

  Returns:
    The square root of the summed squared differences, as a float.

  Raises:
    ValueError: if `length` is omitted and the sequences differ in length.
    IndexError: if `length` exceeds the length of either sequence.
  """
  if length is None:
    if len(instance1) != len(instance2):
      raise ValueError("instances must have the same length when `length` is omitted")
    length = len(instance1)
  distance = 0
  for x in range(length):
    distance += (instance1[x] - instance2[x]) ** 2
  return math.sqrt(distance)

In [3]:
# The points differ by 1 in each of the three coordinates, so the distance is sqrt(3).
euclideanDistance((1,2,3), (2,3,4), 3)

1.7320508075688772