In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
import math

In [3]:
def cost(x, y, m):
    """Return the mean squared error of the linear model ``np.dot(x, m)`` against ``y``.

    Args:
        x: Feature matrix, used as the first operand of ``np.dot``.
        y: Target values; the predictions are subtracted from them.
        m: Coefficient vector, used as the second operand of ``np.dot``.

    Returns:
        The mean of the squared residuals ``(y - np.dot(x, m)) ** 2``.
    """
    predictions = np.dot(x, m)
    residuals = y - predictions
    squared_errors = residuals ** 2
    return squared_errors.mean()

In [4]:
def step_gradient(x, y, learning_rate, m, batch_size=10):
    """Run one pass of mini-batch gradient descent over ``x`` and return the new weights.

    The rows are visited in order, in consecutive batches of ``batch_size``
    (the last batch may be shorter). After each batch the weights are moved
    against the gradient of the squared error accumulated over that batch.

    Args:
        x: 2-D NumPy array of features, one row per sample.
        y: Targets, one per row of ``x``. May be 1-D or a single column;
            each batch is flattened before use.
        learning_rate: Step size applied to every batch gradient.
        m: Current weight array with one entry per column of ``x``.
            It is not modified in place.
        batch_size: Number of rows per batch. Defaults to 10.

    Returns:
        The updated weight array, with the same shape as ``m``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    k = x.shape[0]
    num_batches = math.ceil(k / batch_size)
    for a in range(num_batches):
        start_row = batch_size * a
        end_row = min(start_row + batch_size, k)
        x_batch = x[start_row:end_row]
        # Flatten so both (k,) and (k, 1) targets line up with the predictions.
        y_batch = np.ravel(y[start_row:end_row])
        # One prediction per row, computed once instead of once per feature.
        residuals = np.ravel(x_batch.dot(m)) - y_batch
        # The gradient is scaled by the total row count k (not the batch length);
        # this keeps the step magnitude the learning rates were tuned against.
        slope_m = ((2 / k) * x_batch.T.dot(residuals)).reshape(m.shape)
        m = m - learning_rate * slope_m
    return m

In [5]:
def gradient_descent(x, y, learning_rate=0.1, num_iter=1000):
    """Fit linear-model weights by repeatedly calling ``step_gradient``.

    Starts from an all-zero weight vector with one entry per column of ``x``
    and prints the cost before training, roughly ten times during training,
    and once at the end.

    Args:
        x: 2-D feature array; ``x.shape[1]`` sets the number of weights.
        y: Target values, passed through to ``cost`` and ``step_gradient``.
        learning_rate: Step size forwarded to ``step_gradient``.
        num_iter: Number of calls to ``step_gradient``.

    Returns:
        The weight vector after ``num_iter`` iterations.
    """
    m = np.zeros(x.shape[1])
    print("start:", cost(x, y, m))
    # num_iter // 10 is 0 for num_iter < 10, which would make the modulo below
    # raise ZeroDivisionError; clamp the reporting interval to at least 1.
    report_every = max(1, num_iter // 10)
    for i in range(num_iter):
        m = step_gradient(x, y, learning_rate, m)
        if i % report_every == 0:
            print(i, ":", cost(x, y, m))
    print("end:", cost(x, y, m))
    return m

In [6]:
def run(x, y, learning_rate=0.00003, num_iter=10000):
    """Train with ``gradient_descent`` using this notebook's default hyperparameters.

    Args:
        x: Feature array forwarded to ``gradient_descent``.
        y: Target values forwarded to ``gradient_descent``.
        learning_rate: Step size; defaults to the value previously hard-coded here.
        num_iter: Iteration count; defaults to the value previously hard-coded here.

    Returns:
        The weight vector returned by ``gradient_descent``.
    """
    return gradient_descent(x, y, learning_rate, num_iter)

In [7]:
def score(y, y_pred):
    """Return ``1 - SS_res / SS_tot`` for predictions ``y_pred`` against ``y``.

    Args:
        y: Observed values; must provide ``mean()`` and support arithmetic
            with ``y_pred``.
        y_pred: Predicted values aligned with ``y``.

    Returns:
        One minus the ratio of the residual sum of squares to the sum of
        squared deviations of ``y`` from its mean.
    """
    residuals = y - y_pred
    deviations = y - y.mean()
    ss_res = (residuals ** 2).sum()
    ss_tot = (deviations ** 2).sum()
    return 1 - ss_res / ss_tot

In [8]:
import pandas as pd

In [9]:
# Load the training CSV into a DataFrame. The path has no directory part,
# so the file must sit in the notebook's working directory.
boston_train=pd.read_csv("0000000000002417_training_boston_x_y_train (2).csv")

In [10]:
# Sanity check: (rows, columns) of the loaded training frame.
boston_train.shape

(379, 14)

In [11]:
# Render the training frame for visual inspection.
# NOTE(review): consider boston_train.head() so the whole frame is not rendered.
boston_train

Unnamed: 0,# CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Y
0,-0.407850,-0.487722,-1.266023,-0.272599,-0.576134,1.239974,0.840122,-0.520264,-0.752922,-1.278354,-0.303094,0.410571,-1.097990,37.9
1,-0.407374,-0.487722,0.247057,-0.272599,-1.016689,0.001946,-0.838337,0.336351,-0.523001,-0.060801,0.113032,0.291169,-0.520474,21.4
2,0.125179,-0.487722,1.015999,-0.272599,1.367490,-0.439699,0.687212,-0.577309,1.661245,1.530926,0.806576,-3.795795,0.891076,12.7
3,0.028304,-0.487722,1.015999,-0.272599,1.859875,-0.047918,0.801005,-0.712836,1.661245,1.530926,0.806576,-0.066050,0.215438,19.9
4,-0.412408,-0.487722,-0.969827,-0.272599,-0.913029,-0.384137,-0.834781,0.300508,-0.752922,-0.957633,0.020560,0.431074,0.029007,22.5
5,-0.416934,1.014463,-1.403178,-0.272599,-0.973497,1.362495,-0.685427,1.541554,-0.982843,-0.737880,-1.366528,0.417369,-1.004074,32.7
6,-0.002920,-0.487722,1.015999,-0.272599,0.218592,0.217069,0.228480,-0.427139,1.661245,1.530926,0.806576,0.402348,0.239268,23.2
7,-0.225559,-0.487722,1.231945,-0.272599,2.732346,-0.231698,1.021481,-1.034899,-0.523001,-0.031105,-1.736418,0.176811,0.202823,21.5
8,-0.395561,0.456508,-0.769931,-0.272599,-1.068519,-0.083534,0.377835,2.025290,-0.293081,-0.464673,0.297977,0.213432,-0.350864,20.5
9,-0.413779,2.945843,-1.403178,-0.272599,-1.301754,1.428029,-1.225947,1.670427,-0.867883,-0.470612,-2.707379,0.441052,-1.201719,33.3


In [12]:
# Feature matrix: the first 13 columns by position (every column before index 13).
x_train=boston_train.iloc[:,0:13]

In [13]:
# Sanity check: the feature frame should have 13 columns.
x_train.shape

(379, 13)

In [14]:
# Target: the column at position 13. The 13:14 slice (rather than a scalar
# index) keeps the result a one-column DataFrame instead of a Series.
y_train=boston_train.iloc[:,13:14]

In [15]:
# Sanity check: the target frame should be 2-D with a single column.
y_train.shape

(379, 1)

In [16]:
# Render the feature frame for visual inspection.
# NOTE(review): consider x_train.head() so the whole frame is not rendered.
x_train

Unnamed: 0,# CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,-0.407850,-0.487722,-1.266023,-0.272599,-0.576134,1.239974,0.840122,-0.520264,-0.752922,-1.278354,-0.303094,0.410571,-1.097990
1,-0.407374,-0.487722,0.247057,-0.272599,-1.016689,0.001946,-0.838337,0.336351,-0.523001,-0.060801,0.113032,0.291169,-0.520474
2,0.125179,-0.487722,1.015999,-0.272599,1.367490,-0.439699,0.687212,-0.577309,1.661245,1.530926,0.806576,-3.795795,0.891076
3,0.028304,-0.487722,1.015999,-0.272599,1.859875,-0.047918,0.801005,-0.712836,1.661245,1.530926,0.806576,-0.066050,0.215438
4,-0.412408,-0.487722,-0.969827,-0.272599,-0.913029,-0.384137,-0.834781,0.300508,-0.752922,-0.957633,0.020560,0.431074,0.029007
5,-0.416934,1.014463,-1.403178,-0.272599,-0.973497,1.362495,-0.685427,1.541554,-0.982843,-0.737880,-1.366528,0.417369,-1.004074
6,-0.002920,-0.487722,1.015999,-0.272599,0.218592,0.217069,0.228480,-0.427139,1.661245,1.530926,0.806576,0.402348,0.239268
7,-0.225559,-0.487722,1.231945,-0.272599,2.732346,-0.231698,1.021481,-1.034899,-0.523001,-0.031105,-1.736418,0.176811,0.202823
8,-0.395561,0.456508,-0.769931,-0.272599,-1.068519,-0.083534,0.377835,2.025290,-0.293081,-0.464673,0.297977,0.213432,-0.350864
9,-0.413779,2.945843,-1.403178,-0.272599,-1.301754,1.428029,-1.225947,1.670427,-0.867883,-0.470612,-2.707379,0.441052,-1.201719


In [17]:
# Render the target frame for visual inspection.
# NOTE(review): consider y_train.head() so the whole frame is not rendered.
y_train

Unnamed: 0,Y
0,37.9
1,21.4
2,12.7
3,19.9
4,22.5
5,32.7
6,23.2
7,21.5
8,20.5
9,33.3


In [18]:
from sklearn.linear_model import LinearRegression

In [19]:
import numpy as np

In [20]:
# scikit-learn linear regression estimator, constructed with its default settings.
clf=LinearRegression()

In [21]:
from sklearn.preprocessing import StandardScaler

In [22]:
# Scaler with default settings; it is not fitted yet at this point.
scaler=StandardScaler()

In [23]:
# Fit the scaler on the training features and transform them in a single call.
x_trainScaled=scaler.fit_transform(x_train)

In [24]:
# Inspect the scaled training features.
x_trainScaled

array([[-0.40098068, -0.49042688, -1.28149216, ..., -0.32264241,
         0.42027996, -1.10095452],
       [-0.40053396, -0.49042688,  0.20753471, ...,  0.09391068,
         0.30257958, -0.53143278],
       [ 0.09900799, -0.49042688,  0.96425328, ...,  0.78816582,
        -3.72614514,  0.86057789],
       ...,
       [-0.40141319, -0.49042688,  0.20753471, ...,  0.09391068,
         0.34289385, -0.34758231],
       [-0.40357903, -0.49042688, -1.16949207, ..., -0.7391955 ,
         0.21589851, -0.75260628],
       [ 0.30324229, -0.49042688,  0.96425328, ...,  0.78816582,
         0.39790715, -1.35806871]])

In [25]:
# Fit the regression on the scaled training features and the training targets.
clf.fit(x_trainScaled,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [26]:
# Load the test features. The path has no directory part, so the file must
# sit in the notebook's working directory.
x_test=pd.read_csv("0000000000002417_test_boston_x_test (1).csv")

In [27]:
# Sanity check: the test frame should have the same 13 feature columns as x_train.
x_test.shape

(126, 13)

In [28]:
# Transform only (no refit): the test features are scaled with the statistics
# the scaler learned from the training features.
x_testScaled=scaler.transform(x_test)

In [29]:
# Predict targets for the scaled test features with the fitted regression.
yp=clf.predict(x_testScaled)

In [30]:
# Wrap the predictions in a DataFrame and write them to Solution.csv without
# the row index. The header row is still written because `header` is left at
# its default.
# NOTE(review): confirm whether the consumer of Solution.csv expects a header row.
df=pd.DataFrame(yp)
df.to_csv("Solution.csv",index=False,encoding='utf-8')
# Read the file back to check what was actually written to disk.
ans=pd.read_csv("Solution.csv")

In [31]:
ans

Unnamed: 0,0
0,29.033673
1,22.371645
2,24.477787
3,20.601670
4,2.725332
5,30.400004
6,24.861200
7,18.657250
8,23.539858
9,24.113969
