## Gradient Descent - Combined Cycle Power Plant

In [167]:
import numpy as np

In [168]:
# Load the comma-separated training and test files into NumPy arrays
# (training file first, then the test file).
training_data, testing_data = (
    np.genfromtxt(csv_path, delimiter=",")
    for csv_path in ("train.csv", "test.csv")
)

In [169]:
# The last column of the training array is the target; every other column
# is a feature.
X, Y = training_data[:, :-1], training_data[:, -1]
# The test array is used as the feature matrix as-is.
X_test = testing_data

In [170]:
from sklearn.preprocessing import MinMaxScaler
# Fit the min-max scaler on the training features only, then apply that same
# fitted scaler to both the training and the test features.
sc = MinMaxScaler().fit(X)
X_transform = sc.transform(X)
X_test_transform = sc.transform(X_test)

In [171]:
# This function finds the cost (mean squared error) of a parameter vector.
def cost(points, Y, m):
    """Return the mean squared error of the linear model ``m`` on the data.

    The prediction for a row is ``sum(m[j] * row[j]) + m[features]``, i.e. the
    last entry of ``m`` is the intercept.

    Parameters
    ----------
    points : array-like, shape (rows, features)
        Feature matrix.
    Y : array-like
        Target values; flattened to 1-D, and only the first ``rows`` entries
        are used.
    m : sequence of numbers
        One weight per feature followed by the intercept
        (at least ``features + 1`` entries).

    Returns
    -------
    float
        Mean of ``(y - prediction) ** 2`` over all rows, or ``0`` when
        ``points`` has no rows.

    Raises
    ------
    IndexError
        If ``m`` has no intercept entry or ``Y`` has fewer entries than
        ``points`` has rows.
    """
    points = np.asarray(points, dtype=float)
    rows, features = points.shape
    if rows == 0:
        # Nothing to average over; avoids a division by zero in the mean.
        return 0

    params = np.asarray(m, dtype=float)
    intercept = params[features]  # raises IndexError when m is too short

    targets = np.asarray(Y, dtype=float).ravel()
    if targets.shape[0] < rows:
        raise IndexError("Y has fewer entries than points has rows")

    # One matrix-vector product replaces the per-row / per-feature loops.
    predictions = points @ params[:features] + intercept
    residuals = targets[:rows] - predictions
    return float(np.mean(residuals ** 2))

In [172]:
# This function performs one gradient-descent update of the parameters.
def step_gradient(points, Y, learning_rate, m):
    """Return the parameters after one batch gradient-descent step on the MSE.

    The model is ``prediction = sum(m[j] * x[j]) + m[features]``; the last
    entry of ``m`` is the intercept. The gradient of the mean squared error
    is computed over all rows at once and the parameters are moved against
    it by ``learning_rate``.

    Parameters
    ----------
    points : array-like, shape (rows, features)
        Feature matrix.
    Y : array-like
        Target values; flattened to 1-D, and only the first ``rows`` entries
        are used.
    learning_rate : float
        Step size applied to the gradient.
    m : sequence of numbers
        Current parameters: one weight per feature followed by the intercept
        (at least ``features + 1`` entries).

    Returns
    -------
    list of float
        Updated parameters, ``features + 1`` entries, same layout as ``m``.
        When ``points`` has no rows the gradient is zero and the current
        parameters are returned unchanged.

    Raises
    ------
    IndexError
        If ``m`` has no intercept entry or ``Y`` has fewer entries than
        ``points`` has rows.
    """
    points = np.asarray(points, dtype=float)
    rows, features = points.shape

    params = np.asarray(m, dtype=float)
    intercept = params[features]  # raises IndexError when m is too short
    params = params[: features + 1]

    if rows == 0:
        # No samples: zero gradient, and -2/rows would divide by zero.
        return params.tolist()

    targets = np.asarray(Y, dtype=float).ravel()
    if targets.shape[0] < rows:
        raise IndexError("Y has fewer entries than points has rows")

    residuals = targets[:rows] - (points @ params[:features] + intercept)

    # d(MSE)/d(w_j) = (-2/rows) * sum(residual * x_j)
    # d(MSE)/d(b)   = (-2/rows) * sum(residual)
    scale = -2.0 / rows
    gradient = np.empty(features + 1)
    gradient[:features] = scale * (points.T @ residuals)
    gradient[features] = scale * residuals.sum()

    # Plain list of floats keeps callers' indexing/slicing and printing as before.
    return (params - learning_rate * gradient).tolist()

In [173]:
# Batch gradient descent driver.
def gd(points, Y, learning_rate, num_iterations):
    """Run ``num_iterations`` gradient-descent steps starting from all zeros.

    ``points`` must be 2-D (rows, features). The returned parameter sequence
    has ``features + 1`` entries: the feature weights followed by the
    intercept, as produced by ``step_gradient``.
    """
    _, n_features = points.shape
    # Initial parameters: every weight and the intercept start at 0.
    params = [0] * (n_features + 1)

    for _ in range(num_iterations):
        params = step_gradient(points, Y, learning_rate, params)
    return params

In [174]:
def run(training_data, Y, learning_rate, num_iterations):
    """Fit the model with ``gd`` and return ``(intercept, coefficients)``.

    ``gd`` returns the feature weights followed by the intercept; this splits
    that sequence into its last entry and everything before it.
    """
    params = gd(training_data, Y, learning_rate, num_iterations)
    return params[-1], params[:-1]

In [175]:
# Hyper-parameters for the gradient-descent fit.
learning_rate, num_iterations = 0.4455, 1000

# Fit on the min-max scaled training features.
intercept, coeff = run(
    training_data=X_transform,
    Y=Y,
    learning_rate=learning_rate,
    num_iterations=num_iterations,
)

print(intercept, coeff)

498.1057608886754 [-61.326919317978536, -16.45309670760651, 5.4124127764521415, -9.025462089978594]


In [176]:
def predict(final_m, final_c, testing_data):
    """Return linear-model predictions for every row of ``testing_data``.

    Each prediction is ``sum(final_m[j] * row[j]) + final_c``.

    Parameters
    ----------
    final_m : sequence of numbers
        Feature weights; only the first ``features`` entries are used.
    final_c : number
        Intercept added to every prediction.
    testing_data : array-like, shape (rows, features)
        Feature matrix to predict for.

    Returns
    -------
    list of float
        One prediction per row, in row order (empty when there are no rows).

    Raises
    ------
    IndexError
        If there is at least one row and ``final_m`` has fewer entries than
        ``testing_data`` has columns.
    """
    testing_data = np.asarray(testing_data, dtype=float)
    rows, features = testing_data.shape
    if rows == 0:
        return []

    weights = np.asarray(final_m, dtype=float)
    if weights.shape[0] < features:
        raise IndexError("final_m has fewer entries than testing_data has columns")

    # A single matrix-vector product replaces the per-row / per-feature loops.
    return (testing_data @ weights[:features] + final_c).tolist()

In [177]:
# Predict on the scaled test features with the parameters learned above.
y_predict = predict(
    final_m=coeff,
    final_c=intercept,
    testing_data=X_test_transform,
)

In [178]:
# Write the predictions to disk, one value per line.
np.savetxt(fname='Predictions.csv', X=y_predict)

In [179]:
# y_predict

# Fitting the same data with scikit-learn's LinearRegression for comparison

In [180]:
from sklearn.linear_model import LinearRegression
# Reference fit on the same scaled training data; the final expression shows
# the fitted intercept and coefficients as the cell output.
lin_reg = LinearRegression().fit(X_transform, Y)
(lin_reg.intercept_, lin_reg.coef_)

(502.97364478554374,
 array([-67.70764164, -12.75694479,   2.369539  , -11.94849742]))

In [181]:
# Predictions of the scikit-learn model on the scaled test features
# (shown as the cell output).
lin_reg.predict(X=X_test_transform)

array([469.95711113, 471.76632733, 433.85647002, ..., 439.13871988,
       450.65930697, 447.27281902])