## Gradient Descent - Combined Cycle Power Plant

In [1]:
import numpy as np

In [2]:
# Parse both comma-separated files into float arrays (train first, then test).
training_data, testing_data = (
    np.genfromtxt(csv_path, delimiter=",")
    for csv_path in ("train.csv", "test.csv")
)

In [11]:
# Every column but the last is a feature; the last column is the target.
X, Y = training_data[:, :-1], training_data[:, -1]
# The test file is used as-is for the feature matrix.
X_test = testing_data

(array([[   8.58,   38.38, 1021.03,   84.37],
        [  21.79,   58.2 , 1017.21,   66.74],
        [  16.64,   48.92, 1011.55,   78.76],
        ...,
        [  29.8 ,   69.34, 1009.36,   64.74],
        [  16.37,   54.3 , 1017.94,   63.63],
        [  30.11,   62.04, 1010.69,   47.96]]),
 array([482.26, 446.94, 452.56, ..., 437.65, 459.97, 444.42]))

In [4]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training features only, then apply the same
# transformation to both the training and the test features.
sc = StandardScaler()
X_transform = sc.fit(X).transform(X)
X_test_transform = sc.transform(X_test)

In [14]:
# This function computes the mean squared error cost for the given parameters.
def cost(points, m):
    """Return the mean squared error of the linear model ``m`` on ``points``.

    Args:
        points: 2-D NumPy array. Every column except the last holds a
            feature; the last column holds the target value.
        m: Sequence of parameters: one weight per feature column followed
            by the intercept. Only the first ``points.shape[1]`` entries
            are used.

    Returns:
        The mean of ``(y - prediction) ** 2`` over all rows, or ``0`` when
        ``points`` has no rows.
    """
    rows, features = points.shape
    if rows == 0:
        # No samples contribute any error; also avoids a division by zero.
        return 0

    params = np.asarray(m[:features], dtype=float)
    weights, bias = params[:-1], params[-1]

    # One matrix-vector product replaces the per-row, per-feature loops.
    predictions = points[:, :features - 1] @ weights + bias
    residuals = points[:, features - 1] - predictions
    return np.mean(residuals ** 2)

In [5]:
# This function performs one gradient descent step and returns the updated parameters
def step_gradient(points, learning_rate, m):
    """Perform one batch gradient-descent update of the model parameters.

    The model is ``y ~ sum(m[j] * x[j]) + m[-1]`` and the loss is the mean
    squared error over all rows of ``points``.

    Args:
        points: 2-D NumPy array. Every column except the last holds a
            feature; the last column holds the target value.
        learning_rate: Step size multiplied with the gradient.
        m: Current parameters: one weight per feature column followed by
            the intercept. Only the first ``points.shape[1]`` entries are
            used.

    Returns:
        A new list with ``points.shape[1]`` updated parameters, in the same
        order as ``m``. With zero rows the parameters are returned unchanged.
    """
    rows, features = points.shape
    if rows == 0:
        # No samples means a zero gradient; also avoids dividing by zero.
        return list(m[:features])

    params = np.asarray(m[:features], dtype=float)
    inputs = points[:, :features - 1]
    residuals = points[:, features - 1] - (inputs @ params[:-1] + params[-1])

    # d(MSE)/d(weight_j) = (-2/rows) * sum(residual * x_j)
    # d(MSE)/d(intercept) = (-2/rows) * sum(residual)
    gradient = np.empty(features)
    gradient[:-1] = inputs.T @ residuals
    gradient[-1] = residuals.sum()
    gradient *= -2 / rows

    # Callers index and slice the result as a plain list, so keep that type.
    return list(params - learning_rate * gradient)

In [32]:
# The Gradient Descent Function
def gd(points, learning_rate, num_iterations):
    """Run ``num_iterations`` gradient-descent steps and return the parameters.

    All parameters (one per column of ``points``) start at zero and are
    replaced by the result of ``step_gradient`` on every iteration.
    """
    _, n_params = points.shape
    m = [0] * n_params  # initial guess: every parameter set to 0

    for _ in range(num_iterations):
        m = step_gradient(points, learning_rate, m)
        # To trace convergence, print cost(points, m) here.
    return m

In [33]:
def run(training_data, learning_rate, num_iterations):
    """Fit the model with ``gd`` and return ``(intercept, coefficients)``.

    ``gd`` returns the parameters with the intercept stored last; this
    splits that final entry off from the preceding feature coefficients.
    """
    params = gd(training_data, learning_rate, num_iterations)
    return params[-1], params[:-1]

In [34]:
# Hyperparameters for the gradient-descent run below.
learning_rate = 8e-7
num_iterations = 100

# Fit on training_data exactly as loaded (feature columns, then the target).
intercept, coeff = run(
    training_data,
    learning_rate=learning_rate,
    num_iterations=num_iterations,
)

print(intercept, coeff)

0.0004338949153937402 [-0.009062195937240178, -0.004475104086705936, 0.44562440606069503, 0.04568495778504347]


In [35]:
def predict(final_m, final_c, testing_data):
    """Return the linear-model prediction for every row of ``testing_data``.

    Args:
        final_m: Coefficients, indexed by feature column.
        final_c: Intercept added to each weighted sum.
        testing_data: 2-D array with one sample per row and one feature
            per column.

    Returns:
        A list with one prediction per row, in row order.
    """
    _, n_features = testing_data.shape
    return [
        sum(final_m[col] * sample[col] for col in range(n_features)) + final_c
        for sample in testing_data
    ]

In [36]:
# coeff/intercept were fitted on the raw, unscaled training_data, so they must
# be applied to raw test features. Using the standardized X_test_transform
# here would feed the model inputs on a different scale than it was fitted on.
y_predict = predict(coeff, intercept, X_test)

In [37]:
# Write one prediction per line to Predictions.csv.
np.savetxt(fname='Predictions.csv', X=y_predict)

# Fitting the data with scikit-learn

In [39]:
from sklearn.linear_model import LinearRegression
# Reference fit on the standardized training features; fit() returns the
# estimator itself, so construction and fitting can be chained.
lin_reg = LinearRegression().fit(X_transform, Y)
(lin_reg.intercept_, lin_reg.coef_)

(454.43129319955403,
 array([-14.90272935,  -2.89427819,   0.34973996,  -2.34230172]))

In [40]:
# Predictions of the scikit-learn model on the standardized test features.
lin_reg.predict(X=X_test_transform)

array([469.95711113, 471.76632733, 433.85647002, ..., 439.13871988,
       450.65930697, 447.27281902])