# Gradient Descent - Combined Cycle Power Plant

    The Combined Cycle Power Plant dataset contains 9568 data points collected from a Combined Cycle Power Plant over 6 years (2006-2011), when the power plant was set to work with full load. The features are hourly averages of the ambient variables Temperature (T), Ambient Pressure (AP), Relative Humidity (RH) and Exhaust Vacuum (V); they are used to predict the net hourly electrical energy output (EP) of the plant.
    You are given:
    1. A Readme file with more details on the dataset.
    2. A training dataset CSV file with the X train and Y train data.
    3. An X test file; you have to predict the outputs for this file and submit the predictions.

In [1]:
import numpy as np
import pandas as pd
from sklearn import model_selection
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the training matrix and the test feature matrix from their CSV files.
Train_Data = np.loadtxt(
    '0000000000002419_training_ccpp_x_y_train.csv',
    delimiter=',',
)
x_test = np.loadtxt(
    '0000000000002419_test_ccpp_x_test.csv',
    delimiter=',',
)

In [3]:
# Every column except the last is a feature; the last column is the target.
x_train = Train_Data[:, :-1]
y_train = Train_Data[:, -1]

In [33]:
# Bare expression: in a notebook cell this echoes the target vector.
y_train

array([482.26, 446.94, 452.56, ..., 437.65, 459.97, 444.42])

In [4]:
# Add degree-2 and degree-3 product columns to the training features.
df = pd.DataFrame(x_train)
N = x_train.shape[1]
# NOTE(review): pd.DataFrame(ndarray) labels its columns 0..N-1, and `count`
# starts at 1, so when N > 1 the first assignments below overwrite the
# original columns 1..N-1 instead of appending. Later products then read the
# overwritten values. The likely intent was `count = N`; changing it alters
# the number of generated columns, so any downstream code that assumes the
# current column count must be checked at the same time.
count =1
# Pairwise products df[i] * df[j] for every i <= j.
for i in range(N):
    for j in range(i, N):
        df[count] = df[df.columns[i]] * df[df.columns[j]]
        count += 1

# Triple products df[i] * df[j] * df[k] for every i <= j <= k.
for i in range(N):
    for j in range(i, N):
        for k in range(j, N):
            df[count] = df[df.columns[i]]*df[df.columns[j]]*df[df.columns[k]]
            count += 1

x_train = df.values

# Apply the same column construction to the test features so both matrices
# end up with the same columns in the same order.
df = pd.DataFrame(x_test)
N = x_test.shape[1]
# NOTE(review): same `count = 1` overwrite behaviour as for the training data.
count =1
for i in range(N):
    for j in range(i, N):
        df[count] = df[df.columns[i]] * df[df.columns[j]]
        count += 1

for i in range(N):
    for j in range(i, N):
        for k in range(j, N):
            df[count] = df[df.columns[i]]*df[df.columns[j]]*df[df.columns[k]]
            count += 1

x_test = df.values

In [5]:
# Fit the scaler on the training features only, then apply that same fitted
# transform to both the training and the test features.
scaler = StandardScaler().fit(x_train)
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)

In [21]:
def step_gradient(X, Y, m, learning_rate):
    """Perform one batch gradient-descent step for least-squares regression.

    The loss is the mean squared error ``mean((Y - X @ m) ** 2)``, whose
    gradient with respect to ``m`` is ``(-2 / n) * X.T @ (Y - X @ m)``.

    Args:
        X: 2-D array-like of shape (n_samples, n_features).
        Y: 1-D array-like of n_samples target values.
        m: Current weight vector of length n_features.
        learning_rate: Step size applied to the gradient.

    Returns:
        The updated weight vector ``m - learning_rate * gradient``.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    # Compute the residual once for all samples; the previous per-feature
    # loop recomputed it for every (sample, feature) pair.
    residual = Y - X @ m
    m_slope = (-2 / len(X)) * (X.T @ residual)
    return m - learning_rate * m_slope

def cost(m, x, y):
    """Print and return the mean squared error of the linear model ``x @ m``.

    Args:
        m: Weight vector of length n_features.
        x: 2-D array-like of shape (n_samples, n_features).
        y: 1-D array-like of n_samples target values.

    Returns:
        The mean of the squared residuals ``(y - x @ m) ** 2``. The same value
        is printed, so callers that only rely on the printed output keep
        working. For an empty ``x`` the result is 0.
    """
    if len(x) == 0:
        # No samples: the accumulated cost is simply zero.
        print(0)
        return 0
    residual = np.asarray(y, dtype=float) - np.asarray(x, dtype=float) @ m
    mse = float(np.mean(residual ** 2))
    print(mse)
    return mse

def gd(x, y, learning_rate, iterations):
    """Run batch gradient descent starting from an all-zero weight vector.

    After every update the iteration index and the current cost are printed
    on one line.

    Args:
        x: 2-D feature matrix; one weight is created per column.
        y: Target values, one per row of ``x``.
        learning_rate: Step size passed to ``step_gradient``.
        iterations: Number of update steps to perform.

    Returns:
        The weight vector after the final step.
    """
    n_weights = len(x[0])
    weights = np.zeros(n_weights)
    for step in range(iterations):
        weights = step_gradient(x, y, weights, learning_rate)
        # `cost` prints the value itself, so keep the cursor on this line.
        print(step, "cost=", end=' ')
        cost(weights, x, y)
    return weights

def RUN(x, y):
    """Fit the linear model with 1000 gradient-descent steps at rate 0.01.

    A column of ones is appended to ``x`` as its last column, so the last
    entry of the returned vector acts as the intercept.

    Args:
        x: 2-D feature matrix.
        y: Target values, one per row of ``x``.

    Returns:
        The weight vector from ``gd``: one weight per column of ``x``
        followed by the intercept.
    """
    bias_column = np.ones((len(x), 1))
    augmented = np.append(x, bias_column, axis=1)
    return gd(augmented, y, learning_rate=0.01, iterations=1000)

In [22]:
# Train on the scaled training features; the cost is printed per iteration.
m = RUN(x_train, y_train)

0 cost= 198498.19781515317
1 cost= 190617.8199638807
2 cost= 183065.64013940858
3 cost= 175815.37606381142
4 cost= 168852.79933818252
5 cost= 162166.1242418787
6 cost= 155744.35271140622
7 cost= 149576.9772788947
8 cost= 143653.91626687866
9 cost= 137965.4891673317
10 cost= 132502.39941454763
11 cost= 127255.71893631233
12 cost= 122216.87350482265
13 cost= 117377.6287006719
14 cost= 112730.07643914351
15 cost= 108266.62203197273
16 cost= 103979.97176252994
17 cost= 99863.1209539247
18 cost= 95909.34251042182
19 cost= 92112.17591338654
20 cost= 88465.41665370108
21 cost= 84963.10608333292
22 cost= 81599.52166941063
23 cost= 78369.16763483321
24 cost= 75266.76597006965
25 cost= 72287.24780141574
26 cost= 69425.74510155665
27 cost= 66677.58272885864
28 cost= 64038.27078233675
29 cost= 61503.497259775584
30 cost= 59069.121006971014
31 cost= 56731.16494654264
32 cost= 54485.809575225896
33 cost= 52329.38671898685
34 cost= 50258.373535736304
35 cost= 48269.38675581551
36 cost= 46359.17715082

294 cost= 31.414356367391775
295 cost= 31.340736273771068
296 cost= 31.269357017103452
297 cost= 31.200132834157344
298 cost= 31.132981344804303
299 cost= 31.067823418105327
300 cost= 31.004583043700432
301 cost= 30.943187208291526
302 cost= 30.883565777014958
303 cost= 30.82565137951224
304 cost= 30.769379300512547
305 cost= 30.714687374747186
306 cost= 30.66151588602535
307 cost= 30.60980747030607
308 cost= 30.55950702260989
309 cost= 30.510561607613614
310 cost= 30.462920373788382
311 cost= 30.41653447093481
312 cost= 30.371356970986707
313 cost= 30.32734279194665
314 cost= 30.284448624837953
315 cost= 30.24263286354541
316 cost= 30.201855537436426
317 cost= 30.162078246648903
318 cost= 30.12326409994206
319 cost= 30.08537765500792
320 cost= 30.04838486114623
321 cost= 30.012253004209683
322 cost= 29.976950653727744
323 cost= 29.942447612126376
324 cost= 29.908714865955844
325 cost= 29.8757245390522
326 cost= 29.84344984755114
327 cost= 29.811865056684198
328 cost= 29.78094543928599

581 cost= 26.914482848569353
582 cost= 26.90908428898709
583 cost= 26.903709498422
584 cost= 26.8983583712822
585 cost= 26.893030802469823
586 cost= 26.887726687376833
587 cost= 26.882445921883065
588 cost= 26.87718840235184
589 cost= 26.871954025627737
590 cost= 26.866742689033284
591 cost= 26.861554290365646
592 cost= 26.85638872789457
593 cost= 26.851245900357835
594 cost= 26.846125706960073
595 cost= 26.841028047369083
596 cost= 26.83595282171347
597 cost= 26.830899930579275
598 cost= 26.82586927500809
599 cost= 26.820860756493737
600 cost= 26.815874276979823
601 cost= 26.810909738857575
602 cost= 26.80596704496294
603 cost= 26.801046098573668
604 cost= 26.79614680340783
605 cost= 26.791269063620113
606 cost= 26.786412783800838
607 cost= 26.781577868972274
608 cost= 26.77676422458694
609 cost= 26.771971756525538
610 cost= 26.767200371093722
611 cost= 26.762449975020594
612 cost= 26.757720475456143
613 cost= 26.75301177996949
614 cost= 26.748323796545748
615 cost= 26.743656433584736

867 cost= 26.034510577359892
868 cost= 26.032975278654973
869 cost= 26.031446684992936
870 cost= 26.02992476679765
871 cost= 26.028409494624032
872 cost= 26.02690083915643
873 cost= 26.025398771209016
874 cost= 26.023903261724282
875 cost= 26.02241428177347
876 cost= 26.020931802555385
877 cost= 26.019455795395736
878 cost= 26.01798623174713
879 cost= 26.016523083187835
880 cost= 26.015066321421994
881 cost= 26.013615918278038
882 cost= 26.01217184570913
883 cost= 26.010734075792378
884 cost= 26.00930258072764
885 cost= 26.007877332838035
886 cost= 26.006458304568472
887 cost= 26.005045468485463
888 cost= 26.003638797276942
889 cost= 26.00223826375105
890 cost= 26.00084384083605
891 cost= 25.999455501579675
892 cost= 25.9980732191488
893 cost= 25.996696966828463
894 cost= 25.995326718021683
895 cost= 25.99396244624905
896 cost= 25.99260412514783
897 cost= 25.99125172847191
898 cost= 25.989905230090553
899 cost= 25.98856460398896
900 cost= 25.987229824266567
901 cost= 25.985900865137484

In [23]:
def predict(x, m, c):
    """Evaluate the linear model ``x @ m + c`` for every row of ``x``.

    Args:
        x: 2-D array-like of shape (n_samples, n_features).
        m: Weight vector of length n_features.
        c: Scalar intercept added to every prediction.

    Returns:
        1-D float array of n_samples predictions.
    """
    x = np.asarray(x, dtype=float)
    # Broadcast the weights across the rows and sum each row in one call
    # instead of looping over the samples in Python.
    return (x * m).sum(axis=1) + c

In [34]:
# RUN appends the bias column last, so the final entry of `m` is the intercept
# and everything before it is a feature weight. Slicing relative to the end
# avoids hard-coding the number of features.
Y_pred = predict(x_test, m[:-1], m[-1])

In [35]:
# Write one prediction per line with eight decimal places.
np.savetxt(
    'Predictions_of_ccpp.csv',
    Y_pred,
    fmt="%.8f",
)