In [67]:
# modules required in this notebook 

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split

In [68]:
# reading the Combined Cycle Power Plant CSV into a pandas DataFrame
# NOTE(review): DATA_PATH is an absolute path on the author's machine - change it to where your copy lives

DATA_PATH=r"D:\combined cycle power plant\Combine Cycle Power Plant.csv"
data=pd.read_csv(DATA_PATH)

In [69]:
# previewing the first 5 rows of the data
# you can read about all the features in readme.txt

data.head(n=5)

Unnamed: 0,# T,V,AP,RH,EP
0,8.58,38.38,1021.03,84.37,482.26
1,21.79,58.2,1017.21,66.74,446.94
2,16.64,48.92,1011.55,78.76,452.56
3,31.38,71.32,1009.17,60.42,433.44
4,9.2,40.03,1017.05,92.46,480.38


In [70]:
# summary statistics of every numeric column:
# count, mean, std, min, the three quartiles and max

data.describe(percentiles=[0.25,0.5,0.75])

Unnamed: 0,# T,V,AP,RH,EP
count,7176.0,7176.0,7176.0,7176.0,7176.0
mean,19.629712,54.288154,1013.263032,73.275818,454.431293
std,7.475256,12.751468,5.964863,14.625093,17.134571
min,1.81,25.36,992.89,25.56,420.26
25%,13.47,41.74,1009.01,63.2025,439.7375
50%,20.315,52.05,1012.91,74.895,451.74
75%,25.72,66.54,1017.3025,84.925,468.6675
max,35.77,81.56,1033.3,100.16,495.76


In [71]:
# counting the null values in each column of the data
# (every count in the output is 0, so there are no null values)

data.isnull().sum(axis=0)

# T    0
 V     0
 AP    0
 RH    0
 EP    0
dtype: int64

In [72]:
# separating the input features (every column except the target) from the output feature
# the leading space in ' EP' is part of the column name as it appears in the data

TARGET_COLUMN=' EP'
X=data.drop(columns=TARGET_COLUMN)
Y=data[TARGET_COLUMN]
print(X.shape,Y.shape)

(7176, 4) (7176,)


In [73]:
# splitting the data into 2 parts: train data and test data
# we will train the algorithm using the train data
# and find the score of the algorithm using the test data
# random_state is fixed so that the same rows land in each part
# (and the same cost/score is obtained) every time the notebook is re-run

SEED=42
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,random_state=SEED)

In [74]:
# standardising X_train and X_test using StandardScaler
# the scaler learns each feature's mean and standard deviation from X_train only,
# so the scaled training features have mean = 0 and variance = 1
# (the values are centred around 0, they are NOT limited to the range 0-1)
# the same training statistics are then applied to X_test


scaler = StandardScaler().fit(X_train)

X_train_scaled,X_test_scaled=scaler.transform(X_train),scaler.transform(X_test)

In [75]:
# predict function - it predicts the output for every row of X

def predict(X,m,c):
    """Return the linear-model prediction for every row of ``X``.

    Each row's features are multiplied element-wise by the slopes ``m``,
    the products are summed along axis 1, and the intercept ``c`` is added.
    """
    weighted_features=m*X
    row_totals=np.sum(weighted_features,axis=1)
    return row_totals+c

In [76]:
# step gradient code 

def step_gradient( X , Y , m , c , learning_rate ):
    """Perform one batch gradient-descent update of the slopes and intercept.

    The quantity being minimised is the mean squared error between ``Y`` and
    ``predict(X, m, c)``.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix with one row per sample and one column per feature.
    Y : array-like
        True target values, one per row of ``X``.
    m : array-like
        Current slopes, one per column of ``X``. It is NOT modified; the
        updated slopes are returned as a new float array.
    c : float
        Current intercept.
    learning_rate : float
        Step size applied to the gradient.

    Returns
    -------
    tuple
        ``(m, c)`` - the updated slopes and intercept.
    """
    n=X.shape[0]

    # Residuals are computed once, from the parameters as they were on entry,
    # so every slope and the intercept are updated simultaneously.
    diff_bw_true_pred=np.asarray(Y,dtype=float)-predict(X,m,c)

    # d(MSE)/dm = -(2/n) * X^T r   and   d(MSE)/dc = -(2/n) * sum(r).
    # Moving against the gradient therefore ADDS these terms.
    slope_step=(2*learning_rate/n)*(X.T@diff_bw_true_pred)
    intercept_step=(2*learning_rate/n)*np.sum(diff_bw_true_pred)

    # Build a fresh float array instead of assigning into m: the caller's
    # array stays untouched and an integer-dtype m cannot truncate the update.
    m=np.asarray(m,dtype=float)+slope_step
    c=c+intercept_step

    return m,c

In [77]:
# fitting the gradient descent model using X_train and Y_train

def fit( X , Y , no_of_iterations , learning_rate ):
    """Learn the slopes and intercept by repeated gradient-descent steps.

    Starts from all-zero slopes (one per column of ``X``) and a zero
    intercept, calls ``step_gradient`` ``no_of_iterations`` times, and
    returns the final ``(slopes, intercept)`` pair.
    """
    slopes,intercept=np.zeros(X.shape[1]),0

    for _ in range(no_of_iterations):
        slopes,intercept=step_gradient(X,Y,slopes,intercept,learning_rate)

    return slopes,intercept

In [78]:
# cost func

def cost(Y_true,Y_pred):
    """Return the mean squared error between ``Y_true`` and ``Y_pred``.

    The number of samples is taken from ``Y_true.shape[0]``.
    """
    sample_count=Y_true.shape[0]
    residuals=Y_true-Y_pred
    total_squared_error=np.sum(residuals**2)

    return (1/sample_count)*total_squared_error

In [79]:
# score function - it returns the R^2 score (coefficient of determination) of the predictions

def score(Y_true,Y_pred):
    """Return the coefficient of determination (R^2) of the predictions.

    Computed as ``1 - u/v`` where ``u`` is the sum of squared residuals
    ``(Y_true - Y_pred)**2`` and ``v`` is the sum of squared deviations of
    ``Y_true`` from its own mean.

    When ``Y_true`` is constant, ``v`` is 0 and the ratio is undefined. In
    that case the function returns 1.0 if the predictions are exact and 0.0
    otherwise, instead of dividing by zero and returning nan or -inf.
    """
    u=np.sum((Y_true-Y_pred)**2)
    v=np.sum((Y_true-np.mean(Y_true))**2)

    if v==0:
        # Constant target: avoid 0/0 and x/0, return a finite value.
        return 1.0 if u==0 else 0.0

    return 1-(u/v)

In [80]:
# training the model on the scaled train data and evaluating it on the scaled test data

NO_OF_ITERATIONS=100000
LEARNING_RATE=0.001

x_train=X_train.to_numpy()
y_train=Y_train.to_numpy()

m,c=fit(X_train_scaled,y_train,NO_OF_ITERATIONS,LEARNING_RATE)
y_pred=predict(X_test_scaled,m,c)

test_cost=cost(Y_test,y_pred)
test_score=score(Y_test,y_pred)

print( "Cost is =", test_cost )
print( "Score is =", test_score )

Cost is = 20.841913099844128
Score is = 0.9314241495894025
