In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Import libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

# Import the data

In [None]:
# Read the student scores CSV from the attached dataset and preview the first rows.
data = pd.read_csv(filepath_or_buffer="../input/tsf-datasets/student_scores.csv")
data.head(n=5)

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple
data.shape

In [None]:
# Per-column flag: True when that column contains at least one missing value
data.isna().any(axis=0)

In [None]:
# Split the frame into the input column (Hours) and the target column (Scores)
X, y = data["Hours"], data["Scores"]

# Plot the data

In [None]:
# Scatter of the raw observations, drawn through the explicit Axes interface
fig, ax = plt.subplots()
ax.scatter(X, y, c="r", marker="x")
ax.set_xlabel("No. of hours")
ax.set_ylabel("Scores")
ax.set_title("Hours vs Score")
ax.grid(True)
plt.show()

In [None]:
# Turn the Hours / Scores columns into (m, 1) NumPy column vectors.
# Indexing a pandas Series with [:, np.newaxis] is rejected by pandas >= 2.0
# ("Multi-dimensional indexing ... is no longer supported"), so convert to
# NumPy first and reshape. Building from `data` rather than from the current
# X / y also keeps this cell safe to re-run after X has gained its column of
# ones further down.
X = data["Hours"].to_numpy(dtype=float).reshape(-1, 1)
y = data["Scores"].to_numpy(dtype=float).reshape(-1, 1)
# One parameter for the intercept and one for the slope, both starting at zero
theta = np.zeros([2,1])

In [None]:
# Gradient-descent settings and the design matrix
iterations = 500  # number of gradient-descent steps
alpha = 0.01      # learning rate
m = len(y)        # number of training examples
ones = np.ones((m,1))
# Prepend the intercept column of ones exactly once. The shape guard makes the
# cell idempotent: re-running it no longer stacks another column of ones onto
# X, which would make X incompatible with the 2x1 theta.
if X.shape[1] + 1 == theta.shape[0]:
    X = np.hstack((ones, X))

# Implementing the cost function

In [None]:
def computeCost(X,y,theta):
    """
    Squared-error cost of a linear model: J = sum((X.theta - y)^2) / (2 * m).

    Parameters
    ----------
    X : array-like, shape (m, n)
        Design matrix, one row per example.
    y : array-like, shape (m,) or (m, 1)
        Target values.
    theta : array-like, shape (n,) or (n, 1)
        Model parameters.

    Returns
    -------
    numpy.float64
        The cost for the given parameters.
    """
    X = np.asarray(X, dtype=float)
    # Force y and theta into column vectors. Without this a flat theta gives a
    # flat X.dot(theta), and subtracting an (m, 1) y broadcasts to an (m, m)
    # matrix whose sum is not the cost.
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    theta = np.asarray(theta, dtype=float).reshape(-1, 1)
    # Use the size of the data actually passed in, not a notebook-level global
    n_samples = y.shape[0]
    residual = X.dot(theta) - y
    cost = np.sum(np.power(residual, 2)) / (2 * n_samples)
    return cost

# Cost for the current theta (all zeros on a fresh top-to-bottom run)
J = computeCost(X,y,theta)
print(J)

# Implementing the gradient descent function

In [None]:
def gradientDescent(X,y,theta,alpha,iterations):
    """
    Batch gradient descent for linear regression.

    Parameters
    ----------
    X : ndarray, shape (m, n)
        Design matrix.
    y : ndarray, shape (m, 1)
        Target values.
    theta : ndarray, shape (n, 1)
        Starting parameters; left unmodified.
    alpha : float
        Learning rate.
    iterations : int
        Number of update steps.

    Returns
    -------
    (theta, J_history)
        The updated parameters and a list with the cost after every step.
    """
    # Number of examples comes from the data passed in, not from a global
    n_samples = len(y)
    # Work on a float copy so the caller's starting theta is not changed in place
    theta = np.array(theta, dtype=float)
    J_history=[]
    for _ in range(iterations):
        predictions = X.dot(theta)
        error = np.dot(X.transpose(),(predictions - y))
        descent = alpha * 1/n_samples * error
        theta = theta - descent
        J_history.append(computeCost(X,y,theta))

    return theta, J_history

# Fit using the hyperparameters defined earlier instead of repeating their
# literal values, so changing alpha / iterations in one place takes effect here.
# Note: theta is rebound to the fitted parameters, so re-running this cell
# continues from the previous fit rather than starting over.
theta,J_history = gradientDescent(X,y,theta,alpha,iterations)
print("h(x) ="+str(round(theta[0,0],2))+" + "+str(round(theta[1,0],2))+"x1")

# Plotting 3D graph

In [None]:
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- import registers the '3d' projection on older matplotlib

#Generating values for theta0, theta1 and the resulting cost value
theta0_vals=np.linspace(-10,10,100)
theta1_vals=np.linspace(-1,4,100)
J_vals=np.zeros((len(theta0_vals),len(theta1_vals)))

for i, t0 in enumerate(theta0_vals):
    for j, t1 in enumerate(theta1_vals):
        # Keep t the same (2, 1) column shape as theta so that X.dot(t)
        # lines up element-for-element with the (m, 1) y inside computeCost.
        t=np.array([[t0],[t1]])
        J_vals[i,j]=computeCost(X,y,t)

# plot_surface expects 2-D coordinate arrays; passing the 1-D value vectors
# lets them broadcast against J_vals and collapses the surface. With
# indexing="ij", grid[i, j] pairs theta0_vals[i] with theta1_vals[j], matching
# how J_vals[i, j] was filled above.
theta0_grid, theta1_grid = np.meshgrid(theta0_vals, theta1_vals, indexing="ij")

#Generating the surface plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
surf=ax.plot_surface(theta0_grid,theta1_grid,J_vals,cmap="coolwarm")
fig.colorbar(surf, shrink=0.5, aspect=5)
# Raw strings: "\T" is not a valid escape sequence in a normal string literal
ax.set_xlabel(r"$\Theta_0$")
ax.set_ylabel(r"$\Theta_1$")
ax.set_zlabel(r"$J(\Theta)$")

#rotate for better angle
ax.view_init(30,120)

# Fitting the line over the data

In [None]:
def myfit(xval):
    """Evaluate the fitted line at xval using the notebook-level theta."""
    intercept, slope = theta[0], theta[1]
    return intercept + slope*xval

# Training points with the fitted line drawn over them
plt.figure(figsize=(10,6))
plt.plot(X[:,1],y[:,0],'rx',markersize=10,label='Training Data')
# theta is (2, 1), so index down to scalars for the %-format. Passing the
# 1-element rows theta[0] / theta[1] relies on implicit array-to-float
# conversion, which NumPy has deprecated since 1.25.
plt.plot(X[:,1],myfit(X[:,1]),'b-',label = 'Hypothesis: h(x) = %0.2f + %0.2fx'%(theta[0,0],theta[1,0]))
plt.grid(True)
plt.ylabel('Percentage')
plt.xlabel('Hours')
plt.legend()

# Making predictions

In [None]:
def predict(x,theta):
    """
    Return the model output for x: the first element of theta-transposed times x.

    x and theta are numpy arrays; theta is transposed before the product is taken.
    """
    weights_row = theta.T
    return weights_row.dot(x)[0]

# Feature vector [1, 9.25]: the leading 1 pairs with theta's first (intercept)
# entry, 9.25 is the hours value being predicted for
predict1 = predict(np.array([1,9.25]),theta)
print("For Hours = 9.25 we predict a percentage of "+str(round(predict1,2)))

In [None]:
# Observed score for the row labelled 6, used as the reference value for the
# error calculations that follow.
# NOTE(review): presumably the sample closest to 9.25 hours -- confirm against the data.
# A single .loc[row, column] lookup replaces the chained data.loc[6]["Scores"],
# which first built the whole row as an intermediate Series.
actual1 = data.loc[6, "Scores"]
actual1

# Calculating the errors
## 1. Mean squared error

In [None]:
# Squared difference between the observed and the predicted score; with a
# single pair of values the mean is just that one squared error
residual = actual1 - predict1
MSE = np.mean(residual * residual)
MSE

## 2. Root mean squared error

In [None]:
import math
# Square root of the squared error computed above
RMSE = math.sqrt(MSE)
RMSE

# Conclusion

The student's percentage score is predicted from the number of study hours using the **Gradient Descent** algorithm that I learnt in Andrew Ng's course.

The loss can be decreased further by increasing the ***no. of iterations*** or ***changing the alpha value***, but I've kept these settings modest as we have a small dataset.

Try to implement this function on a larger dataset.

If you find this notebook useful, *upvote* it so that everyone can benefit.
Also, in case of any queries, feel free to ask.