# The Sparks Foundation - Supervised Learning ML
- Created By Amey Mahendra Thakur

## Task: Predict the percentage score of a student based on the number of study hours.

## Step 1: Importing Libraries and Loading Dataset

In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme first, then the matplotlib 'ggplot' style sheet.
# NOTE(review): the second call presumably overrides overlapping settings from
# the first, so the order matters - confirm the intended look.
sns.set()
plt.style.use('ggplot')

In [None]:
# Read the hours/scores CSV into a DataFrame. The path is relative to the
# notebook's working directory, so the input folder must exist one level up.
data = pd.read_csv(r"../input/supervisedlearning/scores.csv")

In [None]:
# Preview the first ten rows to sanity-check the load.
data.head(10)

## Step 2: Exploratory Data Analysis

In [None]:
# (rows, columns) of the loaded dataset.
data.shape

In [None]:
# Column dtypes and non-null counts - a quick check for missing values.
data.info()

In [None]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns.
data.describe()

In [None]:
# Plot every (Hours, Scores) observation as a green star marker to eyeball
# the relationship between study time and score.
data.plot(x='Hours', y='Scores', color='green', style='*').set(
    title='Hours vs Percentage',
    xlabel='Hours Studied',
    ylabel='Percentage Scored',
)
plt.show()

### Preparing the Data
The next step is to divide the data into attributes (inputs) and labels (outputs).

In [None]:
# Select features and target by column name rather than by position, so the
# split does not silently depend on the column order of the CSV and matches
# the names already used for plotting ('Hours', 'Scores').
# X keeps a 2-D shape (n_samples, 1) because scikit-learn estimators expect a
# feature matrix; y is a 1-D vector of scores.
X = data[['Hours']].values
y = data['Scores'].values

### Splitting the data into Training and Testing sets.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Distribution of the training-set scores (histogram plus KDE overlay).
# `sns.distplot` is deprecated since seaborn 0.11; `histplot` with
# stat='density' keeps the same density-normalised histogram.
sns.histplot(x=y_train, kde=True, stat='density', color='green')
plt.title('Distribution of Scores')
# The variable on the x-axis is the score itself, and the y-axis is its
# estimated density - not hours vs. percentage.
plt.xlabel('Percentage Scored')
plt.ylabel('Density')
plt.show()

In [None]:
# Scatter of the training data with a fitted regression line and its
# confidence band. x and y are passed by keyword: recent seaborn releases
# (0.12+) reject positional x/y, while keywords work on older versions too.
# X_train is (n_samples, 1), so flatten it to the 1-D vector regplot expects.
sns.regplot(x=X_train.ravel(), y=y_train, color='green')
plt.title('Hours vs Scores')
plt.xlabel('Hours Studied')
plt.ylabel('Percentage Scored')
plt.show()

## Step 3: Training the Algorithm
After splitting the data into training and testing sets, we have to train our algorithm.

In [None]:
from sklearn.linear_model import LinearRegression

# Fit an ordinary least-squares line on the training split; `fit` returns the
# estimator itself, so construction and fitting can be chained.
training = LinearRegression().fit(X_train, y_train)
print("Training completed using " + str(training))

In [None]:
# Score the held-out hours with the fitted model.
y_pred = training.predict(X_test)

# Put true and predicted scores side by side for a quick visual comparison.
df = pd.DataFrame(dict(Actual=y_test, Predicted=y_pred))
df.head()

## Step 4: Visualizing the Results

In [None]:
# Training observations (green points) against the fitted regression line (blue).
plt.scatter(X_train, y_train, color='green')
plt.plot(X_train, training.predict(X_train), color='blue')
plt.gca().set(
    title='Hours vs Scores (Training set)',
    xlabel='Hours',
    ylabel='Scores',
)
plt.show()

In [None]:
# Held-out observations (green points) against the regression line (blue).
# The line is drawn from the training inputs: it is the same fitted line,
# just spanning the training range of hours.
plt.scatter(X_test, y_test, color='green')
plt.plot(X_train, training.predict(X_train), color='blue')
plt.gca().set(
    title='Hours vs Scores (Test set)',
    xlabel='Hours',
    ylabel='Scores',
)
plt.show()

## Step 5: Model Evaluation

In [None]:
# n = number of test samples, k = number of predictors; both are needed for
# the adjusted R2 computed in the next cell.
n, k = X_test.shape
n

In [None]:
from math import sqrt
from sklearn import metrics
from sklearn.metrics import r2_score

# Coefficient of determination on the held-out data.
r2 = r2_score(y_test, y_pred)
# Adjusted R2 penalises R2 for the number of predictors k relative to the
# sample size n: 1 - (1 - R2) * (n - 1) / (n - k - 1).
adj_r2 = 1 - ((1 - r2) * (n - 1)) / (n - k - 1)

print('R2 =', r2, '\nAdjusted R2 =', adj_r2)
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))

## Step 6: Predicting Future Data

In [None]:
# Predict the score for a single, previously unseen number of study hours.
# `predict` expects a 2-D array-like of shape (n_samples, n_features), hence
# the nested list.
Hours = 9.25
prediction = training.predict([[Hours]])
print(
    "Number of Hours = {}".format(Hours),
    "Predicted Score = {}".format(prediction[0]),
    sep="\n",
)