# Importing libraries

In [None]:
# Importing all libraries required in this notebook
import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Notebook-wide setup: seed NumPy's global RNG, then apply plotting defaults.
np.random.seed(100)
# Apply seaborn's default theme first; the figure size is set afterwards so
# that it is not reset by the theme call.
sns.set()
plt.rcParams['figure.figsize'] = (18, 8)

In [None]:
# Load the student-scores CSV into a DataFrame.
# The path is relative, so the file must sit in the notebook's working directory.
df = pd.read_csv('student_scores - student_scores.csv')

In [None]:
# Preview the first rows of the data.
# The data has two columns, Hours and Scores; in this project we predict
# Scores from the number of Hours.
df.head()

# Show Data Information

In [None]:
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes).
df.info()

In [None]:
# Convert the Scores column to float64, then re-print the summary to confirm
# the new dtype.
df = df.astype({'Scores': 'float64'})
df.info()

In [None]:
# Statistical summary of the DataFrame's numeric columns
# (count, mean, std, min, quartiles, max).
df.describe()

# Now let's see the relationship between these columns

In [None]:
# Scatter plot of Hours against Scores to inspect how the two columns relate.
ax = sns.scatterplot(x='Hours', y='Scores', data=df)
ax.set_title('Hours vs Scores')
ax.set_xlabel('Hours Studied')
ax.set_ylabel(' Scores %')
plt.show()

In [None]:
# Histogram of the Hours column. The figure is labelled so it can be read on
# its own, and plt.show() keeps the raw (counts, bins, patches) return value
# out of the cell output.
plt.hist(df['Hours'])
plt.title('Distribution of Hours')
plt.xlabel('Hours')
plt.ylabel('Count')
plt.show()

In [None]:
# Histogram of the Scores column. The figure is labelled so it can be read on
# its own, and plt.show() keeps the raw (counts, bins, patches) return value
# out of the cell output.
plt.hist(df['Scores'])
plt.title('Distribution of Scores')
plt.xlabel('Scores')
plt.ylabel('Count')
plt.show()

In [None]:
# Kernel density estimate of the Hours column.
sns.kdeplot(df['Hours'])

In [None]:
# Kernel density estimate of the Scores column.
sns.kdeplot(df['Scores'])

# Preparing the data


In [None]:
# Split the data into the input attribute (Hours) and the output label
# (Scores), each taken as a NumPy array.
X, y = df['Hours'].values, df['Scores'].values

In [None]:
# Shape of the input array (still 1-D here, as taken from a single column).
X.shape

In [None]:
# Shape of the label array (1-D, as taken from a single column).
y.shape

# Reshape

In [None]:
# Turn X into a 2-D column (n_samples, 1), the layout expected for a feature
# matrix. -1 lets NumPy infer the number of rows, so this keeps working if the
# dataset does not contain exactly 25 samples.
X = X.reshape(-1, 1)
X.shape

In [None]:
# Hold out 20% of the samples as a test set; the model is fitted on the
# training part and evaluated on the held-out part. A fixed random_state keeps
# the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=60,
)

# Create the Linear Regression Model

In [None]:
lr = LinearRegression() # Create the linear regression model
lr.fit(X_train, y_train) # Fit the model on the training split

# Plotting the regression line

In [None]:
# Regression line: the fitted equation (slope * x + intercept) evaluated at
# every value of X.
line = lr.coef_*X+lr.intercept_

# Plot the training points with the fitted line on top. Title, axis labels
# and a legend are set so the figure can be read on its own.
plt.scatter(X_train, y_train, label='Training data')
plt.plot(X, line, label='Regression line')
plt.title('Regression line over the training data')
plt.xlabel('Hours Studied')
plt.ylabel('Scores %')
plt.legend()
plt.show()

In [None]:
# Predict scores for the held-out test inputs and display them.
y_pred = lr.predict(X_test)
y_pred

# MSE, MAE, R2_score

In [None]:
# Mean squared error between the actual and predicted test scores.
mean_squared_error(y_test,y_pred)

In [None]:
# R^2 score of the predictions on the test set (actual values first, predictions second).
r2_score(y_test,y_pred)

In [None]:
# Mean absolute error between the actual and predicted test scores.
mean_absolute_error(y_test, y_pred)

In [None]:
# Root mean squared error: the square root of the MSE.
np.sqrt(mean_squared_error(y_test, y_pred))

In [None]:
# Compare the actual test scores (y_test) with the model's predictions (y_pred)
# side by side. The table gets its own name so the loaded dataset in `df` is
# not overwritten and earlier cells can still be re-run.
actual_vs_predicted = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
actual_vs_predicted

# Now predict for 9.25 hours

In [None]:
# Wrap the single query value (9.25 hours) as a (1, 1) array:
# one row (sample) and one column (feature).
new_data = np.array([[9.25]])
new_data.shape

# Predicted Score

In [None]:
# Run the fitted model on the new input and report the input next to the
# predicted score.
own_pred = lr.predict(new_data)
print(f"No of Hours = {new_data[0]}")
print(f"Predicted Score = {own_pred}")

In [None]:
# Points used to draw the fitted line: the inputs, and the fitted equation
# (intercept + slope * x) evaluated at each of them.
X_line = X
y_line = lr.intercept_ + lr.coef_ * X

In [None]:
# Training points, the new input's prediction as a large black marker,
# and the fitted line in red.
plt.scatter(X_train, y_train)
plt.scatter(new_data.ravel(), own_pred, s=100, c='black')
plt.plot(X_line, y_line, color='r')
plt.show()

In [None]:
# Actual test scores (thick red line) against predicted scores (thin white
# line drawn on top). Each series is sorted independently, so values at the
# same x position are not necessarily from the same sample.
actual_sorted = sorted(y_test)
predicted_sorted = sorted(y_pred)
plt.plot(actual_sorted, color='r', linewidth=10)
plt.plot(predicted_sorted, color='w')
plt.show()

In [None]:
# Print all evaluation metrics for the test set in one place.
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
# r2_score takes the true values first and the predictions second. The metric
# is not symmetric, so swapping the arguments reports a different number.
r2 = r2_score(y_test, y_pred)
print('mean_squared_error:', mse)
print('mean_absolute_error:', mae)
print('r2_score:', r2)