### Importing all the required libraries

In [None]:
import pandas as pd
import numpy as np  
from scipy.stats import shapiro
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn import metrics 

### Loading the data from the provided external URL

In [None]:
# Source CSV, reached through a shortened URL (network access is required).
link = "http://bit.ly/w-data"
# Read the remote CSV into a DataFrame, then preview its first five rows.
data = pd.read_csv(filepath_or_buffer=link)
data.head(n=5)

In [None]:
# Print the frame's structural summary: column names, dtypes and non-null counts.
data.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) per numeric column.
data.describe()

In [None]:
# Number of missing values in each column (isna is the same check as isnull).
data.isna().sum()

### Visualizing the dataset

In [None]:
# Draw the raw observations first, then label and decorate the axes.
plt.scatter(x=data["Hours"], y=data["Scores"])
plt.title("Hours vs Percentage")
plt.xlabel("Hours")
plt.ylabel("Percentage")
plt.grid()
plt.show()

### Outlier detection

In [None]:
# Per-column 3-sigma band: mean minus / plus three standard deviations.
# Both results are Series indexed by column name.
lower_limit = data.mean().sub(data.std().mul(3))
upper_limit = data.mean().add(data.std().mul(3))

In [None]:
# Lower 3-sigma bound of the first column.
# Positional access must go through .iloc: this Series is indexed by column
# name, so lower_limit[0] relies on the integer-as-position fallback that
# pandas has deprecated.
lower_limit.iloc[0]

In [None]:
# Upper 3-sigma bound of the first column.
# Positional access must go through .iloc: this Series is indexed by column
# name, so upper_limit[0] relies on the integer-as-position fallback that
# pandas has deprecated.
upper_limit.iloc[0]

In [None]:
# Keep only the rows whose first-column value lies strictly inside that
# column's 3-sigma band. The bounds are read with .iloc because the limit
# Series are indexed by column name; plain [0] depends on a deprecated
# positional fallback.
# NOTE(review): only the first column is screened, and the later cells shown
# here keep using `data` rather than `filtered_data` - confirm that is intended.
filtered_data = data[
    (data.iloc[:, 0] > lower_limit.iloc[0])
    & (data.iloc[:, 0] < upper_limit.iloc[0])
]

In [None]:
# Non-missing row count per column in the original frame (baseline for the
# comparison with the filtered frame).
data.count()

In [None]:
# Non-missing row count per column after outlier filtering; a drop relative to
# data.count() means rows were removed as outliers.
filtered_data.count()

### Visualizing the distribution of data

In [None]:
# Kernel density estimate of the first column, to eyeball the shape of its distribution.
sns.kdeplot(data.iloc[:,0])

In [None]:
# Kernel density estimate of the second column, to eyeball the shape of its distribution.
sns.kdeplot(data.iloc[:,1])

### Shapiro-Wilk test: checking whether the data are normally distributed

In [None]:
# Shapiro-Wilk normality test on the first column only. The result carries the
# test statistic and the p-value; a small p-value is evidence against normality.
shapiro(data.iloc[:,0])


### Correlation Analysis

In [None]:
# Pairwise correlation matrix of the columns drawn as a heatmap; annot=True
# writes each coefficient into its cell.
sns.heatmap(data.corr(),annot=True)

### Training and Testing data

In [None]:
independent_feature = data.iloc[:, :-1].values  
dependent_feature = data.iloc[:, 1].values  

In [None]:
 x_train, x_test, y_train, y_test = train_test_split(independent_feature, dependent_feature, 
                            test_size=0.2, random_state=42) 

### Model building

In [None]:
# Ordinary least-squares regression, fitted on the training split only.
linear_model = LinearRegression()
linear_model.fit(X=x_train, y=y_train)

In [None]:
# Evaluate the fitted line (intercept + slope * x) at every observed x and
# overlay it on the scatter of all observations.
line = linear_model.intercept_ + independent_feature * linear_model.coef_
plt.scatter(x=independent_feature, y=dependent_feature)
plt.plot(independent_feature, line)
plt.show()

In [None]:
# Predictions for the held-out rows, compared against y_test below.
y_pred = linear_model.predict(X=x_test)

### Calculating the error metrics

In [None]:
# Report the test-set error: average absolute deviation, then average squared
# deviation, between the true and the predicted values.
print(
    'Mean Absolute Error:',
    metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred),
)
print(
    'Mean Squared Error:',
    metrics.mean_squared_error(y_true=y_test, y_pred=y_pred),
)

### Providing custom input to the model

In [None]:
# Ask the user for a study duration; float() raises ValueError if the text
# entered is not a number.
hours_of_study = float(
    input("Enter the number of hours of study:")
)

In [None]:
# predict() expects a 2-D array of samples, hence the single value wrapped as
# one row with one feature; the result is an array with one prediction.
percentage_prediction = linear_model.predict([[hours_of_study]])
# Emit both report lines with one print call, separated by a newline.
print(
    "No of Hours = {}".format(hours_of_study),
    "Predicted Score = {}".format(percentage_prediction[0]),
    sep="\n",
)