# SIMPLE LINEAR REGRESSION

# Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
# Silence every warning category for the rest of the session (this applies to all libraries).
warnings.filterwarnings('ignore')
# Use matplotlib's 'ggplot' style sheet for the figures created below.
plt.style.use('ggplot')

# Loading Data

In [None]:
# Relative location of the salary CSV that feeds the rest of the notebook.
csv_path = '../input/salary-data-simple-linear-regression/Salary_Data.csv'
data = pd.read_csv(csv_path)
# Bare expression: shown as the cell output.
data

In [None]:
# Summary statistics of the loaded frame; as the cell's last expression it is shown as the cell output.
data.describe()

In [None]:
# Pairwise correlation matrix of the frame's columns (pandas uses Pearson by default).
data.corr()

# Plotting Data

In [None]:
# Side-by-side histograms: experience on the left panel, salary on the right.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(18, 5))
for panel, heading, values in zip(
    ax,
    ('Years of Experience', 'Salary'),
    (data['YearsExperience'], data['Salary']),
):
    panel.set_title(heading, fontsize=15)
    panel.hist(values, color='brown')
plt.show()

In [None]:
# Single scatter plot of salary against years of experience.
plt.figure(figsize=(10, 5))
plt.scatter(data['YearsExperience'], data['Salary'], s=75, color='black')
plt.xlabel('Experience (yrs)', fontsize=12)
plt.ylabel('Salary', fontsize=12)
plt.title('Years of Experience vs Salary', fontsize=13)
plt.show()

# Simple Regression Model

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error 
from sklearn.metrics import r2_score 

## Creating Training & Testing Set

In [None]:
# Every column except the last forms the feature matrix; the last column is the target.
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values
# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [None]:
# Report the shape of each split.
# The arrays are kept in a mapping under their own name so that the `data`
# DataFrame loaded earlier is not overwritten; rebinding `data` to a list
# would break any earlier cell that is re-run afterwards.
splits = {
    'X_train': X_train,
    'X_test': X_test,
    'y_train': y_train,
    'y_test': y_test,
}
for split_name, split_array in splits.items():
    print(f'Shape of {split_name}: {split_array.shape}')

## Plotting Dataset Distribution

In [None]:
# One scatter panel per split: training on the left, testing on the right.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
panels = (
    (ax[0], 'Training Set', X_train, y_train),
    (ax[1], 'Testing Set', X_test, y_test),
)
for panel, heading, xs, ys in panels:
    panel.set_title(heading, fontsize=15)
    panel.scatter(xs, ys, s=75, color='black')
    panel.set_xlabel('Experience (yrs)')
    panel.set_ylabel('Salary')
plt.show()

## Modeling & Fitting

In [None]:
# Fit a linear regression on the training split; fit() returns the estimator itself.
regr = LinearRegression().fit(X_train, y_train)
slope, intercept = regr.coef_, regr.intercept_
print(f'Coefficients: {slope}')
print(f'Intercepts: {intercept}')

## Plotting Regression Line

In [None]:
# Overlay the fitted line on each split: training on the left, testing on the right.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
panels = (
    (ax[0], 'Training Set', X_train, y_train),
    (ax[1], 'Testing Set', X_test, y_test),
)
for panel, heading, xs, ys in panels:
    panel.set_title(heading, fontsize=15)
    panel.scatter(xs, ys, s=75, color='black')
    # Line values come straight from the fitted slope and intercept.
    panel.plot(xs, regr.coef_*xs + regr.intercept_, '-r')
    panel.set_xlabel('Experience (yrs)')
    panel.set_ylabel('Salary')
plt.show()

## Making Predictions

In [None]:
# Keep the raw model output in y_pred: it is reused for the evaluation metrics,
# and rounding it first would compute those metrics on altered predictions.
y_pred = regr.predict(X_test)
# Round only for presentation; the table is the cell's last expression.
pd.DataFrame({
    'Years Experience': X_test[:, 0],
    'Actual Salary': y_test,
    'Predicted Salary': np.round(y_pred),
})

## Evaluation

In [None]:
# Error metrics comparing y_test with y_pred.
print(f'Mean Absolute Error(MAE): {mean_absolute_error(y_test, y_pred)}')
# mean_squared_error averages the squared residuals, so the label says
# "Mean Squared Error" rather than "Residual Sum of Squares".
print(f'Mean Squared Error(MSE): {mean_squared_error(y_test, y_pred)}')
print(f'R2-Score: {r2_score(y_test, y_pred)}')

## Thank You!