# Simple Linear Regression Analysis

## Importing required libraries

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import os

## Importing data and cleaning it

In [None]:
# Load the salary dataset and discard any rows that contain missing values,
# then print a summary of the remaining columns and dtypes.
csv_path = "../input/salary-data-simple-linear-regression/Salary_Data.csv"
df = pd.read_csv(csv_path).dropna()
df.info()

## Preparing the data for training

In [None]:
# Feature matrix: scikit-learn expects a 2-D array of shape (n_samples, n_features),
# so the single "YearsExperience" column is reshaped to (n_samples, 1).
x = np.array(df["YearsExperience"]).reshape(-1, 1)
# Target vector: one salary per row.
y = np.array(df["Salary"])

# Hold out 48% of the rows for testing. A fixed random_state makes the split
# (and therefore the fitted model, the plot and the reported scores)
# reproducible from one run to the next.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.48, random_state=42
)

## Training the model 

In [None]:
# Fit an ordinary least-squares line to the training portion of the data.
reg = LinearRegression()
reg.fit(X=x_train, y=y_train)

## Plotting the fitted line against the test data

In [None]:
# Predict salaries for the held-out test rows.
y_pred = reg.predict(x_test)

# Attach each label directly to its artist so the legend cannot get out of
# sync with the plotting order: the green points are the actual test
# observations, the blue line is the model's prediction.
plt.scatter(x_test, y_test, color='green', label="Actual test data")
plt.plot(x_test, y_pred, color='blue', label="Regression line")
plt.legend()
plt.title("Given Dataset after Training", size=20)
plt.xlabel("Years of Experience", size=15)
plt.ylabel("Salary", size=15)
plt.show()

# LinearRegression.score returns the R^2 of the predictions on the given data.
print("Score: ", reg.score(x_test, y_test))

## Making Predictions 

In [None]:
# Round the predicted salaries to whole numbers for display.
y_pred = y_pred.round()

# Side-by-side table of the test inputs, the true salaries and the model's
# (rounded) predictions. Left as the final expression so the notebook shows it.
comparison = {
    'Years Experience': x_test[:, 0],
    'Actual Salary': y_test,
    'Predicted Salary': y_pred,
}
pd.DataFrame(comparison)

## Evaluation of the model 

In [None]:
from sklearn.metrics import r2_score

# NOTE(review): despite the names, test_x / test_y cover the *entire* dataset,
# including the rows the model was trained on, so these metrics are more
# optimistic than a pure held-out evaluation would be.
test_x = np.asanyarray(df["YearsExperience"]).reshape(-1, 1)
test_y = np.asanyarray(df["Salary"])

y_hat = reg.predict(test_x)
# Prediction error for every row (predicted minus actual).
residuals = y_hat - test_y

print("Mean Absolute Error(MAE) : ", np.mean(np.abs(residuals)))
print("Mean Squared Error(MSE) : ", np.mean(residuals ** 2))
# r2_score's signature is (y_true, y_pred); R^2 is not symmetric, so the
# ground-truth values must be passed first.
print("R2 Score(R2) : ", r2_score(test_y, y_hat))