# In this practice session, we will learn to code Linear Regression. 
# We will perform the following steps to build a simple regression model using the Salary dataset (`Salary_Data.csv`).

 
 
  - **Data Preprocessing**

    - Importing the libraries.
    - Importing dataset.
    - Dealing with the categorical variable.
    - Classifying dependent and independent variables.
    - Splitting the data into a training set and test set.
    - Feature scaling.
 

  -  **Linear Regression**

    - Create a Linear Regression.
    - Feed the training data to the regressor model.
    - Predicting the scores for the test set.
    - Using the RMSE to measure the performance.

In [None]:
!python -m pip install pip --upgrade --user -q
!python -m pip install numpy pandas seaborn matplotlib scipy statsmodels sklearn --user -q

In [None]:
import IPython

# Ask the running kernel to shut down with restart=True
# (presumably so the packages installed above are picked up -- confirm).
running_app = IPython.Application.instance()
running_app.kernel.do_shutdown(True)

# Load the Dependencies

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Shared widget style, passed as `style=` to the widgets created in this notebook.
style = dict(description_width='initial')

In [None]:
#1 Importing essential libraries
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

# Load the Dataset

In [None]:
#2 Read the CSV file into a pandas DataFrame.
# `file_name` is resolved relative to the notebook's working directory.
file_name = 'Salary_Data.csv'
dataset = pd.read_csv(filepath_or_buffer=file_name)

In [None]:
# Preview the first eight rows of the dataset (rendered as the cell's output).
dataset.head(n=8)

In [None]:
# Report the dataset dimensions: row count and column count.
print(f"Dataset has {len(dataset)} rows and {len(dataset.columns)} columns.")

In [None]:
# Dropdown offering every dataset column; the chosen column is plotted against Salary.
wig_col = widgets.Dropdown(
    description='Choose a Column to Plot vs. Salary',
    options=dataset.columns.to_list(),
    layout=widgets.Layout(height='40px', width='40%'),
    style=style,
    disabled=False,
)

# Plot Variables

In [None]:
# Show the column picker, then draw the currently selected column against Salary.
# The plot uses whatever value the dropdown holds at the moment this cell runs.
display(wig_col)
plt.plot(dataset.loc[:, str(wig_col.value)], dataset.loc[:, 'Salary']);

In [None]:
#3 Separate the independent and dependent variables.
# X: every column except the last (independent variable, YearsofExperience).
# y: the last column (dependent variable, salary).
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

In [None]:
# Preview the first five entries of each variable.
# (The heading previously read "Idependent"; the spelling is corrected here.)
print("\nIndependent Variable (Experience):\n\n", X[:5])
print("\nDependent Variable (Salary):\n\n", y[:5])

# Create Train and Test Sets

In [None]:
#4 Creating training set and testing set
from sklearn.model_selection import train_test_split
# Slider for the fraction of rows held out as the test set (read via `test_size.value`).
# `step=0.01` lets the slider land on values such as 0.2 or 0.25; FloatSlider's default
# step of 0.1 combined with min=0.01 would only offer 0.01, 0.11, 0.21, ...
test_size = widgets.FloatSlider(min=0.01, max=0.6, step=0.01, value=0.2, description="Test Size :")
# FloatSlider has no `tooltips` trait, so the hint is shown as a label next to the slider.
display(widgets.HBox([test_size, widgets.Label('Usually 20-30%')]))

In [None]:
# Split features and target into training and test portions.
# The test fraction comes from the slider; random_state is fixed at 0.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size.value,
    random_state=0,
)

In [None]:
# Print the first five feature rows and targets of each split under its own heading.
for heading, features, targets in (
    ("Training Set :\n----------------\n", X_train, y_train),
    ("\n\nTest Set :\n----------------\n", X_test, y_test),
):
    print(heading)
    print("X = \n", features[:5])
    print("y = \n", targets[:5])

In [None]:
# Report the dimensions of the feature arrays produced by the split.
print(f"Shape of Training set is {np.shape(X_train)}")
print(f"Shape of Testing set is {np.shape(X_test)}")

# Apply Linear Regression 

In [None]:
"""# II. Simple Linear Regressor """

#5 import SLR library
from sklearn.linear_model import LinearRegression

# Configure parameters for the model.
# Toggle for the `fit_intercept` option of the regressor.
# NOTE: the selected value is the string 'TRUE' or 'FALSE', not a bool.
fit_int_wig = widgets.ToggleButtons(
                options=['TRUE', 'FALSE'],
                description='Fit Intercept :',
                disabled=False,
                style=style,
                # Adjacent literals replace the backslash continuation inside the string,
                # which embedded the next line's indentation in the tooltip text.
                tooltips=['whether to calculate the intercept for this model. If set to False, '
                          'no intercept will be used in calculations.'])

display(fit_int_wig)

# Toggle for a "normalize" option; it starts on 'FALSE'.
# NOTE(review): the training cell in this notebook does not read this widget's value -- confirm
# whether it is still needed.
norm_wig = widgets.ToggleButtons(
                value='FALSE',
                options=['TRUE', 'FALSE'],
                description='Normalize :',
                disabled=False,
                # Adjacent literals replace the backslash continuations inside the string,
                # which embedded each following line's indentation in the tooltip text.
                tooltips=['This parameter is ignored when fit_intercept is set to False. '
                          'If True, the regressors X will be normalized before regression by '
                          'subtracting the mean and dividing by the l2-norm.'],
                style=style)

display(norm_wig)

# Dropdown mapping a human-readable label to the `n_jobs` value handed to the regressor.
njobs_wig = widgets.Dropdown(
    description="Number of CPU Cores = ",
    style=style,
    options=[
        ('One', 1),
        ('Two', 2),
        ('Three', 3),
        ('All Cores', -1),
    ],
)

display(njobs_wig)

# Predict and Evaluate the Model 

In [None]:
#6 Train the regressor on the training set.
# The toggle widget yields the *string* 'TRUE' or 'FALSE'. Any non-empty string is truthy
# (and recent scikit-learn versions reject non-bool values), so convert it to a real bool.
use_intercept = str(fit_int_wig.value).upper() == 'TRUE'

# `normalize` is no longer passed: it was removed from LinearRegression in scikit-learn 1.2,
# and its old default (False) matches the value that was hard-coded here.
regressor = LinearRegression(fit_intercept=use_intercept, n_jobs=njobs_wig.value)

# Fit the linear model.
regressor.fit(X_train, y_train)

#7 Predict the outcome for the test set.
y_Pred = regressor.predict(X_test)
print("\nPredictions = ", y_Pred)

In [None]:
#8 Measure how well the predictions match the test targets.
from sklearn import metrics
# r2_score is the coefficient of determination (R^2), not an accuracy percentage,
# so the output is labelled accordingly.
print("R^2 Score = ", metrics.r2_score(y_test, y_Pred))

#9 Compare actual and predicted salaries for the test set.
print("\nActual vs Predicted Salaries \n------------------------------\n")
error_df = pd.DataFrame({"Actual" : y_test,
                         "Predicted" : y_Pred,
                         "Abs. Error" : np.abs(y_test - y_Pred)})

# Last expression of the cell, so the notebook renders the table.
error_df

# Actual vs. Predicted 

In [None]:
# Overlay actual and predicted salaries for the test set on the current axes.
ax = plt.gca()
ax.scatter(X_test, y_test, s=70, label='Actual')
ax.scatter(X_test, y_Pred, s=90, marker='^', label='Predicted')
ax.set_xlabel('Years of Experience')
ax.set_ylabel('Salary')
ax.legend()
ax.grid()
plt.show()


# Exercise 

In [None]:
# Evaluate different Regression metrics

from sklearn.metrics import mean_squared_error

In [None]:
## Exercise: implement the RMSE (Root Mean Squared Error) metric.
## NOTE: the call below is deliberately left with placeholder arguments (None) for the
## learner to replace, as described in the hint.


"""Hint : Replace y_true and y_pred with the right variables in the notebook.
          Also, don't forget to add the squared_root part to reach RMSE score."""

print(f"Root Mean Squared Error : {mean_squared_error(y_true=None, y_pred=None)}")

