# Simple Regression

## Import Libraries

In [None]:
import numpy as np # work with multi-dimensional arrays
import pandas as pd # enhances numpy to organise data in tabular form
import scipy # Ecosystem containing tools like numpy, pandas, matplotlib usable for maths/ML
import statsmodels.api as sm # package built on numpy and scipy which integrates with pandas
import matplotlib.pyplot as plt # 2D visualizations of NumPy computations
import seaborn as sns # high level interface for drawing attractive statistical graphics based on matplotlib
sns.set() # overrides style and graphics of all matplotlib graphs
# import sklearn # widely used machine learning library
from sklearn.linear_model import LinearRegression # linear regression model used in the scikit-learn section

## Load Data

In [None]:
file_path = '####' # provide the path to the file/data, NB: every '\' in the path must be changed to '/'

data = pd.read_csv(file_path) # read the .csv file at file_path into `data`

In [None]:
# check data: display the loaded table to confirm it was read as expected
data

## Create regression

#### Define dependent and independent variables

In [None]:
yname = '####' # column name of the dependent variable (also used as the y-axis label)
x1name = '####' # column name of the independent variable (also used as the x-axis label)

y = data[yname] # dependent variable, selected from the data by column name
x1 = data[x1name] # independent variable, selected from the data by column name

#### Explore in plot

In [None]:
# Scatter plot: independent variable on the x-axis, dependent variable on the y-axis
plt.scatter(x=x1, y=y)
# Axis labels reuse the column names chosen above
plt.xlabel(xlabel=x1name, fontsize=20)
plt.ylabel(ylabel=yname, fontsize=20)
plt.show()

### Perform regression
#### Using Statsmodels

In [None]:
# Add a constant column to the independent variable so the model can estimate an intercept
x = sm.add_constant(x1)
# Ordinary least squares: endog is the dependent variable, exog the independent variable(s)
result = sm.OLS(endog=y, exog=x).fit()
# Display the result of the regression in different tables
result.summary()

#### Using SK_Learn

In [None]:
x1.shape,y.shape # check shapes of variables (shown together as a tuple: x1 first, then y)

In [None]:
# SK_Learn expects a 2D array of independent variables (one column per variable),
# NB: this reshape is only done for simple linear regression, where there is a single independent variable
x_matrix = x1.values.reshape(-1,1) # reshape into a single column; -1 lets numpy infer the number of rows
x_matrix.shape # check the new shape

In [None]:
reg = LinearRegression() # create model object
reg.fit(x_matrix,y) # fit the model with x_matrix as the input and y as the target

In [None]:
# Print every value explicitly: a notebook cell only displays the value of its
# last expression, so bare expressions on the earlier lines would be dropped.
print('R-squared:', reg.score(x_matrix, y)) # get R-Squared
print('Coefficients:', reg.coef_) # get regression coefficients
print('Intercept:', reg.intercept_) # get intercept

In [None]:
# Predict
# Both predictions are printed: a notebook cell only displays the value of its
# last expression, so the first prediction would otherwise be dropped.
input_val = '####' # 'x'/input variable to be used for prediction, NB: is a number
print(reg.predict([[input_val]])) # used when a single value is provided; [[...]] makes it a 1-row, 1-column input

to_pred_data = '####' # used when a dataframe is provided
print(reg.predict(to_pred_data))

#### Explore further on plot

In [None]:
# Both values below are placeholders to fill in from the fitted model,
# e.g. reg.coef_[0] and reg.intercept_ from the scikit-learn section.
m = "####" # slope (regression coefficient) of the line, NB: is a number
err = '####' # constant term (intercept) of the line, NB: is a number

plt.scatter(x1,y) # plot the values of x against y
yhat = m*x1 + err # regression equation of line: predicted y = slope * x + intercept
fig = plt.plot(x1,yhat,
               lw=4, # line width
               c = 'orange', # line color
               label= 'regression line') # line label
plt.legend() # draw the legend so the line label is actually shown
plt.xlabel(x1name, fontsize = 20) # label x-axis
plt.ylabel(yname, fontsize = 20) # label y-axis
plt.show()