In [None]:
import numpy as np # linear algebra
import scipy # training functions
import matplotlib.pyplot as plt # graphics
from scipy.optimize import curve_fit # MSE optimization

In [None]:
# Sample data: age (idade) is the predictor, yearly salary (salario_anual) the response.
idade = [18, 25, 57, 45, 26, 64, 37, 40, 24, 33]  # independent variable
salario_anual = [
    15000, 29000, 68000, 52000, 32000,
    80000, 41000, 45000, 26000, 33000,
]  # dependent variable

# The plain lists are wrapped in NumPy arrays so later cells can do
# element-wise arithmetic on them.
x_data, y_data = np.array(idade), np.array(salario_anual)

In [None]:
# Model handed to the optimiser: a straight line (simple linear regression).
def linear_equation(x,a,b):
    """Evaluate the line ``a * x + b``.

    Parameters
    ----------
    x : number or numpy.ndarray
        Point(s) at which the line is evaluated.
    a : number
        Slope of the line.
    b : number
        Intercept of the line.

    Returns
    -------
    Same kind of value as ``a * x + b`` produces for the given inputs
    (element-wise when ``x`` is an array).
    """
    scaled = a * x
    return scaled + b

In [None]:
# Starting guess for the two line parameters (a, b) that the fit will refine.
initial_parameters = np.array((1.0, 1.0))

In [None]:
# Least-squares fit of the line's (a, b) to the data, starting from the
# initial guess. curve_fit returns the best-fit parameters together with
# their estimated covariance matrix.
optimized_parameters, covariance = curve_fit(
    f=linear_equation,
    xdata=x_data,
    ydata=y_data,
    p0=initial_parameters,
)

In [None]:
# Evaluate the fitted line at every observed x to obtain the model's predictions.
prediction = linear_equation(
    x_data,
    a=optimized_parameters[0],
    b=optimized_parameters[1],
)

In [None]:
# Residual of each point: predicted value minus real value, sign preserved
# (the vertical distance between the observation and the fitted line).
# Note that, despite the variable name, no absolute value is taken.
absolute_error = np.subtract(prediction, y_data)

In [None]:
# Square every residual, then average the squares to obtain the MSE.
squared_error = absolute_error * absolute_error
mean_squared_error = squared_error.mean()

print('SE: ', squared_error)
print('MSE: ', mean_squared_error)

In [None]:
# coefficient of determination: R^2 = 1 - SS_res / SS_tot
# Sums of squares are used instead of variances: the variance form subtracts
# the mean of the residuals first, so it only equals R^2 when the residuals
# average exactly to zero. This form is the same definition scikit-learn's
# r2_score uses.
ss_residual = np.sum(np.square(absolute_error))  # variation left unexplained by the line
ss_total = np.sum(np.square(y_data - np.mean(y_data)))  # total variation around the mean of y
r_squared = 1.0 - ss_residual / ss_total
print("Coeficiente de determinação: ", r_squared)

In [None]:
# shows the regression parameters
# '{:+}' always prints the intercept's sign, so the output reads as a valid
# equation (e.g. 'Y = 2.0X +3.0') whether the intercept is positive or negative;
# with a bare '{}' a positive intercept was printed with no operator at all.
print('Y = {}X {:+}'.format(optimized_parameters[0],optimized_parameters[1]))

In [None]:
# Figure: observed points as red circles with the fitted line drawn on top.
f = plt.figure(dpi=100, figsize=(4, 4))
axes = f.add_subplot(1, 1, 1)

# Sample the fitted line on an evenly spaced grid spanning the observed x range.
x_model = np.linspace(x_data.min(), x_data.max())
y_model = linear_equation(x_model, optimized_parameters[0], optimized_parameters[1])

axes.plot(x_data, y_data, 'ro')
axes.plot(x_model, y_model)

plt.xlabel('Idade')
plt.ylabel('Salário Anual');

**Using Sklearn**

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [None]:
# scikit-learn expects the features as a 2-D array, so the 1-D x values are
# turned into a single column before fitting.
x_sklmodel = x_data.reshape(-1, 1)
regression_object = LinearRegression()
regression = regression_object.fit(X=x_sklmodel, y=y_data)  # performs the regression

In [None]:
# Predictions of the fitted scikit-learn model on the training inputs.
# This rebinds `prediction`, replacing the values computed with curve_fit.
prediction = regression_object.predict(X=x_sklmodel)

In [None]:
# The score is stored under its own name. Assigning it to `mean_squared_error`
# would replace the imported scikit-learn function with a number, so running
# this cell a second time would raise a TypeError (object is not callable).
mse_sklearn = mean_squared_error(y_data,prediction)
print('MSE: ', mse_sklearn)

In [None]:
# coef_ is a one-element array here (single feature), so it is indexed to
# print a plain number instead of '[...]'. '{:+}' always prints the
# intercept's sign so the line reads as a valid equation.
print('Y = {}X {:+}'.format(regression_object.coef_[0],regression_object.intercept_))

# coefficient of determination of the scikit-learn fit
r_2 = r2_score(y_data,prediction)

print('Coeficiente de determinação (R2): ',r_2)

In [None]:
import pandas as pd

In [None]:
# Load the head-size / brain-weight dataset and preview its first five rows.
data = pd.read_csv(filepath_or_buffer='../input/headbrain/headbrain.csv')
data.head(n=5)

In [None]:
# Pull the predictor (head size) and the response (brain weight) columns
# out of the frame as plain arrays.
x, y = (
    data[column].values
    for column in ('Head Size(cm^3)', 'Brain Weight(grams)')
)

In [None]:
# Fit a linear regression of y on x. The single feature is first reshaped
# into a column because scikit-learn expects a 2-D feature array.
x_reshaped = x.reshape(-1, 1)
reg = LinearRegression()
regression = reg.fit(X=x_reshaped, y=y)

In [None]:
# Predictions of the fitted model on the same inputs it was trained on.
prediction = reg.predict(X=x_reshaped)

In [None]:
# coef_ is a one-element array here (single feature), so it is indexed to
# print a plain number instead of '[...]'. '{:+}' always prints the
# intercept's sign; with a bare '{}' a positive intercept appeared with no
# operator, which made the printed equation ambiguous.
print('Y = {}x {:+}'.format(reg.coef_[0],reg.intercept_))

# coefficient of determination of the fit
r_2 = r2_score(y,prediction)

print('Coeficiente de determinação (R2): ',r_2)

In [None]:
# Figure: raw measurements in gray with the fitted regression line on top.
f = plt.figure(dpi=100, figsize=(4, 4))
ax = f.add_subplot(1, 1, 1)

ax.scatter(x, y, color='gray')
ax.plot(x, prediction, linewidth=2)

plt.xlabel('Head Size(cm^3)')
plt.ylabel('Brain Weight(grams)');