In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Gerando dados para regressão linear simples

In [None]:
from sklearn import datasets, linear_model
import matplotlib.pyplot as plt

# Build a synthetic one-feature regression problem. Because coef=True,
# make_regression also returns the coefficient of the underlying linear model.
x, y, coef = datasets.make_regression(n_samples=100,    # number of samples
                                      n_features=1,     # number of features
                                      n_informative=1,  # number of informative features
                                      noise=10,         # standard deviation of the Gaussian noise added to the output
                                      coef=True,        # also return the true coefficient used to generate the data
                                      random_state=0)   # fixed seed: same data points on every run

# Linearly rescale feature x to the range -20..20
x = np.interp(x, (x.min(), x.max()), (-20, 20))
# Linearly rescale target y to the range 0..50
y = np.interp(y, (y.min(), y.max()), (0, 50))
# NOTE: `coef` was returned before the rescaling above, so it describes the
# original data, not the rescaled x/y.

plt.plot(x, y, '.', label='training data')
plt.legend()  # without this call the label passed to plot() is never displayed
plt.show()

# Modelando uma regressão linear

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Build the linear regression estimator and fit it on x, y in one step
# (fit() returns the estimator itself).
regr = linear_model.LinearRegression().fit(x, y)

# In-sample predictions: the model is evaluated on the same x it was fitted on,
# so the scores below measure training fit, not generalization.
y_pred = regr.predict(x)

# Fit-quality metrics computed against the training targets
mse = mean_squared_error(y, y_pred)
r2 = r2_score(y, y_pred)

print('Coefficients: {}, {}\n'.format(regr.coef_, regr.intercept_))  # slope(s) and intercept
print('Mean squared error: %.2f' % mse)
print('Coefficient of determination: %.2f' % r2)  # 1 is a perfect prediction

# Data points as dots, fitted line in black
plt.plot(x, y, '.')
plt.plot(x, y_pred, color='black', linewidth=1)
plt.show()

# Modelando uma regressão polinomial

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline


def f(x):
    """Return ``x * sin(x)``, the curve the polynomial model tries to approximate.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which to evaluate the function.

    Returns
    -------
    The element-wise product of ``x`` and ``np.sin(x)``.
    """
    sine = np.sin(x)
    return np.multiply(x, sine)


# Dense, evenly spaced grid used to draw the curves
x_plot = np.linspace(0, 10, 100)

# Training inputs: shuffle the same grid with a fixed seed, keep the first 20
# points, and sort them back into increasing order
x = np.linspace(0, 10, 100)
rng = np.random.RandomState(0)
rng.shuffle(x)
x = np.sort(x[:20])
y = f(x)

# Column-vector (n_samples, 1) versions of the 1-D arrays for the estimator
X = x[:, np.newaxis]
X_plot = x_plot[:, np.newaxis]

plt.plot(x_plot, f(x_plot), color='cornflowerblue', linewidth=2, label="ground truth")
plt.scatter(x, y, color='navy', s=30, marker='o', label="training points")

degree = 4
# Use the Ridge class imported in this cell instead of reaching for
# `linear_model`, which is only in scope if an earlier cell has been run.
# The pipeline step is still named 'ridge'.
model = make_pipeline(PolynomialFeatures(degree), Ridge())
# Unregularized alternative:
# model = make_pipeline(PolynomialFeatures(degree), linear_model.LinearRegression())
model.fit(X, y)
y_plot = model.predict(X_plot)
plt.plot(x_plot, y_plot, color='red', linewidth=2, label="degree %d" % degree)

plt.legend(loc='lower left')

plt.show()

In [None]:
print(model.get_params())

# PolynomialFeatures.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2; get_feature_names_out() is its replacement. Fall back to the
# old method only on versions that predate the new one.
poly_step = model.named_steps['polynomialfeatures']
if hasattr(poly_step, 'get_feature_names_out'):
    feature_names = poly_step.get_feature_names_out()
else:
    feature_names = poly_step.get_feature_names()
feature_names

In [None]:
# Show the fitted weights and intercept of the pipeline's final Ridge step.
# (If the pipeline is built with LinearRegression instead, the step is named
# 'linearregression'.)
ridge_step = model.named_steps['ridge']
print(ridge_step.coef_)
print(ridge_step.intercept_)



# Modelando uma Decision Tree

In [None]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
import matplotlib.pyplot as plt

# Synthetic dataset: 80 inputs drawn uniformly from [0, 5), sorted, with a sine target
rng = np.random.RandomState(1)
x = 5 * rng.rand(80, 1)
x.sort(axis=0)
y = np.sin(x).ravel()
# Perturb every 5th target (16 of the 80 points) with uniform noise
y[::5] += 3 * (0.5 - rng.rand(16))

# Fit a regression tree; a depth limit of 100 is far more than 80 samples can use
max_depth = 100
regr_1 = DecisionTreeRegressor(max_depth=max_depth).fit(x, y)

# Evaluate the tree on a fine grid over [0, 5)
x_test = np.arange(0.0, 5.0, 0.01).reshape(-1, 1)
y_1 = regr_1.predict(x_test)

# Training points (orange) and the tree's piecewise-constant prediction (blue)
plt.figure()
plt.title("Decision Tree Regression")
plt.xlabel("data")
plt.ylabel("target")
plt.scatter(x, y, s=20, edgecolor="black", c="darkorange", label="data")
plt.plot(x_test, y_1, color="cornflowerblue", label="max_depth={}".format(max_depth), linewidth=2)
plt.legend()
plt.show()

In [None]:
from sklearn import tree
# Draw the fitted tree with filled (colored) nodes; the returned value is
# assigned to `_` so it is not echoed as cell output.
_ = tree.plot_tree(decision_tree=regr_1, filled=True)

# Modelando uma Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Ten-tree random forest with a fixed seed, fitted on the current x, y
regressor = RandomForestRegressor(n_estimators=10, random_state=0).fit(x, y)
# Predictions on the same inputs the forest was fitted on
y_pred = regressor.predict(x)

# Training points (orange) and the forest's prediction (blue)
plt.figure()
plt.title("Random Forest Regression")
plt.xlabel("x")
plt.ylabel("y")
plt.scatter(x, y, s=20, edgecolor="black", c="darkorange", label="data")
plt.plot(x, y_pred, color="cornflowerblue", label="RF prediction", linewidth=2)
plt.legend()
plt.show()

# Modelando uma Logistic Regression

In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="white")

# Create a two-feature, two-class dataset with 200 samples
X, y = make_classification(n_samples=200, n_features=2, n_informative=2, n_redundant=0,
                           weights=[.5, .5], random_state=15)
# Fit the logistic regression on the first 100 samples; the remaining 100 are
# only used for plotting below
clf = LogisticRegression().fit(X[:100], y[:100])

# Dense grid over [-5, 5) x [-5, 5) with step 0.01
xx, yy = np.mgrid[-5:5:.01, -5:5:.01]
grid = np.c_[xx.ravel(), yy.ravel()]
# Predicted probability of class 1 at every grid point
probs = clf.predict_proba(grid)[:, 1].reshape(xx.shape)

# The figure is named `fig` rather than `f` so it does not overwrite the
# function f(x) defined in the polynomial-regression cell.
fig, axes = plt.subplots(1, 2, figsize=(16, 6), squeeze=False)
ax_prob, ax_level = axes[0]

# Left panel: filled probability surface with a colorbar
contour = ax_prob.contourf(xx, yy, probs, 25, cmap="RdBu", vmin=0, vmax=1)
ax_c = fig.colorbar(contour, ax=ax_prob)
ax_c.set_label("$P(y = 1)$")
ax_c.set_ticks([0, .25, .5, .75, 1])

# Right panel: a single probability contour line.
# NOTE(review): this draws the P(y = 1) = 0.20 level; if the intent is the
# decision boundary, the level would normally be 0.5 - confirm.
# vmin/vmax presumably exist only to push the line to the dark end of "Greys".
ax_level.contour(xx, yy, probs, levels=[.20], cmap="Greys", vmin=-10, vmax=0)

# Both panels show the held-out half of the samples with identical axes
for ax in (ax_prob, ax_level):
    ax.scatter(X[100:, 0], X[100:, 1], c=y[100:], s=50, cmap="RdBu", vmin=-.2, vmax=1.2,
               edgecolor="white", linewidth=1)
    ax.set(aspect="equal", xlim=(-5, 5), ylim=(-5, 5), xlabel="$X_1$", ylabel="$X_2$")

plt.show()