## import libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Supervised Learning: Linear Regression

## import DataSet or DataSources

In [None]:
# Load the salary dataset from a local CSV file into a pandas DataFrame.
df_salary = pd.read_csv('./local_files/salary.csv')

## Create variables (Dependent and Independent)

### using iloc and loc from pandas

In [None]:
# Column selection with the pandas indexers `loc` and `iloc`.
# loc:  selects columns by name     ==> SYNTAX: df.loc[<row_start:row_end>, ['columnName']]
# iloc: selects columns by position ==> SYNTAX: df.iloc[<row_start:row_end>, <col_start:col_end>]

# Example
# All rows of the 'YearsExperience' column; selecting with a list returns a DataFrame.
data_using_loc = df_salary.loc[:, ['YearsExperience']]
# Column at position 1, taken only from the leading rows returned by head().
data_using_iloc = df_salary.head().iloc[:, 1]
# Last expression of the cell, so it is shown as the cell output.
data_using_loc

## X, Y

In [None]:
# Create the independent variable (x) and the dependent variable (y).

# x: the 'YearsExperience' column; selecting with a list keeps it two-dimensional.
x = df_salary.loc[:, ['YearsExperience']].values
# y: the last column of the DataFrame as a one-dimensional array.
y = df_salary.iloc[:, -1].values
# Last expression of the cell, so y is shown as the cell output.
y

## dataframe split into training and test set using Sklearn

### train_test_split
Splits the data into a training set (used to fit the model) and a test set (used to evaluate it).

In [None]:
# import
from sklearn.model_selection import train_test_split

# Build the training and test sets.
# NOTE: the order of the returned variables matters: x_train, x_test, y_train, y_test.
# test_size = 0.2 holds out 20% of the rows; random_state = 0 makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)
# Last expression of the cell, so y_train is shown as the cell output.
y_train

## Training the linear regression model with your data set

### import LinearRegression

In [None]:
from sklearn.linear_model import LinearRegression

# Build and train in a single expression: fit() hands back the fitted estimator.
regressor = LinearRegression().fit(x_train, y_train)
# Echo the fitted model as the cell output.
regressor

### Prediction of test results

In [None]:
# Predict the target for the held-out test inputs with the fitted regressor.
y_pred = regressor.predict(x_test)


In [None]:
# Display the actual test targets so they can be compared with the predictions.
y_test

## Training set display
### using matplotlib.pyplot

### show data from training

In [None]:
# Scatter the actual training observations (x_train against y_train).
plt.scatter(x_train, y_train, color='red')

# Overlay the fitted regression line: the model's predictions for the training inputs.
plt.plot(x_train, regressor.predict(x_train), color='blue')

# Chart title
plt.title('Salary vs Experience (Training set)')

# Axis labels
plt.xlabel('Years Experience')
plt.ylabel('Salary')
plt.show()

### show data from test

In [None]:
# Scatter the held-out test observations (x_test against y_test).
plt.scatter(x_test, y_test, color='red')

# Overlay the regression line learned from the training data. It is drawn from
# x_train on purpose: the model is unchanged, so this is the same line as in the
# training chart, now compared against points the model has not seen.
plt.plot(x_train, regressor.predict(x_train), color='blue')

# Chart title
plt.title('Salary vs Experience (Test set)')

# Axis labels
plt.xlabel('Years Experience')
plt.ylabel('Salary')
plt.show()

In [None]:
### Predicted salary at three experience levels: entry, mid and senior

# Years of experience to evaluate: 0, 5 and 15.
# Written as one row per sample with a single feature, the 2-D layout predict() is given.
arr_year = np.array([[0], [5], [15]])

# Predicted salary for each experience level from the fitted regressor.
predict_salary_from_year = regressor.predict(arr_year)

# flatten() turns the 2-D column of years into the 1-D list of bar positions.
plt.bar(arr_year.flatten().tolist(), predict_salary_from_year.tolist())

# Chart title: these bars are model predictions, not test-set observations.
plt.title('Predicted Salary vs Experience')

# Axis labels
plt.xlabel('Years Experience')
plt.ylabel('Salary')

# Render the figure explicitly, as the other plotting cells do.
plt.show()





# Supervised Learning: Logistic Regression

## import DataSet or DataSources

In [None]:
# Load the social-network ads dataset from a local CSV file into a pandas DataFrame.
df_social_network = pd.read_csv('./local_files/social_network_ads.csv')

## Create variables (Dependent and Independent)

In [None]:
# Features (x): every column except the last. Target (y): the last column.
# `.values` converts both to NumPy arrays, as the linear-regression section does.
# The scaler is then fitted without column names, so later calls that pass plain
# lists or arrays (e.g. std_scaler.transform([[30, 87000]])) do not trigger
# scikit-learn's "X does not have valid feature names" warning.
x = df_social_network.iloc[:, :-1].values
y = df_social_network.iloc[:, -1].values

## dataframe split into training and test set using Sklearn

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows as the test set; random_state = 0 makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 0) 

## apply normalization
using `StandardScaler` from scikit-learn

 ### fit_transform
> Standardize features by removing the mean and scaling to unit variance. The standard score of a sample x is calculated as:
> $z = (x - u) / s$
>
> where `u` is the mean of the training samples (or zero if `with_mean=False`), and `s` is the standard deviation of the training samples (or one if `with_std=False`).

 

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training features only.
std_scaler = StandardScaler().fit(x_train)

# Apply those same training statistics to both sets, so the test data never
# influences the scaling.
x_train = std_scaler.transform(x_train)
x_test = std_scaler.transform(x_test)

## Training model

In [None]:
from sklearn.linear_model import LogisticRegression

# A fixed random_state keeps the results the same from run to run.
# fit() hands back the fitted estimator, so construction and training are chained.
logistic = LogisticRegression(random_state=0).fit(x_train, y_train)
# Echo the fitted classifier as the cell output.
logistic

### predict new result

In [None]:
# New observation: a 30-year-old person with a salary of 87,000.
new_person = [[30, 87000]]

# The classifier was trained on scaled features, so scale the observation with
# the already-fitted scaler before asking for a prediction.
new_person_scaled = std_scaler.transform(new_person)
predict_person = logistic.predict(new_person_scaled)
predict_person

### predict with test data

In [None]:
# Predict the class of every sample in the test set with the fitted classifier.
predict_test_data = logistic.predict(x_test)

### check accuracy
using `accuracy_score` imported from `sklearn.metrics`

In [None]:
from sklearn.metrics import accuracy_score

# Compare the true test labels with the predicted ones; the score is the cell output.
accuracy_score(y_true=y_test, y_pred=predict_test_data)

## visualize result of predict

In [None]:
# import color matplotlib
from matplotlib.colors import ListedColormap

# Two-colour palette shared by the decision regions and the sample points.
class_cmap = ListedColormap(('red', 'green'))

# Plot in the original (unscaled) units: undo the scaling of the training features.
x_set, y_set = std_scaler.inverse_transform(x_train), y_train

# Grid covering the data range, padded by 10 on the first axis and 1000 on the second.
# NOTE(review): a 0.25 step on the second axis gives a very dense grid if that
# feature spans a wide range; consider a coarser step there if this cell is slow
# or runs out of memory.
x1, x2 = np.meshgrid(
    np.arange(start=x_set[:, 0].min() - 10, stop=x_set[:, 0].max() + 10, step=0.25),
    np.arange(start=x_set[:, 1].min() - 1000, stop=x_set[:, 1].max() + 1000, step=0.25),
)

# Classify every grid point. The grid is in original units, so it is scaled
# first because the classifier is queried with scaled features.
grid_points = np.array([x1.ravel(), x2.ravel()]).T
grid_pred = logistic.predict(std_scaler.transform(grid_points)).reshape(x1.shape)
plt.contourf(x1, x2, grid_pred, alpha=0.75, cmap=class_cmap)

plt.xlim(x1.min(), x1.max())
plt.ylim(x2.min(), x2.max())

# One scatter call per class so each class gets its own colour and legend entry.
for i, j in enumerate(np.unique(y_set)):
    # `color=` rather than `c=`: a single RGBA tuple passed as `c` is ambiguous
    # (it can be read as values to colour-map) and makes matplotlib emit a warning.
    plt.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1], color=class_cmap(i), label=j)

plt.title('Logistic Regression (training set)')
plt.xlabel('year')
plt.ylabel('salary')
plt.legend()
plt.show()


### visualize result of test

In [None]:
# import color matplotlib
from matplotlib.colors import ListedColormap

# Two-colour palette shared by the decision regions and the sample points.
class_cmap = ListedColormap(('red', 'green'))

# Plot in the original (unscaled) units: undo the scaling of the test features.
x_set, y_set = std_scaler.inverse_transform(x_test), y_test

# Grid covering the data range, padded by 10 on the first axis and 1000 on the second.
# NOTE(review): a 0.25 step on the second axis gives a very dense grid if that
# feature spans a wide range; consider a coarser step there if this cell is slow
# or runs out of memory.
x1, x2 = np.meshgrid(
    np.arange(start=x_set[:, 0].min() - 10, stop=x_set[:, 0].max() + 10, step=0.25),
    np.arange(start=x_set[:, 1].min() - 1000, stop=x_set[:, 1].max() + 1000, step=0.25),
)

# Classify every grid point. The grid is in original units, so it is scaled
# first because the classifier is queried with scaled features.
grid_points = np.array([x1.ravel(), x2.ravel()]).T
grid_pred = logistic.predict(std_scaler.transform(grid_points)).reshape(x1.shape)
plt.contourf(x1, x2, grid_pred, alpha=0.75, cmap=class_cmap)

plt.xlim(x1.min(), x1.max())
plt.ylim(x2.min(), x2.max())

# One scatter call per class so each class gets its own colour and legend entry.
for i, j in enumerate(np.unique(y_set)):
    # `color=` rather than `c=`: a single RGBA tuple passed as `c` is ambiguous
    # (it can be read as values to colour-map) and makes matplotlib emit a warning.
    plt.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1], color=class_cmap(i), label=j)

# This chart shows the held-out test samples, so the title names the test set.
plt.title('Logistic Regression (test set)')
plt.xlabel('year')
plt.ylabel('salary')
plt.legend()
plt.show()