# **Day 9**

**Logistic Regression**

Logistic regression is one of the most popular ML algorithms for classification, i.e. for predicting a categorical target.

**Problem Statement**

Classification of Iris flowers using Logistic Regression

In [None]:
#lets import the libraries
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from math import ceil

#plots
import matplotlib.pyplot as plt
import seaborn as sb

#algorithms
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from pandas.plotting import parallel_coordinates

#Advanced Optimization
from scipy import optimize as op

In [None]:
# Read the Iris CSV from the Kaggle input directory and preview its first rows
iris = pd.read_csv(
    '../input/data-science-machine-learning-and-ai-using-python/Iris.csv'
)
iris.head()

In [None]:
#lets visualize the species
#plot the species with respect to sepal length and sepal width
#NOTE: FacetGrid's `size` argument was renamed to `height` in seaborn 0.9,
#      so `height` is used here to keep the cell working on current releases
sepalPlt = sb.FacetGrid(iris, hue='Species', height=6).map(plt.scatter, "SepalLengthCm", "SepalWidthCm")
plt.legend(loc='upper left')

In [None]:
#plot the species with respect to petal length and petal width
#NOTE: FacetGrid's `size` argument was renamed to `height` in seaborn 0.9,
#      so `height` is used here to keep the cell working on current releases
petalPlt = sb.FacetGrid(iris, hue='Species', height=6).map(plt.scatter, "PetalLengthCm", "PetalWidthCm")
plt.legend(loc='upper left')

In [None]:
# Parallel-coordinates plot of the measurement columns, one line per flower,
# coloured by species. The "Id" column is dropped so it is not drawn as an axis.
parallel_coordinates(
    iris.drop("Id", axis=1),
    "Species",
)

In [None]:
#lets setup the data for training our model
species = ['Iris-setosa','Iris-versicolor','Iris-virginica']

#number of examples
m = iris.shape[0]

#features
n = 4

#number of classes (one per entry in `species`)
k = len(species)

#design matrix: column 0 is the intercept term (all ones),
#columns 1..n hold the four measurements
X = np.ones((m, n+1))

X[:,1] = iris['PetalLengthCm']
X[:,2] = iris['PetalWidthCm']
X[:,3] = iris['SepalLengthCm']
X[:,4] = iris['SepalWidthCm']

#lets provide labels
Y = iris['Species']

#mean normalization of the feature columns only.
#Column 0 must stay at 1.0: centering it would turn the intercept column into
#all zeros and silently remove the bias term from the model.
X[:,1:] = X[:,1:] - X[:,1:].mean(axis=0)

#lets split dataset
X_train,X_test, Y_train,Y_test = train_test_split(X,Y, test_size=0.2, random_state=11)

In [None]:
#Logistic Regression
def sigmoid(z):
  """Element-wise logistic function 1 / (1 + exp(-z))."""
  return 1.0/(1 + np.exp(-z))

#Regularised cost functions
def regCostFunction(theta, X, Y, _lambda = 0.1):
  """Regularised logistic-regression cost (mean cross-entropy + L2 penalty).

  Parameters
  ----------
  theta : array-like with X.shape[1] elements; any shape, it is flattened.
  X : array-like of shape (m, n_params); the design matrix.
  Y : array-like with m elements holding 0/1 labels; flattened.
  _lambda : L2 penalty strength. The penalty is applied to every
      component of theta, including the first one.

  Returns
  -------
  The scalar cost.
  """
  X = np.asarray(X, dtype=float)
  theta = np.asarray(theta, dtype=float).ravel()
  Y = np.asarray(Y, dtype=float).ravel()
  m = len(Y)

  h = sigmoid(X.dot(theta))
  # Keep h strictly inside (0, 1): a saturated sigmoid would otherwise give
  # log(0) = -inf and a NaN cost through 0 * -inf.
  eps = np.finfo(float).eps
  h = np.clip(h, eps, 1 - eps)

  reg = (_lambda/(2*m)) * np.sum(theta ** 2)

  return (1/m) * (-Y.dot(np.log(h)) - (1 - Y).dot(np.log(1 - h))) + reg

def regGradient(theta, X, Y, _lambda = 0.1):
  """Gradient of regCostFunction with respect to theta.

  Takes the same arguments as regCostFunction. Returns a 1-D array of
  shape (n_params,), the gradient shape scipy.optimize.minimize expects
  from a `jac` callable.
  """
  X = np.asarray(X, dtype=float)
  m = X.shape[0]
  theta = np.asarray(theta, dtype=float).ravel()
  Y = np.asarray(Y, dtype=float).ravel()

  h = sigmoid(X.dot(theta))
  reg = _lambda * theta / m

  return (1/m) * X.T.dot(h - Y) + reg

#Optimal Theta
def logisticRegression(X,Y,theta):
  """Minimise regCostFunction with the TNC method, starting from `theta`.

  `theta` may have any shape with X.shape[1] elements; it is flattened
  because scipy.optimize.minimize requires a 1-D initial guess.
  Returns the optimised parameters as a 1-D array.
  """
  x0 = np.asarray(theta, dtype=float).ravel()
  res = op.minimize(fun = regCostFunction, x0 = x0, args = (X,Y), method='TNC', jac=regGradient)

  return res.x

In [None]:
#Lets train our model
#row i of all_theta holds the parameters of the classifier for species[i]
all_theta = np.zeros((k, n+1))

#one vs all: fit one binary classifier per species
for i, flower in enumerate(species):
  #set the labels 0 and 1 (1 where the sample belongs to `flower`)
  tmp_y = np.array(Y_train == flower, dtype= int)
  #scipy.optimize.minimize requires a 1-D initial guess, so start from a flat zero vector
  optTheta = logisticRegression(X_train, tmp_y, np.zeros(n + 1))
  all_theta[i] = optTheta

In [None]:
# Score every test sample against each of the one-vs-all classifiers;
# P has one row per sample and one column per species.
P = sigmoid(X_test.dot(all_theta.T))

# For each row pick the species whose classifier gives the highest probability
p = [species[best] for best in np.argmax(P, axis=1)]

# Report the share of correctly classified test samples
print("Test Accuracy : ", accuracy_score(Y_test, p) * 100, "%")

**Decision Tree**

A tree-shaped algorithm used to find the course of action. Each node in the tree represents an action.

**Problem Statement**

Use ML to predict the selling price of houses based on some economic factors, using a Decision Tree model.

In [None]:
#lets import the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Read the Boston housing CSV from the Kaggle input directory
boston = pd.read_csv(
    '../input/data-science-machine-learning-and-ai-using-python/Boston.csv'
)

# Preview the first rows
boston.head()

In [None]:
# Scatter plot of the 'rm' column against the 'medv' column.
# `x` and `y` are reused by the following cell to build the model inputs.
x, y = boston['rm'], boston['medv']

plt.scatter(x, y, color='g')
plt.xlabel('Avg rooms per dwelling')
plt.ylabel('Median values of the home')

In [None]:
# Wrap the two selected columns as single-column DataFrames:
# X is the feature variable, Y is the target variable.
X = x.to_frame()
Y = y.to_frame()

In [None]:
#lets divide the data into training set and test set (80% / 20%)
from sklearn.model_selection import train_test_split

#a fixed random_state keeps the split, and every metric computed from it,
#identical across Restart & Run All
X_train, X_test, Y_train,Y_test = train_test_split(X,Y,test_size=0.20, random_state=11)

In [None]:
#Building the model with Decision Tree Regressor
from sklearn.tree import DecisionTreeRegressor

#The split criterion is left at its default (squared error). The explicit
#'mse' spelling was deprecated in scikit-learn 1.0 and removed in 1.2, so
#omitting it keeps this cell working on both old and new versions.
regressor = DecisionTreeRegressor(random_state=100, max_depth=4, min_samples_leaf = 1)

#train the model
regressor.fit(X_train, Y_train)

In [None]:
# Export the fitted tree in Graphviz DOT format to 'regression_tree.dot'
from sklearn.tree import export_graphviz

export_graphviz(
    regressor,
    out_file='regression_tree.dot',
)

In [None]:
# Predict the target for every row of the held-out test features
y_pred = regressor.predict(
    X_test
)

In [None]:
# Show the same five positions (4..8) of the predictions and of the true targets
for values in (y_pred, Y_test):
    print(values[4:9])

In [None]:
#lets find out the rmse value (root of the mean squared error on the test set)
from sklearn.metrics import mean_squared_error
#arguments follow the documented order (y_true, y_pred)
mse = mean_squared_error(Y_test, y_pred)
rmse = np.sqrt(mse)
print(rmse)