# Machine Learning Algorithms with Scikit-Learn

This notebook contains implementations of various machine learning algorithms using Scikit-Learn. Each algorithm is implemented in a separate cell with a brief description of the working steps.

#  Linear Regression

Linear Regression is used to model the relationship between a dependent variable and one or more independent variables. The goal is to find the line that best fits the data.

In [13]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Generate synthetic data: a noisy line through the origin, y = 2.5 * x + noise
# (there is no intercept term in the generating process).
import numpy as np

# Use a seeded generator so that Restart & Run All reproduces the same data,
# and therefore the same error, on every run.
rng = np.random.default_rng(42)
X = rng.random((100, 1)) * 10                      # 100 samples, uniform in [0, 10)
y = 2.5 * X + rng.standard_normal((100, 1)) * 2    # slope 2.5, Gaussian noise scaled by 2

# Hold out 20% of the samples for testing; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the linear model on the training portion only
model = LinearRegression()
model.fit(X_train, y_train)

# Predict the targets of the held-out samples
y_pred = model.predict(X_test)

# Score the predictions against the true held-out targets
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error : ",mse)

Mean Squared Error :  5.232387818698624


In [14]:
# Display the linear regression predictions for the held-out test samples
y_pred

array([[ 6.83341121],
       [21.54068063],
       [23.49103202],
       [ 2.39237584],
       [ 3.50738702],
       [ 2.91502636],
       [ 0.26938526],
       [22.58635509],
       [12.21858106],
       [ 4.53599483],
       [12.83122619],
       [11.3617735 ],
       [19.60310892],
       [15.44099321],
       [15.41108962],
       [11.79501606],
       [ 6.85894786],
       [13.2778536 ],
       [19.17029651],
       [ 2.75578589]])

In [15]:
# Display the true held-out targets, for comparison with the predictions
y_test

array([[ 5.73875108],
       [21.92713055],
       [25.45541711],
       [ 2.85740087],
       [ 6.76128159],
       [ 7.4086608 ],
       [ 2.98600643],
       [24.15260711],
       [11.48660877],
       [ 5.87764808],
       [ 8.6241672 ],
       [14.957944  ],
       [16.86427588],
       [13.84672254],
       [16.1067107 ],
       [11.03605324],
       [ 8.86327646],
       [13.24688628],
       [17.01879185],
       [ 5.18346609]])

# Logistic Regression

Logistic Regression is used for binary classification problems. It models the probability of a class label based on one or more independent variables.

In [16]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Fetch the Iris dataset and unpack its feature matrix and class labels
iris = load_iris()
X, y = iris.data, iris.target

# Keep only the samples of classes 0 and 1 so the task is binary.
# The mask is computed once, before either array is filtered.
keep = y != 2
X, y = X[keep], y[keep]

# Reserve 20% of the samples for testing (seeded split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the classifier on the training portion (fit returns the estimator itself)
model = LogisticRegression().fit(X_train, y_train)

# Predict class labels for the held-out samples
y_pred = model.predict(X_test)

# Fraction of held-out samples that were classified correctly
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy : ', accuracy)

Accuracy :  1.0


# Decision Tree

Decision Tree is a non-parametric supervised learning method used for classification and regression. It splits the data into subsets based on the most significant attributes.


In [17]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load the Iris dataset (all three classes are used here)
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing (seeded split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the tree. scikit-learn randomly permutes the candidate features at
# every split, so random_state is pinned to make the fitted tree, and the
# reported accuracy, reproducible across runs.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict class labels for the held-out samples
y_pred = model.predict(X_test)

# Fraction of held-out samples that were classified correctly
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy : ',accuracy)

Accuracy :  1.0


# Support Vector Machine (SVM)

Support Vector Machine (SVM) is a supervised learning model used for classification and regression. It finds the hyperplane that best separates the classes in the feature space.

In [18]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Fetch the Iris dataset and unpack its feature matrix and class labels
iris = load_iris()
X, y = iris.data, iris.target

# Drop every sample of class 2 so that only two classes remain.
# The mask is built from the unfiltered labels and applied to both arrays.
binary_mask = y != 2
X, y = X[binary_mask], y[binary_mask]

# Reserve 20% of the samples for testing (seeded split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a support vector classifier with its default settings
model = SVC().fit(X_train, y_train)

# Predict class labels for the held-out samples
y_pred = model.predict(X_test)

# Fraction of held-out samples that were classified correctly
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy : ', accuracy)


Accuracy :  1.0


In [19]:
# Display the class labels the SVM predicted for the held-out samples
y_pred

array([1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0])

In [20]:
# Display the held-out feature rows that the predictions were made on
X_test

array([[6. , 2.7, 5.1, 1.6],
       [5.5, 2.3, 4. , 1.3],
       [5.9, 3.2, 4.8, 1.8],
       [4.8, 3. , 1.4, 0.3],
       [5.1, 3.8, 1.9, 0.4],
       [5.1, 3.4, 1.5, 0.2],
       [4.6, 3.6, 1. , 0.2],
       [5.5, 2.4, 3.8, 1.1],
       [5.4, 3.7, 1.5, 0.2],
       [5.1, 3.5, 1.4, 0.2],
       [5.7, 3.8, 1.7, 0.3],
       [4.8, 3.1, 1.6, 0.2],
       [6.1, 2.8, 4.7, 1.2],
       [5.5, 4.2, 1.4, 0.2],
       [5.5, 2.6, 4.4, 1.2],
       [5. , 3.6, 1.4, 0.2],
       [6.8, 2.8, 4.8, 1.4],
       [6.7, 3. , 5. , 1.7],
       [4.8, 3. , 1.4, 0.1],
       [5.4, 3.4, 1.5, 0.4]])

In [21]:
# Display the true class labels of the held-out samples, for comparison with the predictions
y_test

array([1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0])