<a href="https://colab.research.google.com/github/mandesai/SciforTechnologies/blob/main/MachineLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##Linear Regression:

Linear Regression is a linear approach to modelling the relationship between a dependent variable and one or more independent variables.

In [1]:
import numpy as np

class LinearRegression:
    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros((n_features,1))
        self.bias = 0

        for _ in range(self.n_iterations):
            y_predicted = np.dot(X, self.weights) + self.bias

            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        return np.dot(X, self.weights) + self.bias

In [2]:
if __name__ == "__main__":
    # Generate sample data
    np.random.seed(0)
    X = 2 * np.random.rand(100, 1)
    y = 5 + 2 * X + np.random.randn(100, 1)

    # Instantiate and fit the model
    model = LinearRegression(learning_rate=0.01, n_iterations=1000)
    model.fit(X, y)

    # Make predictions
    X_test = np.array([[0], [2]])
    predictions = model.predict(X_test)
    print("Predictions:", predictions)

Predictions: [[4.83379102]
 [9.45993287]]


##Multiple Linear Regression:

Multiple Linear Regression (MLR) is an extension of simple linear regression that involves predicting a continuous target variable based on two or more predictor variables. In MLR, the relationship between the target variable and multiple predictors is modeled as a linear combination of the predictors.

In [4]:
import numpy as np

# Generate some sample data
np.random.seed(0)
X1 = 2 * np.random.rand(100, 1)
X2 = 3 * np.random.rand(100, 1)
y = 4 + 3 * X1 + 5 * X2 + np.random.randn(100, 1)

# Add bias term to feature matrix
X_b = np.c_[np.ones((100, 1)), X1, X2]

# Calculate the coefficients using the normal equation: theta = (X^T * X)^-1 * X^T * y
theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)

print("Coefficients (intercept, theta1, theta2):", theta.flatten())

Coefficients (intercept, theta1, theta2): [4.30730531 2.72417723 4.91371774]


##Decision Tree:

Decision Tree is a supervised learning algorithm used for both classification and regression tasks. It works by recursively partitioning the feature space into smaller regions, making decisions based on the values of the features at each node. The goal is to create a tree that predicts the target variable by splitting the data into subsets that are as homogeneous as possible.

In [3]:
import numpy as np

class Node:
    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        self.feature = feature  # Index of feature to split on
        self.threshold = threshold  # Threshold value for the feature
        self.left = left  # Left subtree
        self.right = right  # Right subtree
        self.value = value  # Class label for leaf node

class DecisionTreeClassifier:
    def __init__(self, max_depth=None):
        self.max_depth = max_depth

    def fit(self, X, y):
        self.root = self._grow_tree(X, y, depth=0)

    def _grow_tree(self, X, y, depth):
        n_samples, n_features = X.shape
        n_classes = len(np.unique(y))

        # Stopping criteria
        if (self.max_depth is not None and depth >= self.max_depth) or n_classes == 1:
            return Node(value=np.bincount(y).argmax())

        # Find the best split
        best_gini = float('inf')
        best_feature, best_threshold = None, None
        for feature in range(n_features):
            thresholds = np.unique(X[:, feature])
            for threshold in thresholds:
                left_indices = X[:, feature] < threshold
                gini = self._gini_impurity(y[left_indices], y[~left_indices])
                if gini < best_gini:
                    best_gini = gini
                    best_feature = feature
                    best_threshold = threshold

        # Create split
        left_indices = X[:, best_feature] < best_threshold
        left = self._grow_tree(X[left_indices], y[left_indices], depth + 1)
        right = self._grow_tree(X[~left_indices], y[~left_indices], depth + 1)

        return Node(feature=best_feature, threshold=best_threshold, left=left, right=right)

    def _gini_impurity(self, left_y, right_y):
        n_left, n_right = len(left_y), len(right_y)
        n_total = n_left + n_right
        p_left = np.bincount(left_y, minlength=n_total) / n_left
        p_right = np.bincount(right_y, minlength=n_total) / n_right
        gini_left = 1.0 - np.sum(p_left ** 2)
        gini_right = 1.0 - np.sum(p_right ** 2)
        return (n_left * gini_left + n_right * gini_right) / n_total

    def predict(self, X):
        return np.array([self._predict_single(x, self.root) for x in X])

    def _predict_single(self, x, node):
        if node.value is not None:
            return node.value
        if x[node.feature] < node.threshold:
            return self._predict_single(x, node.left)
        else:
            return self._predict_single(x, node.right)

In [5]:
if __name__ == "__main__":
    # Generate sample data
    np.random.seed(0)
    X = np.random.rand(100, 2)
    y = (X[:, 0] + X[:, 1] > 1).astype(int)

    # Instantiate and fit the model
    model = DecisionTreeClassifier(max_depth=3)
    model.fit(X, y)

    # Make predictions
    X_test = np.array([[0.5, 0.5], [1.5, 1.5]])
    predictions = model.predict(X_test)
    print("Predictions:", predictions)

Predictions: [1 1]


  p_left = np.bincount(left_y, minlength=n_total) / n_left
  p_left = np.bincount(left_y, minlength=n_total) / n_left


##Naive Bayes:

Naive Bayes is a probabilistic classifier based on Bayes' theorem with the "naive" assumption of independence between features. It's commonly used for classification tasks, especially in text classification and spam filtering.

In [6]:
import numpy as np

class GaussianNaiveBayes:
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.classes = np.unique(y)
        self.mean = np.zeros((len(self.classes), n_features))
        self.var = np.zeros((len(self.classes), n_features))
        self.priors = np.zeros(len(self.classes))

        for idx, c in enumerate(self.classes):
            X_c = X[y == c]
            self.mean[idx, :] = X_c.mean(axis=0)
            self.var[idx, :] = X_c.var(axis=0)
            self.priors[idx] = X_c.shape[0] / float(n_samples)

    def _calculate_likelihood(self, X, class_idx):
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        numerator = np.exp(- (X - mean) ** 2 / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def _calculate_posterior(self, X):
        posteriors = []
        for idx, _ in enumerate(self.classes):
            prior = np.log(self.priors[idx])
            likelihood = np.sum(np.log(self._calculate_likelihood(X, idx)), axis=1)
            posterior = prior + likelihood
            posteriors.append(posterior)
        return posteriors

    def predict(self, X):
        posteriors = self._calculate_posterior(X)
        return self.classes[np.argmax(posteriors, axis=0)]

In [7]:
if __name__ == "__main__":
    # Generate sample data
    np.random.seed(0)
    X = np.random.randn(100, 2)
    y = np.random.randint(0, 2, 100)

    # Instantiate and fit the model
    model = GaussianNaiveBayes()
    model.fit(X, y)

    # Make predictions
    X_test = np.array([[0, 0], [1, 1]])
    predictions = model.predict(X_test)
    print("Predictions:", predictions)

Predictions: [0 1]


##SVM:

Support Vector Machine (SVM) is a powerful supervised learning algorithm used for classification and regression tasks. It works by finding the hyperplane that best separates the classes in the feature space. SVM aims to maximize the margin between the hyperplane and the nearest data points (support vectors) of each class while minimizing the classification error.

In [8]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load the iris dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Instantiate the SVM classifier
svm_classifier = SVC(kernel='linear', C=1.0, random_state=42)

# Fit the model on the training data
svm_classifier.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = svm_classifier.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0
