In [30]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [31]:
# Load the ads dataset; the file carries an .xls extension but is parsed here as CSV text.
df = pd.read_csv(filepath_or_buffer='Social_Network_Ads.xls')
df.head(n=5)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [32]:
# Feature columns: everything except the first column (User ID) and the last (Purchased).
# .copy() detaches the slice from `df`, so the column assignments made to X_pd in
# later cells modify an independent frame instead of a slice of `df`
# (which is what triggers pandas' SettingWithCopyWarning).
X_pd = df.iloc[:, 1:-1].copy()
X_pd.head()

Unnamed: 0,Gender,Age,EstimatedSalary
0,Male,19,19000
1,Male,35,20000
2,Female,26,43000
3,Female,27,57000
4,Male,19,76000


In [33]:
# Encode gender as 0/1. The mapping reads from `df` (which no cell reassigns) rather
# than from X_pd itself, so the cell can be re-run safely: mapping the already-encoded
# 0/1 column a second time would turn every value into NaN.
# Labels other than 'Male'/'Female' still become NaN, exactly as before.
X_pd['Gender'] = df['Gender'].map({'Male': 0, 'Female': 1})
X_pd.head()

Unnamed: 0,Gender,Age,EstimatedSalary
0,0,19,19000
1,0,35,20000
2,1,26,43000
3,1,27,57000
4,0,19,76000


In [34]:
# Min-max scale the salary column: (value - min) / (max - min).
X_pd['EstimatedSalary'] = X_pd['EstimatedSalary'].pipe(
    lambda salary: (salary - salary.min()) / (salary.max() - salary.min())
)

In [35]:
# Prepend the intercept column of ones. The guard keeps the cell re-runnable:
# DataFrame.insert raises ValueError when the column already exists.
if 'X0' not in X_pd.columns:
    X_pd.insert(0, 'X0', 1)
X_pd.head()

Unnamed: 0,X0,Gender,Age,EstimatedSalary
0,1,0,19,0.02963
1,1,0,35,0.037037
2,1,1,26,0.207407
3,1,1,27,0.311111
4,1,0,19,0.451852


In [36]:
# Target vector: the last column of the raw frame.
y_pd = df[df.columns[-1]]
y_pd.head(n=5)

0    0
1    0
2    0
3    0
4    0
Name: Purchased, dtype: int64

In [37]:
# Convert features and labels to extended-precision NumPy arrays for the hand-written optimiser.
X, y = (frame.to_numpy(dtype=np.longdouble) for frame in (X_pd, y_pd))

In [38]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1192,
    test_size=0.2,
)

In [39]:
class LogisticRegression:
    """Binary logistic regression trained with per-sample stochastic gradient updates.

    The design matrix is used exactly as given: no intercept column is added
    here, so callers supply their own bias feature if they want one.

    Attributes:
        learning_rate: Step size of the most recent training run, or None.
        convergence_criteria: Threshold on the Euclidean norm of the parameter
            change over one epoch below which training stops, or None.
        n_epochs_used: Number of epochs executed by the most recent training
            run, including the epoch in which convergence was detected; None
            before training.
        parameters: Column vector of shape (n_features, 1) with the fitted
            weights; None before training.
    """

    def __init__(self):
        self.learning_rate = None
        self.convergence_criteria = None
        self.n_epochs_used = None
        self.parameters = None

    def hypothesis(self, param_, input_):
        """Return the logistic sigmoid of ``param_.T @ input_``.

        Args:
            param_: Weight column vector of shape (n_features, 1).
            input_: Either one sample as a column (n_features, 1) or several
                samples stacked as columns (n_features, n_samples).

        Returns:
            Array of probabilities with shape (1, n_samples).
        """
        return 1 / (1 + np.exp(-np.dot(np.transpose(param_), input_)))

    def stochastic_graident_descent(self, input_, label_, learning_rate_, total_epochs_,
                                    convergence_criteria_, random_state_=None):
        """Fit the weights by visiting the samples one at a time in shuffled order.

        Each sample ``x`` with label ``y`` moves the weights by
        ``learning_rate_ * x * (y - sigmoid(w.T x))``. After every epoch the
        parameter change over that epoch is compared with
        ``convergence_criteria_``; training stops early once it is smaller.

        Args:
            input_: 2-D array of shape (n_samples, n_features).
            label_: Array of n_samples labels (0 or 1), aligned with ``input_``.
            learning_rate_: Step size applied to every per-sample update.
            total_epochs_: Maximum number of passes over the data.
            convergence_criteria_: Early-stopping threshold on the per-epoch
                parameter change (Euclidean norm).
            random_state_: Optional seed for the weight initialisation and the
                per-epoch shuffling. When None, the global ``np.random`` state
                is used, so ``np.random.seed`` keeps working as before.

        Raises:
            ValueError: If ``input_`` and ``label_`` have different lengths.
        """
        if len(input_) != len(label_):
            raise ValueError(
                "input_ and label_ must contain the same number of samples "
                f"(got {len(input_)} and {len(label_)})"
            )

        # Fall back to the module-level generator so unseeded calls behave
        # exactly as they always did.
        rng = np.random if random_state_ is None else np.random.RandomState(random_state_)

        self.learning_rate = learning_rate_
        self.convergence_criteria = convergence_criteria_
        self.n_epochs_used = 0
        # np.longdouble is the portable spelling; the np.float128 alias is only
        # defined on platforms whose long double is 128 bits wide.
        self.parameters = rng.rand(input_.shape[1], 1).astype(np.longdouble)

        for _ in range(total_epochs_):
            order = rng.permutation(len(input_))
            shuffled_input_ = input_[order]
            shuffled_label_ = label_[order]

            # Snapshot taken before the pass, used for the convergence test below.
            prev_param_ = self.parameters.copy()

            for sample, target in zip(shuffled_input_, shuffled_label_):
                features = sample.reshape(-1, 1)  # column vector (n_features, 1)
                residual = target - self.hypothesis(self.parameters, features)
                self.parameters += self.learning_rate * (features * residual)

            # Count the epoch before testing for convergence so that the epoch
            # which triggers the early stop is included in the total.
            self.n_epochs_used += 1

            if np.linalg.norm(self.parameters - prev_param_) < self.convergence_criteria:
                break

    # Correctly spelled alias; the original (misspelled) name stays available
    # for existing callers.
    stochastic_gradient_descent = stochastic_graident_descent

    def predict(self, X_test):
        """Classify every row of ``X_test`` as 0 or 1.

        Args:
            X_test: 2-D array of shape (n_samples, n_features) laid out like
                the training input.

        Returns:
            1-D integer array of length n_samples; 1 where the predicted
            probability is strictly greater than 0.5, otherwise 0.

        Raises:
            RuntimeError: If the model has not been trained yet.
        """
        if self.parameters is None:
            raise RuntimeError("predict() called before the model was trained")

        # Samples become columns, so a single matrix product scores all rows.
        probabilities = self.hypothesis(self.parameters, np.transpose(X_test))
        return (probabilities.reshape(-1) > 0.5).astype(int)

In [40]:
# Training draws its initial weights and per-epoch shuffles from NumPy's global
# random state; seed it so the fitted coefficients and metrics below are
# reproducible on Restart & Run All.
np.random.seed(1192)

lgr = LogisticRegression()
lgr.stochastic_graident_descent(
    X_train,
    y_train,
    learning_rate_=0.001,
    total_epochs_=1000,
    convergence_criteria_=0.001,
)

In [41]:
# Hard 0/1 predictions for the held-out rows.
y_pred = lgr.predict(X_test=X_test)

In [45]:
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
)

# Fraction of held-out rows the hand-written model classifies correctly
accuracy_custom = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Accuracy of your model:", accuracy_custom)

# Per-class precision / recall / F1 for a more detailed view
print(
    "Classification report for your model:\n",
    classification_report(y_true=y_test, y_pred=y_pred),
)

Accuracy of your model: 0.875
Classification report for your model:
               precision    recall  f1-score   support

         0.0       0.85      0.96      0.90        46
         1.0       0.93      0.76      0.84        34

    accuracy                           0.88        80
   macro avg       0.89      0.86      0.87        80
weighted avg       0.88      0.88      0.87        80



In [50]:
# Assuming your model has a method to get coefficients
coeffs_custom = lgr.parameters.flatten()

print("Coefficients from your model:", coeffs_custom)


Coefficients from your model: [-8.14816038 -0.17095371  0.16335937  3.08390723]
