In [14]:
import pandas as pd
import numpy as np

# Step 1: Load the data
file_path = 'Customer_buy.csv'
data = pd.read_csv(file_path)

# Step 2: Explore the data
print(data.head())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
data.info()

# Step 3: Preprocess the data
# The single feature column is reshaped to (n_samples, 1) so that it can be
# used as a 2-D design matrix; the target stays a flat 1-D array.
X = data['No_emails_sent'].values.reshape(-1, 1)  # Feature
y = data['Class'].values                          # Target

# Step 4: Initialize parameters
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)) element-wise.

    The exponential is only ever taken of a non-positive number, so large
    negative inputs no longer overflow (and no RuntimeWarning is emitted).
    For z >= 0 the arithmetic is the same as the textbook formula.

    Parameters
    ----------
    z : scalar or array-like of numbers

    Returns
    -------
    numpy.float64 for scalar input, otherwise an ndarray shaped like ``z``,
    with values in [0, 1].
    """
    z_arr = np.asarray(z, dtype=float)
    # decay = exp(-|z|) lies in [0, 1], so it can never overflow.
    decay = np.exp(-np.abs(z_arr))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same value.
    result = np.where(z_arr >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # arrays of any other rank unchanged.
    return result[()]

# Start from an all-zero weight vector (one weight per feature column) and a
# zero bias; both are updated by the gradient-descent loop below.
w = np.zeros(X.shape[1])
b = 0.0
learning_rate = 0.01
epochs = 1000  # Increase epochs for better convergence
log_eps = 1e-15  # keeps the arguments of np.log strictly inside (0, 1)

# Step 5: Train the model
for epoch in range(epochs):
    # Forward pass: predicted probability of class 1 for every sample.
    z = np.dot(X, w) + b
    predictions = sigmoid(z)

    # Gradient Descent: gradients of the mean cross-entropy loss w.r.t. w and b.
    error = predictions - y
    dw = np.dot(X.T, error) / y.size
    db = np.sum(error) / y.size

    w -= learning_rate * dw
    b -= learning_rate * db

    # Print parameters every 100 epochs
    if (epoch + 1) % 100 == 0:
        # Clip before taking logs: a saturated prediction of exactly 0 or 1
        # would otherwise turn the reported loss into inf/nan.
        # Note: the loss uses this epoch's pre-update predictions, while the
        # printed w and b are the post-update values.
        clipped = np.clip(predictions, log_eps, 1 - log_eps)
        loss = -np.mean(y * np.log(clipped) + (1 - y) * np.log(1 - clipped))
        print(f'Epoch {epoch+1}: w = {w}, b = {b}, Loss = {loss}')

# Step 6: Make predictions
def predict(x):
    """Classify samples with the trained logistic model.

    Combines ``x`` with the module-level weights ``w`` and bias ``b``, maps
    the result through ``sigmoid`` and labels a sample 1 when its probability
    is strictly greater than 0.5, otherwise 0.
    """
    probability = sigmoid(np.dot(x, w) + b)
    is_positive = probability > 0.5
    return is_positive.astype(int)

# Score one hypothetical customer. The input is a 2-D array with one row and
# one column, so predict() returns a length-1 array and prediction[0] is the
# 0/1 class label for that customer.
emails_sent = np.array([[5]])  # Predict for 5 promotional emails
prediction = predict(emails_sent)
print(f'Prediction for {emails_sent[0][0]} promotional emails: {prediction[0]}')


   No_emails_sent  Discount_offered  Class
0               2                10      0
1               3                25      1
2               5                10      0
3               5                30      1
4               4                15      0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column            Non-Null Count  Dtype
---  ------            --------------  -----
 0   No_emails_sent    6 non-null      int64
 1   Discount_offered  6 non-null      int64
 2   Class             6 non-null      int64
dtypes: int64(3)
memory usage: 272.0 bytes
None
Epoch 100: w = [0.05876587], b = -0.04118658263363282, Loss = 0.684430958482125
Epoch 200: w = [0.07009793], b = -0.09155052088430553, Loss = 0.6817648518954607
Epoch 300: w = [0.08089586], b = -0.1408638445751143, Loss = 0.6792154924722261
Epoch 400: w = [0.09145623], b = -0.1890874136187146, Loss = 0.6767775402832745
Epoch 500: w = [0.10178587], b = -0.236245214712

In [15]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Step 1: Load the data
file_path = 'Customer_buy.csv'
data = pd.read_csv(file_path)

# Step 2: Explore the data
print(data.head())
# info() prints its summary directly and returns None; calling it bare avoids
# the extra "None" line that print(data.info()) produced.
data.info()

# Step 3: Preprocess the data
# Reshape the lone feature into an (n_samples, 1) matrix; keep the labels 1-D.
X = data['No_emails_sent'].values.reshape(-1, 1)  # Feature
y = data['Class'].values                          # Target

# Step 4: Initialize parameters (Custom Implementation)
def sigmoid(z):
    """Element-wise logistic function, evaluated without overflow.

    Computes 1 / (1 + exp(-z)). Only exp(-|z|) is ever evaluated, which is
    bounded by 1, so very negative inputs do not trigger an overflow warning.
    Non-negative inputs go through the same arithmetic as the plain formula.

    Parameters
    ----------
    z : scalar or array-like of numbers

    Returns
    -------
    numpy.float64 for scalar input, otherwise an ndarray with the shape of
    ``z`` and values in [0, 1].
    """
    z_arr = np.asarray(z, dtype=float)
    # Bounded exponential: exp of a non-positive argument stays within [0, 1].
    decay = np.exp(-np.abs(z_arr))
    # Pick the algebraically equivalent branch that matches the sign of z.
    result = np.where(z_arr >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # () indexing unwraps a 0-d array to a scalar; other arrays pass through.
    return result[()]

# Zero-initialised weights (one per feature column) and bias for the
# hand-written logistic regression.
w = np.zeros(X.shape[1])
b = 0.0
learning_rate = 0.01
epochs = 1000  # Increase epochs for better convergence
log_eps = 1e-15  # lower/upper guard so np.log never receives exactly 0

# Step 5: Train the model (Custom Implementation)
for epoch in range(epochs):
    # Forward pass: probability of class 1 for each row of X.
    z = np.dot(X, w) + b
    predictions = sigmoid(z)

    # Gradient Descent: the residual is shared by both gradients.
    error = predictions - y
    dw = np.dot(X.T, error) / y.size
    db = np.sum(error) / y.size

    w -= learning_rate * dw
    b -= learning_rate * db

    # Print parameters every 100 epochs
    if (epoch + 1) % 100 == 0:
        # Guard the logarithms: predictions that saturate at 0 or 1 would
        # otherwise make the reported loss inf/nan.
        # Note: the loss reflects the predictions made before this epoch's
        # update, whereas w and b are printed after it.
        clipped = np.clip(predictions, log_eps, 1 - log_eps)
        loss = -np.mean(y * np.log(clipped) + (1 - y) * np.log(1 - clipped))
        print(f'Epoch {epoch+1}: w = {w}, b = {b}, Loss = {loss}')

# Step 6: Make predictions (Custom Implementation)
def predict(x):
    """Return 0/1 class labels from the custom logistic regression.

    Uses the module-level ``w``, ``b`` and ``sigmoid``; a sample is assigned
    class 1 only when its predicted probability exceeds 0.5.
    """
    linear_score = np.dot(x, w) + b
    above_threshold = sigmoid(linear_score) > 0.5
    return above_threshold.astype(int)

# Query point: one customer who received five promotional emails, shaped as a
# single-row, single-column matrix.
emails_sent = np.array([[5]])
custom_prediction = predict(emails_sent)
print(f'Custom Implementation Prediction for {emails_sent[0][0]} emails: {custom_prediction[0]}')

# Step 7: Train with Scikit-learn
# fit() returns the estimator itself, so construction and fitting can be chained.
model = LogisticRegression().fit(X, y)

# Step 8: Compare predictions
sklearn_prediction = model.predict(emails_sent)
print(f'Scikit-learn Prediction for {emails_sent[0][0]} emails: {sklearn_prediction[0]}')

# Step 9: Accuracy comparison
# Both models are scored on the same data they were trained on.
sklearn_predictions = model.predict(X)
custom_predictions = predict(X)

sklearn_accuracy = accuracy_score(y, sklearn_predictions)
custom_accuracy = accuracy_score(y, custom_predictions)

print(f'Custom Implementation Accuracy: {custom_accuracy}')
print(f'Scikit-learn Accuracy: {sklearn_accuracy}')


   No_emails_sent  Discount_offered  Class
0               2                10      0
1               3                25      1
2               5                10      0
3               5                30      1
4               4                15      0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column            Non-Null Count  Dtype
---  ------            --------------  -----
 0   No_emails_sent    6 non-null      int64
 1   Discount_offered  6 non-null      int64
 2   Class             6 non-null      int64
dtypes: int64(3)
memory usage: 272.0 bytes
None
Epoch 100: w = [0.05876587], b = -0.04118658263363282, Loss = 0.684430958482125
Epoch 200: w = [0.07009793], b = -0.09155052088430553, Loss = 0.6817648518954607
Epoch 300: w = [0.08089586], b = -0.1408638445751143, Loss = 0.6792154924722261
Epoch 400: w = [0.09145623], b = -0.1890874136187146, Loss = 0.6767775402832745
Epoch 500: w = [0.10178587], b = -0.236245214712

In [16]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Step 1: Load the data
file_path = 'heart.csv'
data = pd.read_csv(file_path)

# Step 2: Explore the data
print(data.head())
# info() prints its own report and returns None, so it is called bare;
# print(data.info()) would add a meaningless "None" line to the output.
data.info()
print(data.describe())

# Step 3: Check for missing values
print("Missing values in each column:")
print(data.isnull().sum())

# Step 4: Preprocess the data (handle missing values if any)
data = data.dropna()  # Drop rows with missing values

# Convert categorical data to numerical if necessary.
# Every non-numeric column is encoded, not only those whose dtype compares
# equal to 'object': text columns held in pandas' dedicated string dtype fail
# that comparison and would otherwise reach the scaler as raw strings.
for column in data.columns:
    if not pd.api.types.is_numeric_dtype(data[column]):
        # factorize() replaces each distinct value with an integer code.
        # NOTE(review): integer codes imply an ordering between categories;
        # consider one-hot encoding for nominal features.
        data[column] = pd.factorize(data[column])[0]

# Step 5: Split features and target
X = data.drop(columns=['HeartDisease'])  # 'HeartDisease' is the column to predict
y = data['HeartDisease']

# Step 6: Split the data into train and test sets
# The split happens BEFORE scaling so that no test-set statistics influence
# the preprocessing. X itself is left unscaled.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 7: Normalize the data
# Fit the scaler on the training rows only, then apply that same
# transformation to the held-out rows (avoids train/test leakage).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 8: Train the Logistic Regression model
# max_iter is raised to 1000; fit() returns the fitted estimator, so the two
# steps are chained.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Step 9: Make predictions and evaluate accuracy
# Score the model on the held-out test rows only.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f'Model Accuracy on Heart Disease Prediction: {accuracy}')


   Age Sex ChestPainType  RestingBP  Cholesterol  FastingBS RestingECG  MaxHR  \
0   40   M           ATA        140          289          0     Normal    172   
1   49   F           NAP        160          180          0     Normal    156   
2   37   M           ATA        130          283          0         ST     98   
3   48   F           ASY        138          214          0     Normal    108   
4   54   M           NAP        150          195          0     Normal    122   

  ExerciseAngina  Oldpeak ST_Slope  HeartDisease  
0              N      0.0       Up             0  
1              N      1.0     Flat             1  
2              N      0.0       Up             0  
3              Y      1.5     Flat             1  
4              N      0.0       Up             0  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Age    