In [None]:
# Install scikit-learn if not installed
# pip install scikit-learn

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Sample data
# Goal: predict whether someone buys a product from their age and income.
# Each row of X describes one person: [Age, Income]
X = [
    [22, 15000],
    [25, 18000],
    [47, 25000],
    [52, 30000],
    [46, 28000],
    [56, 32000],
    [48, 34000],
    [55, 36000]
]

# Labels, one per row of X: 0 = Did not buy, 1 = Bought
y = [0, 0, 1, 1, 1, 1, 1, 1]

# Split data into training and testing sets (optional but good practice).
# test_size=0.25 holds out 2 of the 8 rows; random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Create the model.
# Age is in the tens while income is in the tens of thousands. Feeding such
# differently scaled features straight into LogisticRegression makes the default
# L2 penalty act unevenly on the two coefficients and can stop the solver from
# converging, so standardize the features first. Wrapping both steps in a pipeline
# means the scaler is fitted on the training rows only and is re-applied
# automatically by predict() / predict_proba().
# (The fitted classifier itself is the last pipeline step: model[-1].)
model = make_pipeline(StandardScaler(), LogisticRegression())

# Train the model (fits the scaler, then the classifier, on the training rows)
model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = model.predict(X_test)

# Check accuracy
print("Predictions:", y_pred)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Predict for new data: a 30-year-old with an income of 20000.
# Raw values are passed in; the pipeline scales them the same way as the training data.
new_person = [[30, 20000]]
prediction = model.predict(new_person)
# predict_proba returns one row per sample with one column per class (0, then 1)
probability = model.predict_proba(new_person)

print("Will the person buy? (1=Yes, 0=No):", prediction[0])
print("Probability of buying:", probability[0][1])


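- `X` is the input data: one `[Age, Income]` row per person.

- `y` is the target: `1` if that person bought the product, `0` if they did not.

- We train the model with `fit()`.

- We predict the class of new samples with `predict()`.

- We get class probabilities with `predict_proba()`; column 1 is the probability of buying.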

In [None]:
## Example output (illustrative; exact values can differ with the scikit-learn version and model settings)
#Predictions: [1 1]
#Accuracy: 1.0
#Will the person buy? (1=Yes, 0=No): 0
#Probability of buying: 0.421


Another Example

In [None]:
# Install scikit-learn if you don't have it:
# pip install scikit-learn

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Toy dataset: does a student pass, given how many hours they studied?
# One feature per sample (hours studied), covering 1 through 10 hours.
X = [[hours] for hours in range(1, 11)]

# Targets aligned with X: the first four students fail (0), the other six pass (1).
y = [0] * 4 + [1] * 6

# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

# Build the classifier and fit it in one step (fit() returns the fitted estimator).
model = LogisticRegression().fit(X_train, y_train)

# Score the held-out samples.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Report how the model did on the test set.
print("Test Set Predictions:", y_pred)
print("Accuracy:", test_accuracy)
print("Classification Report:\n", report)

# Classify an unseen student who studied 3.5 hours.
new_data = [[3.5]]
predicted_class = model.predict(new_data)
# One row per sample, one column per class; column 1 is the "pass" class.
predicted_probability = model.predict_proba(new_data)

print("\nNew student who studied 3.5 hours:")
print("Predicted Class (0=Fail, 1=Pass):", predicted_class[0])
print("Predicted Probability of Pass:", predicted_probability[0][1])


In [None]:
## Example output (illustrative; exact values can differ with the scikit-learn version)
#
# Test Set Predictions: [1 0 1]
# Accuracy: 1.0
# Classification Report:
#               precision    recall  f1-score   support
#
#            0       1.00      1.00      1.00         1
#            1       1.00      1.00      1.00         2
#
#     accuracy                           1.00         3
#    macro avg       1.00      1.00      1.00         3
# weighted avg       1.00      1.00      1.00         3
#
# New student who studied 3.5 hours:
# Predicted Class (0=Fail, 1=Pass): 0
# Predicted Probability of Pass: 0.458


Explanation of Output
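The model classified all three test samples correctly (100% accuracy). With a test set this small, however, that figure is not a reliable estimate of how the model would perform on new data.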

For a student who studied 3.5 hours, the model predicts class 0 (Fail), with a 45.8% probability of passing.

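This makes sense because 3.5 hours lies just below the decision boundary, the point where the predicted probability of passing crosses 50% (probably around 4-5 hours in this case), so the probability is close to, but under, 50%.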