In [1]:
import pandas as pd
import numpy as np

print(" Creating simple dataset...")

# Build a small synthetic dataset.
# The global NumPy seed is pinned so every run draws the same sample.
np.random.seed(42)
n_samples = 100

# Two features: integer ages in [18, 65) and an income that grows linearly
# with age plus Gaussian noise (mean 0, standard deviation 5000).
# The randint draw must stay ahead of the normal draw so the random stream
# is consumed in the same order.
age = np.random.randint(low=18, high=65, size=n_samples)
income = 1000 * age + np.random.normal(loc=0, scale=5000, size=n_samples)

# Binary label: 1 where income exceeds 50000, otherwise 0.
target = (income > 50000).astype(int)

# Assemble the feature columns first, then append the label column last.
data = pd.DataFrame({'age': age, 'income': income}).assign(can_buy_house=target)

# Emit the summary and a preview of the first rows in a single call;
# sep="\n" places each item on its own line.
print(
    f" Dataset created with {len(data)} samples",
    " Features: age, income",
    " Target: can_buy_house",
    "\nFirst 5 rows:",
    data.head(),
    sep="\n",
)

# Bookkeeping values kept at notebook level.
dataset_size = data.shape[0]
feature_count = 2

 Creating simple dataset...
 Dataset created with 100 samples
 Features: age, income
 Target: can_buy_house

First 5 rows:
   age        income  can_buy_house
0   56  59181.525542              1
1   46  41466.396657              0
2   32  34380.212937              0
3   60  66518.306342              1
4   25  26057.935062              0


In [2]:
# Data preparation
from sklearn.model_selection import train_test_split

print(" Preparing data for training...")

# Feature matrix (age, income) and label vector (can_buy_house).
X = data[['age', 'income']]
y = data['can_buy_house']

print(f" Features shape: {X.shape}", f" Target shape: {y.shape}", sep="\n")

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# partition repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Record the partition sizes once and reuse them in the report below.
train_samples = len(X_train)
test_samples = len(X_test)

print(f" Training samples: {train_samples}")
print(f" Test samples: {test_samples}")

# Quick sanity statistics computed on the training partition only.
print(
    "\n Training data stats:",
    f"   Average age: {X_train['age'].mean():.1f}",
    f"   Average income: ${X_train['income'].mean():.0f}",
    f"   Positive cases: {y_train.sum()}/{len(y_train)}",
    sep="\n",
)

print(" Data preparation completed!")

 Preparing data for training...
 Features shape: (100, 2)
 Target shape: (100,)
 Training samples: 70
 Test samples: 30

 Training data stats:
   Average age: 41.2
   Average income: $42049
   Positive cases: 22/70
 Data preparation completed!


In [3]:
# Model training
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

print(" Training model...")

# Construct the classifier and fit it on the training partition;
# fit() returns the estimator itself, so `model` is the fitted object.
model = LogisticRegression(random_state=42).fit(X_train, y_train)

print(" Model training completed!")

# Accuracy measured on the same rows the model was fitted on, so it is
# not an estimate of performance on unseen data.
train_predictions = model.predict(X_train)
train_accuracy = accuracy_score(y_train, train_predictions)
training_accuracy = train_accuracy

print(f" Training accuracy: {train_accuracy:.3f}")

# Model metadata: coef_ holds one row for this binary problem, with
# entries in the same order as the columns of X_train (age, income).
model_type = "LogisticRegression"
coefficients = model.coef_[0]

print(
    " Model coefficients:",
    f"   Age coefficient: {coefficients[0]:.4f}",
    f"   Income coefficient: {coefficients[1]:.6f}",
    sep="\n",
)

 Training model...
 Model training completed!
 Training accuracy: 1.000
 Model coefficients:
   Age coefficient: 0.0009
   Income coefficient: 0.035647


In [4]:
# Model evaluation
from sklearn.metrics import classification_report

print(" Evaluating model...")

# Score the fitted model on the held-out test partition.
test_predictions = model.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)

print(f" Test accuracy: {test_accuracy:.3f}")

# Per-class precision / recall / F1 on the test partition.
print("\n Classification Report:")
print(classification_report(y_test, test_predictions))

# Simple predictions on new data, given as [age, income] pairs.
print("\n Sample predictions:")
sample_data = [[25, 30000], [45, 80000], [35, 60000]]
# The model was fitted on a DataFrame, so hand it a DataFrame with the same
# column names; a bare nested list triggers scikit-learn's feature-name
# mismatch warning.
sample_frame = pd.DataFrame(sample_data, columns=X_test.columns)
sample_predictions = model.predict(sample_frame)

# Loop variables are deliberately NOT called `age` / `income`: those names
# already hold the notebook-level feature arrays, and rebinding them here would
# silently replace the arrays with scalars.
for (sample_age, sample_income), predicted_label in zip(sample_data, sample_predictions):
    prediction = "Yes" if predicted_label == 1 else "No"
    print(f"   Age {sample_age}, Income ${sample_income:,} → Can buy house: {prediction}")

# Final metrics
final_accuracy = test_accuracy
# Count correct predictions directly. Reconstructing the count as
# int(accuracy * n) truncates, and can come out one too low whenever the
# floating-point product lands just below an integer.
total_correct = int(accuracy_score(y_test, test_predictions, normalize=False))
# Qualitative rating: above 0.8 is "Good", above 0.6 is "Fair", else "Poor".
model_performance = "Good" if test_accuracy > 0.8 else "Fair" if test_accuracy > 0.6 else "Poor"

print("\n Evaluation completed!")
print(f" Final accuracy: {final_accuracy:.3f}")
print(f" Correct predictions: {total_correct}/{len(y_test)}")
print(f" Model performance: {model_performance}")

 Evaluating model...
 Test accuracy: 1.000

 Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00         7

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30


 Sample predictions:
   Age 25, Income $30,000 → Can buy house: No
   Age 45, Income $80,000 → Can buy house: Yes
   Age 35, Income $60,000 → Can buy house: Yes

 Evaluation completed!
 Final accuracy: 1.000
 Correct predictions: 30/30
 Model performance: Good


