In [None]:
import pandas as pd
import numpy as np

# Seed the legacy global NumPy RNG so the simulated sample is reproducible.
np.random.seed(42)

n_families = 1000
children_per_family = 5
families = []

# Simulate every family in birth order, one long-format record per child.
for family_id in range(1, n_families + 1):
    previous_boys = 0  # boys already born in this family
    for child_number in range(1, children_per_family + 1):
        # Each birth is its own draw, uniform over the two labels.
        gender = np.random.choice(['Boy', 'Girl'])

        # The record stores the tally *before* the current birth is counted.
        families.append(dict(
            family_id=family_id,
            child_number=child_number,
            gender=gender,
            previous_boys=previous_boys,
        ))

        # Only a boy advances the running tally seen by later siblings.
        previous_boys += int(gender == 'Boy')

# One row per child: family_id, child_number, gender, previous_boys.
df = pd.DataFrame(families)

# Quick look at the first family.
print(df.head())


   family_id  child_number gender  previous_boys
0          1             1    Boy              0
1          1             2   Girl              1
2          1             3    Boy              1
3          1             4    Boy              2
4          1             5    Boy              3


In [None]:
# Display the simulated frame as the cell's result; the rendered output elides the middle rows.
df

Unnamed: 0,family_id,child_number,gender,previous_boys
0,1,1,Boy,0
1,1,2,Girl,1
2,1,3,Boy,1
3,1,4,Boy,2
4,1,5,Boy,3
...,...,...,...,...
4995,1000,1,Boy,0
4996,1000,2,Girl,1
4997,1000,3,Boy,1
4998,1000,4,Girl,2


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np

# Requires `df` (one row per child, with `gender` and `previous_boys` columns)
# from the simulation cell; nothing else from earlier cells is read here.

# Binary target: 1 = Girl, 0 = Boy.
df['is_girl'] = df['gender'].map({'Girl': 1, 'Boy': 0})
# .map() yields NaN for any label missing from the dict; fail loudly here
# rather than deep inside model.fit().
assert df['is_girl'].notna().all(), "df['gender'] contains a label other than 'Girl'/'Boy'"

# Features and target
X = df[['previous_boys']]  # double brackets keep X two-dimensional, as scikit-learn expects
y = df['is_girl']

# Hold out 20% of the rows for evaluation; fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a plain logistic regression of "is girl" on the number of earlier boys.
model = LogisticRegression()
model.fit(X_train, y_train)

# Hard labels for the report, plus the predicted probability of class 1 (Girl).
# y_proba is not used further in this cell.
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

# target_names follow the sorted class labels: 0 -> 'Boy', 1 -> 'Girl'.
# Genders were simulated as independent draws, so accuracy near 0.5 is the
# expected result, not a modelling failure.
print(classification_report(
    y_test,
    y_pred,
    target_names=['Boy', 'Girl']
))


              precision    recall  f1-score   support

         Boy       0.43      0.08      0.13       515
        Girl       0.48      0.89      0.62       485

    accuracy                           0.47      1000
   macro avg       0.45      0.48      0.37      1000
weighted avg       0.45      0.47      0.37      1000



In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np

# Requires `df` (one row per child, with `gender` and `previous_boys` columns)
# from the simulation cell; nothing else from earlier cells is read here.

# Binary target: 1 = Girl, 0 = Boy.
df['is_girl'] = df['gender'].map({'Girl': 1, 'Boy': 0})
# .map() yields NaN for any label missing from the dict; fail loudly here
# rather than deep inside model.fit().
assert df['is_girl'].notna().all(), "df['gender'] contains a label other than 'Girl'/'Boy'"

# Features and target
X = df[['previous_boys']]  # double brackets keep X two-dimensional, as scikit-learn expects
y = df['is_girl']

# Hold out 20% of the rows for evaluation; fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Cross-validated logistic regression: 5-fold CV selects the regularisation
# strength, and class_weight='balanced' reweights the classes by inverse
# frequency so neither label dominates the fit.
model = LogisticRegressionCV(cv=5, class_weight='balanced', max_iter=1000)
model.fit(X_train, y_train)

# Hard labels for the report, plus the predicted probability of class 1 (Girl).
# y_proba is not used further in this cell.
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

# target_names follow the sorted class labels: 0 -> 'Boy', 1 -> 'Girl'.
# Genders were simulated as independent draws, so accuracy near 0.5 is the
# expected result, not a modelling failure.
print(classification_report(
    y_test,
    y_pred,
    target_names=['Boy', 'Girl']
))


              precision    recall  f1-score   support

         Boy       0.51      0.59      0.55       515
        Girl       0.48      0.41      0.44       485

    accuracy                           0.50      1000
   macro avg       0.50      0.50      0.50      1000
weighted avg       0.50      0.50      0.50      1000

