In [1]:
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

In [5]:
# Load the dataset from the CSV file; the path is resolved relative to the working directory.
data = pd.read_csv(filepath_or_buffer="child_development_data.csv")

In [6]:
# Preview the first five rows to sanity-check the columns and values that were loaded.
data.head(n=5)

Unnamed: 0,Age,Gender,Height (cm),Weight (kg),Activity Level,Health Issues,Emotional Well-being,Social Interaction,Cognitive Milestones,Mental Health History,Confidence and Self-esteem
0,6,Other,121.5,36.1,Moderate,Sport Injury/Accident,Normal,Weekly,No,No,Confused
1,17,Female,122.4,66.7,Low,,Happy,Weekly,Yes,No,High
2,10,Female,178.3,66.6,High,Asthma,Happy,,Yes,No,Confused
3,6,Female,129.0,71.2,Low,Sport Injury/Accident,Normal,Daily,Yes,Yes,High
4,6,Male,114.4,21.1,High,Sport Injury/Accident,Normal,,No,Yes,Confused


In [7]:
# BMI = weight (kg) / height (m)^2. Height is stored in centimetres, so convert it first.
height_in_metres = data['Height (cm)'] / 100
data['BMI'] = data['Weight (kg)'] / height_in_metres ** 2

In [9]:
# Derive a category for every row by applying `bmi_category` element-wise to the BMI column.
# NOTE(review): `bmi_category` is not defined in any cell visible here (execution count 8 is
# absent), so this cell fails on a fresh kernel unless that helper is restored — confirm.
data['BMI_Category'] = data['BMI'].apply(bmi_category)

In [10]:
# Handle missing values: impute each numeric column with its own median.
# Medians are computed for numeric columns only, so gaps in the text columns
# are left as NaN by this step.
numeric_medians = data.median(numeric_only=True)
data = data.fillna(value=numeric_medians)

In [11]:
# Encode categorical variables as integer codes.
# One LabelEncoder is kept per column so the label -> code mapping can be
# looked up again after the column has been overwritten with its codes.
categorical_columns = (
    'Gender',
    'Activity Level',
    'Health Issues',
    'Emotional Well-being',
    'Social Interaction',
    'Cognitive Milestones',
    'Mental Health History',
    'Confidence and Self-esteem',
)
label_encoders = {col: LabelEncoder() for col in categorical_columns}
for col, le in label_encoders.items():
    # Fit on the raw labels and replace the column with the resulting codes.
    data[col] = le.fit_transform(data[col])

In [12]:
# Prepare features and target.
# The feature list is spelled out explicitly; the derived 'BMI' column is not part of it.
feature_columns = [
    'Age',
    'Gender',
    'Height (cm)',
    'Weight (kg)',
    'Activity Level',
    'Health Issues',
    'Emotional Well-being',
    'Social Interaction',
    'Cognitive Milestones',
    'Mental Health History',
    'Confidence and Self-esteem',
]
X = data[feature_columns]
y = data['BMI_Category']

In [13]:
# Split the data: hold out 20% of the rows for testing.
# The fixed random_state makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
# Train the model: a 100-tree random forest with a fixed seed for reproducibility.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
# Kept as the cell's last expression so the fitted estimator is displayed.
model.fit(X=X_train, y=y_train)

In [20]:
# Display the held-out feature rows; categorical columns appear as their integer codes.
X_test

Unnamed: 0,Age,Gender,Height (cm),Weight (kg),Activity Level,Health Issues,Emotional Well-being,Social Interaction,Cognitive Milestones,Mental Health History,Confidence and Self-esteem
1860,18,0,138.8,72.8,2,0,0,2,0,0,1
353,7,1,104.1,69.3,1,1,0,1,1,0,0
1333,18,0,119.1,84.5,0,5,1,2,1,1,2
905,8,2,132.0,20.8,2,4,0,1,0,1,0
1289,18,2,146.2,84.6,2,0,2,0,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...
965,15,2,136.6,51.3,2,5,1,2,0,0,0
1284,18,2,118.0,52.0,0,3,2,0,1,0,0
1739,15,1,162.9,54.0,0,4,2,2,1,1,0
261,6,1,132.3,53.8,2,4,1,0,1,0,1


In [24]:
# One hand-written sample in the model's input format. The categorical values are
# ALREADY integer-encoded; the codes follow the LabelEncoder mappings printed by the
# "Check the label encoding" cell.
sample_data = {
    'Age': 10,
    'Gender': 1,  # encoded: 1 = Male
    'Height (cm)': 140,
    'Weight (kg)': 35,
    'Activity Level': 2,  # encoded: 2 = Moderate
    'Health Issues': 5,  # encoded: 5 = nan (the class used for missing values)
    'Emotional Well-being': 0,  # encoded: 0 = Happy
    'Social Interaction': 1,  # encoded: 1 = Weekly
    'Cognitive Milestones': 0,  # encoded: 0 = No
    'Mental Health History': 1,  # encoded: 1 = Yes
    'Confidence and Self-esteem': 0  # encoded: 0 = Confused
}

In [23]:
# Check the label encoding for each categorical column:
# print every class label next to the integer code its encoder assigned.
for col in label_encoders:
    le = label_encoders[col]
    print(f"Label encoding for {col}:")
    # A label's position in classes_ is its integer code.
    for index, label in enumerate(le.classes_, start=0):
        print(f"{label}: {index}")
    print("\n")


Label encoding for Gender:
Female: 0
Male: 1
Other: 2


Label encoding for Activity Level:
High: 0
Low: 1
Moderate: 2


Label encoding for Health Issues:
ADHD: 0
Asthma: 1
Critical Health Issue: 2
Obesity: 3
Sport Injury/Accident: 4
nan: 5


Label encoding for Emotional Well-being:
Happy: 0
Normal: 1
Sad: 2


Label encoding for Social Interaction:
Daily: 0
Weekly: 1
nan: 2


Label encoding for Cognitive Milestones:
No: 0
Yes: 1


Label encoding for Mental Health History:
No: 0
Yes: 1


Label encoding for Confidence and Self-esteem:
Confused: 0
High: 1
Low: 2




In [25]:
# Convert the sample data into a one-row DataFrame with the same structure as X.
# Columns are selected by X.columns instead of trusting the dict's key order:
# a differently ordered sample is realigned, and a missing feature raises a
# KeyError here rather than producing a mismatched input for the model.
sample_df = pd.DataFrame([sample_data])[list(X.columns)]

# Predict the BMI category
predicted_bmi_category = model.predict(sample_df)

print(f"Predicted BMI Category: {predicted_bmi_category[0]}")

Predicted BMI Category: Underweight


In [27]:
# Persist the trained classifier.
with open('bmi_model.pkl', 'wb') as f:
    pickle.dump(model, f)

# The model was trained on integer codes produced by the fitted LabelEncoders, so
# whoever loads it needs the same encoders to turn raw labels into matching codes.
# They are written to a separate file so bmi_model.pkl still holds only the model.
with open('bmi_label_encoders.pkl', 'wb') as f:
    pickle.dump(label_encoders, f)