In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
file_path = r"C:\Users\user\Desktop\OBS_Pred\ObesityDataSet_raw_and_data_sinthetic.csv"
df = pd.read_csv(file_path)

# Encode categorical features
label_cols = ['Gender', 'family_history_with_overweight', 'FAVC', 'CAEC', 'SMOKE', 'SCC', 'CALC', 'MTRANS', 'NObeyesdad']
encoder = LabelEncoder()

for col in label_cols:
    df[col] = encoder.fit_transform(df[col])

# Normalize numerical columns
scaler = StandardScaler()
num_cols = ['Age', 'Height', 'Weight', 'FCVC', 'NCP', 'CH2O', 'FAF', 'TUE']
df[num_cols] = scaler.fit_transform(df[num_cols])

# Split features and labels
X = df.drop(columns=['NObeyesdad']).values
y = df['NObeyesdad'].values

# Convert to PyTorch tensors
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test = torch.tensor(X_train, dtype=torch.float32), torch.tensor(X_test, dtype=torch.float32)
y_train, y_test = torch.tensor(y_train, dtype=torch.long), torch.tensor(y_test, dtype=torch.long)

# Create DataLoader
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define a simple feedforward neural network
class ObesityNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(ObesityNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Model setup
input_size = X_train.shape[1]
hidden_size = 64
output_size = len(np.unique(y))  # Number of unique obesity classes

model = ObesityNN(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 20
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}")

# Evaluate the model
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.numpy())
        all_labels.extend(y_batch.numpy())

# Print accuracy and classification report
print("Accuracy:", accuracy_score(all_labels, all_preds))
print(classification_report(all_labels, all_preds))


Epoch 1/20, Loss: 95.6400
Epoch 2/20, Loss: 67.0345
Epoch 3/20, Loss: 46.5499
Epoch 4/20, Loss: 36.0398
Epoch 5/20, Loss: 28.1605
Epoch 6/20, Loss: 22.7283
Epoch 7/20, Loss: 19.1937
Epoch 8/20, Loss: 16.2071
Epoch 9/20, Loss: 13.9482
Epoch 10/20, Loss: 12.0684
Epoch 11/20, Loss: 10.7799
Epoch 12/20, Loss: 9.5159
Epoch 13/20, Loss: 8.7886
Epoch 14/20, Loss: 8.0113
Epoch 15/20, Loss: 7.5101
Epoch 16/20, Loss: 6.5919
Epoch 17/20, Loss: 6.2937
Epoch 18/20, Loss: 6.0901
Epoch 19/20, Loss: 5.7193
Epoch 20/20, Loss: 5.3810
Accuracy: 0.9385342789598109
              precision    recall  f1-score   support

           0       0.90      1.00      0.95        54
           1       0.92      0.76      0.83        58
           2       0.96      1.00      0.98        70
           3       1.00      0.98      0.99        60
           4       1.00      1.00      1.00        65
           5       0.82      0.91      0.86        58
           6       0.98      0.90      0.94        58

    accuracy   

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
file_path = r"C:\Users\user\Desktop\OBS_Pred\ObesityDataSet_raw_and_data_sinthetic.csv"
df = pd.read_csv(file_path)

# Display basic information
print(df.info())
print(df.head())

# Encoding categorical features
label_cols = ['Gender', 'family_history_with_overweight', 'FAVC', 'CAEC', 'SMOKE', 'SCC', 'CALC', 'MTRANS', 'NObeyesdad']
encoder = LabelEncoder()

for col in label_cols:
    df[col] = encoder.fit_transform(df[col])

# Normalize numerical columns
scaler = StandardScaler()
num_cols = ['Age', 'Height', 'Weight', 'FCVC', 'NCP', 'CH2O', 'FAF', 'TUE']
df[num_cols] = scaler.fit_transform(df[num_cols])

print(df.head())

X = df.drop(columns=['NObeyesdad'])  # Features
y = df['NObeyesdad']  # Target variable

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train a Random Forest model
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2111 entries, 0 to 2110
Data columns (total 17 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Gender                          2111 non-null   object 
 1   Age                             2111 non-null   int64  
 2   Height                          2111 non-null   float64
 3   Weight                          2111 non-null   float64
 4   family_history_with_overweight  2111 non-null   object 
 5   FAVC                            2111 non-null   object 
 6   FCVC                            2111 non-null   float64
 7   NCP                             2111 non-null   float64
 8   CAEC                            2111 non-null   object 
 9   SMOKE                           2111 non-null   object 
 10  CH2O                            2111 non-null   float64
 11  SCC                             2111 non-null   object 
 12  FAF                             21

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
file_path = r"C:\Users\user\Desktop\OBS_Pred\ObesityDataSet_raw_and_data_sinthetic.csv"
df = pd.read_csv(file_path)

# Encoding categorical features
label_cols = ['Gender', 'family_history_with_overweight', 'FAVC', 'CAEC', 'SMOKE', 'SCC', 'CALC', 'MTRANS', 'NObeyesdad']
encoder = LabelEncoder()
for col in label_cols:
    df[col] = encoder.fit_transform(df[col])

# Normalize numerical columns
scaler = StandardScaler()
num_cols = ['Age', 'Height', 'Weight', 'FCVC', 'NCP', 'CH2O', 'FAF', 'TUE']
df[num_cols] = scaler.fit_transform(df[num_cols])

# Splitting data into features (X) and target (y)
X = df.drop(columns=['NObeyesdad'])
y = df['NObeyesdad']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Define models
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Support Vector Machine": SVC(kernel='linear'),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
}

# Train and evaluate each model
for name, model in models.items():
    print(f"\nTraining {name}...")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    # Print results
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print(classification_report(y_test, y_pred))



Training Logistic Regression...
Accuracy: 0.8723
              precision    recall  f1-score   support

           0       0.90      0.96      0.93        54
           1       0.82      0.69      0.75        58
           2       0.89      0.94      0.92        70
           3       0.95      0.98      0.97        60
           4       1.00      0.98      0.99        65
           5       0.71      0.78      0.74        58
           6       0.81      0.74      0.77        58

    accuracy                           0.87       423
   macro avg       0.87      0.87      0.87       423
weighted avg       0.87      0.87      0.87       423


Training Support Vector Machine...
Accuracy: 0.9480
              precision    recall  f1-score   support

           0       0.92      1.00      0.96        54
           1       0.96      0.83      0.89        58
           2       0.99      0.96      0.97        70
           3       0.97      1.00      0.98        60
           4       1.00      

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Accuracy: 0.9574
              precision    recall  f1-score   support

           0       0.96      0.91      0.93        54
           1       0.85      0.95      0.89        58
           2       0.97      0.97      0.97        70
           3       0.98      0.98      0.98        60
           4       1.00      0.98      0.99        65
           5       0.98      0.91      0.95        58
           6       0.97      0.98      0.97        58

    accuracy                           0.96       423
   macro avg       0.96      0.96      0.96       423
weighted avg       0.96      0.96      0.96       423

