In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import os
os.listdir()

['.config',
 'train.csv',
 'test.csv',
 'sample_submission.csv',
 'drive',
 'sample_data']

In [None]:
import pandas as pd

# Load the competition files from the Colab working directory.
# NOTE(review): these absolute /content paths presumably only resolve on Colab; adjust when running elsewhere.
train_df = pd.read_csv("/content/train.csv")  # labelled rows, including the Calories target
test_df = pd.read_csv("/content/test.csv")  # rows to predict (no Calories column)
sample_submission_df = pd.read_csv("/content/sample_submission.csv")  # template with the expected id / Calories layout

train_df.head()  # preview the first 5 rows

Unnamed: 0,id,Sex,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
0,0,male,36,189.0,82.0,26.0,101.0,41.0,150.0
1,1,female,64,163.0,60.0,8.0,85.0,39.7,34.0
2,2,female,51,161.0,64.0,7.0,84.0,39.8,29.0
3,3,male,20,192.0,90.0,25.0,105.0,40.7,140.0
4,4,female,38,166.0,61.0,25.0,102.0,40.6,146.0


In [None]:
# Report the number of missing values per column for both splits.
print(train_df.isna().sum())
print(test_df.isna().sum())

# Incomplete training rows can simply be discarded.
train_df = train_df.dropna()

# Test rows must never be dropped: the submission pairs predictions with the ids
# of the full test file by position, so a shorter test set would break that
# pairing. Fail fast here instead of after training has finished.
if test_df.isna().any().any():
    raise ValueError(
        "test_df contains missing values; impute them instead of dropping rows, "
        "because every test id needs a prediction"
    )

id            0
Sex           0
Age           0
Height        0
Weight        0
Duration      0
Heart_Rate    0
Body_Temp     0
Calories      0
dtype: int64
id            0
Sex           0
Age           0
Height        0
Weight        0
Duration      0
Heart_Rate    0
Body_Temp     0
dtype: int64


# 1. Encode Categorical Column (Sex)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the training split only, then reuse it for the test split so
# both share the same integer codes (in this data: female -> 0, male -> 1).
le = LabelEncoder()
train_df['Sex'] = le.fit_transform(train_df['Sex'])
test_df['Sex'] = le.transform(test_df['Sex'])  # reuse the fitted encoder; do not refit on test

In [None]:
train_df.head(3)

Unnamed: 0,id,Sex,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
0,0,1,36,189.0,82.0,26.0,101.0,41.0,150.0
1,1,0,64,163.0,60.0,8.0,85.0,39.7,34.0
2,2,0,51,161.0,64.0,7.0,84.0,39.8,29.0


In [None]:
# Column names of the model inputs and of the regression target.
features = ['Sex', 'Age', 'Height', 'Weight', 'Duration', 'Heart_Rate', 'Body_Temp']
target = 'Calories'

# float32 arrays: X has one column per feature, y is a single-column matrix.
X = train_df.loc[:, features].to_numpy(dtype=np.float32)
y = train_df[target].to_numpy(dtype=np.float32)[:, np.newaxis]

In [None]:
X

array([[  1. ,  36. , 189. , ...,  26. , 101. ,  41. ],
       [  0. ,  64. , 163. , ...,   8. ,  85. ,  39.7],
       [  0. ,  51. , 161. , ...,   7. ,  84. ,  39.8],
       ...,
       [  1. ,  60. , 162. , ...,  29. , 113. ,  40.9],
       [  1. ,  45. , 182. , ...,  17. , 102. ,  40.3],
       [  0. ,  39. , 171. , ...,  19. ,  97. ,  40.6]], dtype=float32)

In [None]:
y

array([[150.],
       [ 34.],
       [ 29.],
       ...,
       [221.],
       [109.],
       [103.]], dtype=float32)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled rows for validation; random_state pins the split.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
X_train

array([[  1. ,  43. , 190. , ...,   6. ,  87. ,  39.1],
       [  0. ,  48. , 155. , ...,  12. ,  97. ,  40.2],
       [  1. ,  51. , 187. , ...,  15. ,  96. ,  40.5],
       ...,
       [  1. ,  37. , 191. , ...,  28. , 108. ,  40.7],
       [  1. ,  42. , 193. , ...,  17. ,  93. ,  40. ],
       [  1. ,  35. , 184. , ...,  11. ,  87. ,  39.5]], dtype=float32)

In [None]:
X_val

array([[  0. ,  56. , 163. , ...,  29. , 108. ,  40.8],
       [  1. ,  44. , 179. , ...,  13. ,  94. ,  40.3],
       [  1. ,  41. , 186. , ...,  10. ,  89. ,  39.9],
       ...,
       [  0. ,  37. , 167. , ...,   7. ,  96. ,  39.2],
       [  0. ,  21. , 174. , ...,  10. ,  86. ,  39.7],
       [  0. ,  40. , 176. , ...,  26. , 109. ,  41.1]], dtype=float32)

In [None]:
y_train

array([[ 25.],
       [ 67.],
       [ 86.],
       ...,
       [192.],
       [ 81.],
       [ 38.]], dtype=float32)

In [None]:
y_val

array([[200.],
       [ 64.],
       [ 39.],
       ...,
       [ 35.],
       [ 37.],
       [167.]], dtype=float32)

# 2. Convert to PyTorch tensors

In [None]:
from torch.utils.data import Dataset

class CalorieDataset(Dataset):
    """Dataset of feature rows with optional regression targets.

    Args:
        X: array-like of features, converted to a float32 tensor.
        y: optional array-like of targets, converted to a float32 tensor.
            Pass ``None`` (the default) for unlabelled data such as the test set.

    Raises:
        ValueError: if ``y`` is given and its length differs from ``X``.

    Indexing returns ``(X[idx], y[idx])`` when targets are present and
    ``X[idx]`` alone otherwise.
    """

    def __init__(self, X, y=None):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32) if y is not None else None
        # __len__ is driven by X alone, so a longer y would otherwise be
        # silently accepted and a shorter one would only fail mid-epoch.
        if self.y is not None and len(self.y) != len(self.X):
            raise ValueError(
                f"X and y must have the same number of rows, got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        if self.y is not None:
            return self.X[idx], self.y[idx]
        else:
            return self.X[idx]

# Wrap the train / validation splits so each item is a (features, target) pair.
train_data = CalorieDataset(X_train,y_train)
val_data = CalorieDataset(X_val,y_val)


In [None]:
# Only the training batches are shuffled; validation order does not matter.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32)
# No test loader here: `test_data` does not exist yet at this point (referencing
# it raised NameError on a fresh Restart & Run All). The test loader is built in
# the prediction cell, right after `test_data` is created.

# 3. Define the Model

In [None]:
class CalorieModel(nn.Module):
  """Fully connected regressor that outputs one value per input row.

  Args:
    in_features: number of input features per row (default 7).
    hidden_sizes: widths of the hidden layers, each followed by a ReLU
      (default ``(64, 32)``).

  With the defaults the network is Linear(7, 64) -> ReLU -> Linear(64, 32)
  -> ReLU -> Linear(32, 1), stored as ``self.net``.
  """

  def __init__(self, in_features=7, hidden_sizes=(64, 32)):
    super().__init__()
    layers = []
    width = in_features
    for size in hidden_sizes:
      layers += [nn.Linear(width, size), nn.ReLU()]
      width = size
    # Final projection to a single regression output.
    layers.append(nn.Linear(width, 1))
    self.net = nn.Sequential(*layers)

  def forward(self, x):
    """Return predictions of shape (batch, 1) for inputs of shape (batch, in_features)."""
    return self.net(x)

# 4. Custom RMSLE Loss Function

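 RMSLE is computed on log(1 + x), so predictions and targets must be non-negative; both are clamped to a small positive value before the logarithm is taken.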

In [None]:
def rmsle_loss(preds, targets, eps=1e-12):
  """Root mean squared logarithmic error between two tensors.

  Args:
    preds: predicted values.
    targets: ground-truth values, same shape as ``preds``.
    eps: small constant added under the square root so the gradient stays
      finite when the error is exactly zero.

  Returns:
    Scalar tensor ``sqrt(mean((log1p(preds) - log1p(targets)) ** 2) + eps)``.
  """
  # log1p(0) is already finite; the clamp keeps negative values out of the
  # logarithm (log1p is undefined at or below -1). Clamped elements receive
  # zero gradient.
  preds = torch.clamp(preds, min=1e-6)
  targets = torch.clamp(targets, min=1e-6)
  squared_log_error = (torch.log1p(preds) - torch.log1p(targets)) ** 2
  # Without eps, d/dx sqrt(x) at x == 0 is inf and backward() yields NaN.
  return torch.sqrt(torch.mean(squared_log_error) + eps)

# 5. Training the Model

In [None]:
# Sanity check before training: each line prints (any NaN?, any inf?) for the
# training features and the training targets respectively.
print(np.isnan(X_train).any(), np.isinf(X_train).any())
print(np.isnan(y_train).any(), np.isinf(y_train).any())

False False
False False


In [None]:
# Seed torch's global RNG so weight initialisation and the shuffled batch order
# are the same on every run (42 matches the random_state of the data split).
torch.manual_seed(42)

model = CalorieModel()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(50):
  model.train()
  total_loss = 0  # sum of per-batch losses for this epoch
  for xb, yb in train_loader:
    preds = model(xb)
    loss = rmsle_loss(preds, yb)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    total_loss += loss.item()

  # Reported value is the mean of the per-batch RMSLE over the epoch.
  print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader)}")

Epoch 1, Loss: 0.18324970008770625
Epoch 2, Loss: 0.09296460397303104
Epoch 3, Loss: 0.08271826683223248
Epoch 4, Loss: 0.07826963715473811
Epoch 5, Loss: 0.07565975867241621
Epoch 6, Loss: 0.07363916036526362
Epoch 7, Loss: 0.07212399868090948
Epoch 8, Loss: 0.07119040320644776
Epoch 9, Loss: 0.07018896449436744
Epoch 10, Loss: 0.06961446323494117
Epoch 11, Loss: 0.06918034979174534
Epoch 12, Loss: 0.06859910633375248
Epoch 13, Loss: 0.06824065446803967
Epoch 14, Loss: 0.06771490699579319
Epoch 15, Loss: 0.06744113510578871
Epoch 16, Loss: 0.06706789853662252
Epoch 17, Loss: 0.06665717582265536
Epoch 18, Loss: 0.06622942037870487
Epoch 19, Loss: 0.06567347106148799
Epoch 20, Loss: 0.06522385697315136
Epoch 21, Loss: 0.06482812053581079
Epoch 22, Loss: 0.06444903446783622
Epoch 23, Loss: 0.06406862054526806
Epoch 24, Loss: 0.06355052760829528
Epoch 25, Loss: 0.06319223187396923
Epoch 26, Loss: 0.06298055137584607
Epoch 27, Loss: 0.06265527233054241
Epoch 28, Loss: 0.06255118263224761
E

# 6. Validation

In [None]:
  # Validation pass over val_loader (built from X_val / y_val).
  # NOTE(review): this cell is indented one level; it appears to run only because
  # IPython strips a cell's leading indentation — consider dedenting it.
  model.eval()
  sq_log_err_sum = 0.0
  n_val = 0
  with torch.no_grad():
    for xb, yb in val_loader:
      preds = model(xb)
      batch_rmsle = rmsle_loss(preds, yb).item()
      # Undo the per-batch root and mean so batches are pooled by sample count;
      # averaging per-batch RMSLE values is not the RMSLE of the whole set.
      sq_log_err_sum += batch_rmsle ** 2 * yb.numel()
      n_val += yb.numel()
  val_loss = (sq_log_err_sum / n_val) ** 0.5  # RMSLE over the full validation set

  print(f"Epoch {epoch+1}, Train Loss: {total_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}")

Epoch 50, Train Loss: 0.0602, Val Loss: 0.0607


# 7. Test Set Prediction

In [None]:
test_df.head()

Unnamed: 0,id,Sex,Age,Height,Weight,Duration,Heart_Rate,Body_Temp
0,750000,1,45,177.0,81.0,7.0,87.0,39.8
1,750001,1,26,200.0,97.0,20.0,101.0,40.5
2,750002,0,29,188.0,85.0,16.0,102.0,40.4
3,750003,0,39,172.0,73.0,20.0,107.0,40.6
4,750004,0,30,173.0,67.0,16.0,94.0,40.5


In [None]:
# Select features from the test dataframe
features = ['Sex', 'Age', 'Height', 'Weight', 'Duration', 'Heart_Rate', 'Body_Temp']
test_df = test_df[features]

# Now convert to numpy array with float32 dtype
test_df = test_df.values.astype(np.float32)

print(test_df[:5])

[[  1.   45.  177.   81.    7.   87.   39.8]
 [  1.   26.  200.   97.   20.  101.   40.5]
 [  0.   29.  188.   85.   16.  102.   40.4]
 [  0.   39.  172.   73.   20.  107.   40.6]
 [  0.   30.  173.   67.   16.   94.   40.5]]


In [None]:
# Unlabelled dataset: with y=None each item is just a feature row.
test_data = CalorieDataset(test_df, None)
test_loader = DataLoader(test_data, batch_size=32)

# Get predictions on the test set
model.eval() # Set the model to evaluation mode
predictions = []
with torch.no_grad(): # Disable gradient calculation
    for xb in test_loader:
        preds = model(xb)
        # Flatten (batch, 1) -> (batch,). reshape(-1) always yields a 1-D tensor,
        # whereas squeeze() turns a final batch of size 1 into a 0-d tensor whose
        # tolist() is a bare float, which list.extend() rejects.
        predictions.extend(preds.reshape(-1).tolist())

# Now you have the predictions in the 'predictions' list
print("Predictions on the test set:")
print(predictions[:10]) # Print the first 10 predictions as an example

# If you need the predictions as a numpy array
predictions_np = np.array(predictions)
print("\nPredictions as NumPy array shape:", predictions_np.shape)

Predictions on the test set:
[25.45142364501953, 105.19139099121094, 86.03291320800781, 124.41858673095703, 74.66568756103516, 21.85638999938965, 46.00728988647461, 7.280529499053955, 9.631978988647461, 201.64852905273438]

Predictions as NumPy array shape: (250000,)


In [None]:
sample_submission_df.head()

Unnamed: 0,id,Calories
0,750000,88.283
1,750001,88.283
2,750002,88.283
3,750003,88.283
4,750004,88.283


In [None]:
# Floor the predictions at 0: the network output is unbounded, but a negative
# calorie count is not a meaningful prediction.
predictions_np = np.maximum(predictions_np, 0)

In [None]:
# Re-read the raw test file to recover the ids; `test_df` was converted to a
# feature-only array earlier and no longer carries them.
test_df_original = pd.read_csv("/content/test.csv")

# Pair each id with its prediction by row position and write the submission file.
submission = test_df_original[['id']].assign(Calories=predictions_np)
submission.to_csv('submission(2).csv', index=False)
submission.head()

Unnamed: 0,id,Calories
0,750000,25.451424
1,750001,105.191391
2,750002,86.032913
3,750003,124.418587
4,750004,74.665688
