In [38]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
import joblib

# Caffeine dataset: learn to predict a drink's calories from its name,
# its volume and its caffeine content, then persist the fitted pipeline.

# Read the raw table
data = pd.read_csv('archive-4/caffeine.csv')

# The measurement columns are coerced to numbers; anything that cannot be
# parsed turns into NaN instead of raising.
measurement_cols = ['Volume (ml)', 'Caffeine (mg)', 'Calories']
for col in measurement_cols:
    data[col] = pd.to_numeric(data[col], errors='coerce')

# Rows lacking any of the three measurements are discarded
data = data.dropna(subset=measurement_cols)

# Target first, then the feature frame
y = data['Calories']
X = data[['drink', 'Volume (ml)', 'Caffeine (mg)']]

# Column-wise preprocessing: numeric columns go through untouched, the drink
# name is one-hot encoded (categories unseen at fit time encode as all zeros).
numeric_passthrough = ('num', 'passthrough', ['Volume (ml)', 'Caffeine (mg)'])
drink_one_hot = ('cat', OneHotEncoder(handle_unknown='ignore'), ['drink'])
preprocessor = ColumnTransformer(transformers=[numeric_passthrough, drink_one_hot])

# Preprocessing followed by a seeded random forest, bundled as one estimator
forest = RandomForestRegressor(n_estimators=100, random_state=42)
model = Pipeline(steps=[('preprocessor', preprocessor), ('regressor', forest)])

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit on the training part and predict the held-out part
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report the root of the mean squared error on the held-out part
rmse = sqrt(mean_squared_error(y_test, y_pred))
print(f'RMSE: {rmse}')

# Persist the whole fitted pipeline (preprocessing + forest)
joblib.dump(model, 'caffeine_calories_predictor_rf.pkl')

RMSE: 97.57857991903957


['caffeine_calories_predictor_rf.pkl']

In [44]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from math import sqrt
from sklearn.impute import SimpleImputer 

import joblib  # used by the final dump; imported here so this cell does not rely on another cell's import

# Load the dataset (a single read is enough)
data = pd.read_csv('archive-4/calories.csv', delimiter=',')

# Strip the unit suffix from each measurement column as literal text
# (regex=False), then convert the column to numbers. Values that are still not
# numeric after stripping become NaN (errors='coerce').
unit_suffixes = {
    'Cals_per100grams': ' cal',
    'KJ_per100grams': ' kJ',
    'per100grams': 'g',
}
for column, suffix in unit_suffixes.items():
    stripped = data[column].str.replace(suffix, '', regex=False)
    data[column] = pd.to_numeric(stripped, errors='coerce')

# Rows whose target could not be parsed are removed: LinearRegression cannot
# be fitted on NaN targets. This is a no-op when every target parsed cleanly.
data = data.dropna(subset=['Cals_per100grams'])

# Features: the food item name plus the numeric 'per100grams' column.
# Target: calories per 100 g.
X = data[['FoodItem', 'per100grams']]
y = data['Cals_per100grams']

# One-hot encode 'FoodItem' (unseen items encode as all zeros) and fill
# missing 'per100grams' values with the training mean.
preprocessor = ColumnTransformer(
    transformers=[
        ('encoder', OneHotEncoder(handle_unknown='ignore'), ['FoodItem']),
        ('num', SimpleImputer(strategy='mean'), ['per100grams'])
    ],
    remainder='passthrough'
)

# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Creating a pipeline that first transforms the data and then fits a model
model = Pipeline(steps=[('preprocessor', preprocessor),
                        ('regressor', LinearRegression())])

# Training the model
model.fit(X_train, y_train)

# Predicting the Test set results
y_pred = model.predict(X_test)

# Evaluating the model: root of the mean squared error on the test set
rmse = sqrt(mean_squared_error(y_test, y_pred))
print(f'RMSE: {rmse}')

# Persist the whole fitted pipeline (preprocessing + regression)
joblib.dump(model, 'fooditem_calories_predictor.pkl')

RMSE: 164.13108182167568


['fooditem_calories_predictor.pkl']

In [30]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer  # Corrected import statement
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib

# Load the dataset
data = pd.read_csv('archive-4/fastfood_calories.csv')

# Drop the first unnamed column if it exists (errors='ignore' makes this a
# no-op when the column is absent) without mutating the frame in place.
data = data.drop(columns='Unnamed: 0', errors='ignore')

# 'calories' is the target variable and every other column is a feature
X = data.drop('calories', axis=1)
y = data['calories']

# Identify categorical (object dtype) and numerical (everything else) features
categorical_features = X.select_dtypes(include=['object']).columns
numerical_features = X.select_dtypes(exclude=['object']).columns

# Preprocessing: one-hot encode categorical variables (unseen categories
# encode as all zeros) and impute missing numeric values with the mean.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
        ('num', SimpleImputer(strategy='mean'), numerical_features)
    ],
    remainder='passthrough'
)

# Define the model: preprocessing followed by a seeded random forest
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Evaluate the model. The root is taken explicitly instead of passing
# `squared=False`, which newer scikit-learn releases deprecate and remove;
# this also matches how the other cells compute RMSE.
y_pred = model.predict(X_test)
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print(f'RMSE: {rmse}')

# Save the model to a pickle file
joblib.dump(model, 'fastfood_calories_predictor.pkl')

RMSE: 92.98479884117609




['fastfood_calories_predictor.pkl']

The cell below does not work, because I do not have the weight distribution.


In [48]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

import joblib  # used by the dumps below; imported here so this cell does not rely on another cell's import

# Load the dataset
data = pd.read_csv('archive-4/exercise_dataset.csv')

# Reshape from one column per body weight to one row per (activity, weight):
# the four weight columns are stacked into 'Weight_Category' (the column name)
# and 'Calories_Burned' (the cell value).
melted_data = pd.melt(data, id_vars=['Activity, Exercise or Sport (1 hour)', 'Calories per kg'],
                      value_vars=['130 lb', '155 lb', '180 lb', '205 lb'],
                      var_name='Weight_Category', value_name='Calories_Burned')

# Convert the weight category to an integer by extracting its digits.
# expand=False yields a Series, so a plain column is assigned rather than a
# one-column DataFrame.
melted_data['Weight_Category'] = (
    melted_data['Weight_Category'].str.extract(r'(\d+)', expand=False).astype(int)
)

# Features: body weight and calories burned. Target: the activity name.
# ('Calories per kg' is carried through the melt but is not used by the model.)
X = melted_data[['Weight_Category', 'Calories_Burned']]  # Features
y = melted_data['Activity, Exercise or Sport (1 hour)']  # Target

# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling: statistics are learned on the training rows only and then
# applied unchanged to the test rows.
sc = StandardScaler()
X_train_scaled = sc.fit_transform(X_train)
X_test_scaled = sc.transform(X_test)

# Creating and Training the model
model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_train_scaled, y_train)

# Save the trained model
joblib.dump(model, 'exercise_predictor_model.pkl')

# Save the scaler (needed to scale inputs the same way at prediction time)
joblib.dump(sc, 'scaler.pkl')

['scaler.pkl']

In [51]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import joblib

# Load the dataset
data = pd.read_csv('archive-4/Activity_Dataset_V1.csv')

# Select only the 'calories' column as feature and 'workout_type' as the target
X = data[['calories']]
y = data['workout_type']

# Split BEFORE imputing so the imputation statistic is learned from the
# training rows only; fitting the imputer on the full data would leak
# information from the test rows into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Handle missing values in 'calories' with the training median, then apply the
# same fitted imputer to the test rows.
imputer = SimpleImputer(strategy='median')
X_train = imputer.fit_transform(X_train_raw)
X_test = imputer.transform(X_test_raw)

# Define the model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Save the model and the imputer to pickle files; the imputer must be applied
# to new inputs before they are passed to the model.
joblib.dump(model, 'calories_based_workout_predictor.pkl')
joblib.dump(imputer, 'calories_imputer.pkl')

['calories_imputer.pkl']

Cricket: 17.89 minutes at 5.59 calories/minute
Outdoor Running: 18.32 minutes at 5.46 calories/minute
Outdoor Cycling: 18.86 minutes at 5.30 calories/minute


In [57]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
import pandas as pd
import joblib

# Trains one classifier (calories + time -> workout type) and, for every
# workout type, one regressor (calories -> time), then saves all of them.
#
# NOTE(review): `data` is not loaded in this cell; it must already exist in
# the kernel with 'calories', 'time' and 'workout_type' columns — presumably
# the activity dataset read in an earlier cell; confirm before running.
X = data[['calories', 'time']]  # Features for classification
y_class = data['workout_type']  # Target for classification

# Splitting data for classification model
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X, y_class, test_size=0.2, random_state=42
)

# Training classification model (seeded so reruns give the same forest, as in
# the other cells of this notebook)
classifier = RandomForestClassifier(random_state=42)
classifier.fit(X_train_class, y_train_class)

# For each workout type, train a regressor on that workout's rows only.
# Loop-local names are used so the classification features in `X` are not
# overwritten.
regressors = {}
for workout in y_class.unique():
    workout_data = data[data['workout_type'] == workout]
    X_reg = workout_data[['calories']]  # Single feature for regression
    y_reg = workout_data['time']  # Target for regression

    X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
        X_reg, y_reg, test_size=0.2, random_state=42
    )

    regressor = RandomForestRegressor(random_state=42)
    regressor.fit(X_train_reg, y_train_reg)
    regressors[workout] = regressor

# Save models: one file for the classifier, one file per workout regressor
# (the workout name is used verbatim in the file name)
joblib.dump(classifier, 'exercise_classifier.pkl')
for workout, regressor in regressors.items():
    joblib.dump(regressor, f'{workout}_time_regressor.pkl')