In [51]:
import os, glob, sqlite3
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from datetime import datetime

In [52]:
# Folder that holds the dataset; created up front (no error if it already
# exists) so that later reads and writes have a directory to target.
Dataset_Directory = "Fitness Tracker Dataset"
os.makedirs(Dataset_Directory, exist_ok=True)

# Location of the CSV file inside that folder, relative to the working directory.
Dataset_Path = os.path.join(Dataset_Directory, "Fitness Tracker Dataset.csv")

In [53]:
# Load the fitness-tracker CSV from Dataset_Path into a DataFrame.
Fitness_Tracker = pd.read_csv(Dataset_Path)

In [54]:
def Remove_Future(Fitness_Tracker, Output_Path=None):
    """Drop rows dated October 2024 or later and save the result as CSV.

    Rows whose 'Datetime' falls in 2025, or in 2024 from October onwards, are
    removed. The remaining dates are rewritten as 'DD-MM-YYYY' strings and the
    frame is written without its index.

    Parameters
    ----------
    Fitness_Tracker : pandas.DataFrame
        Frame with a 'Datetime' column that pandas can parse. It is not
        modified; all work happens on a copy.
    Output_Path : str or path-like, optional
        Destination CSV. Defaults to the notebook-level ``Dataset_Path``.

    Returns
    -------
    pandas.DataFrame
        The filtered frame that was written to disk.

    Notes
    -----
    Values that cannot be parsed become NaT. They never match the mask, so
    those rows are kept and their date is written out as an empty field.

    NOTE(review): the output is day-first, but the parse below does not pass a
    format. Re-running this on an already-cleaned file may therefore mis-parse
    dates -- confirm the raw file's date format before re-running.
    """
    # Work on a copy so the caller's frame keeps its original 'Datetime' values.
    Cleaned = Fitness_Tracker.copy()
    Cleaned['Datetime'] = pd.to_datetime(Cleaned['Datetime'], errors='coerce')

    Year = Cleaned['Datetime'].dt.year
    Month = Cleaned['Datetime'].dt.month
    # Explicit parentheses: all of 2025, or October-December of 2024.
    Is_Future = (Year == 2025) | ((Year == 2024) & (Month >= 10))

    # .copy() makes the filtered result an independent frame, so the column
    # assignment below does not raise SettingWithCopyWarning.
    Cleaned = Cleaned.loc[~Is_Future].copy()
    Cleaned['Datetime'] = Cleaned['Datetime'].dt.strftime('%d-%m-%Y')

    if Output_Path is None:
        Output_Path = Dataset_Path
    Cleaned.to_csv(Output_Path, index=False)
    return Cleaned

In [55]:
class SQLite3:
    """Mirror a DataFrame into a local SQLite database and query it with SQL.

    The database file is ``<Name>.db`` in the current working directory, and
    the table inside it is also called ``Name``.
    """

    def __init__(self, Dataset, Name):
        """Remember the frame to persist and the table / database name.

        Parameters
        ----------
        Dataset : pandas.DataFrame
            Data written to the table the first time the table is needed.
        Name : str
            Used both as the table name and as the stem of the ``.db`` file.
        """
        self.Dataset = Dataset
        self.Name = Name

    def Data_Statistics(self):
        """Open ``<Name>.db`` and make sure the table ``Name`` exists in it.

        If the table is missing (new database, or a file without that table),
        ``Dataset`` is written to it. An existing table is left untouched, so
        data passed on later runs does not overwrite what is already stored.

        Returns
        -------
        sqlite3.Connection
            An open connection; the caller is responsible for closing it.
        """
        Connection = sqlite3.connect(f'{self.Name}.db')
        try:
            # Check for the table itself rather than just the file: a file
            # without the table would otherwise make every query fail.
            Table_Exists = Connection.execute(
                "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?",
                (self.Name,),
            ).fetchone() is not None
            if not Table_Exists:
                self.Dataset.to_sql(self.Name, Connection, if_exists='replace', index=False)
                print("Database created!")
        except Exception:
            # Do not leak the handle if the check or the initial load fails.
            Connection.close()
            raise
        return Connection

    def Exc(self, Query):
        """Run ``Query`` against the database and return the rows as a DataFrame.

        The connection opened for the query is always closed afterwards, even
        if the query raises.
        """
        Connection = self.Data_Statistics()
        try:
            return pd.read_sql_query(Query, Connection)
        finally:
            Connection.close()

# Name shared by the SQLite table and its database file.
Name = 'Fitness Tracker'

# Fetch every row and column of that table.
Query = f"""
    SELECT * 
    FROM '{Name}'
"""

# Build the store from the CSV on disk, then run the query; the resulting
# DataFrame is the cell's displayed output.
Tracker_Database = SQLite3(pd.read_csv(Dataset_Path), Name)
Tracker_Database.Exc(Query)

Unnamed: 0,User ID,Datetime,Steps,Calories Burned,Distance,Activity,Sleep,Heart Rate Avg,Workout Type,Weather Conditions,Location,Mood
0,468,01-01-2023,4530,2543.02,16.10,613,1.5,176,Walking,Clear,Park,Tired
1,879,01-01-2023,11613,1720.76,8.10,352,6.3,128,Cycling,Fog,Park,Happy
2,152,01-01-2023,27335,1706.35,3.57,236,6.7,134,Yoga,Snow,Park,Neutral
3,311,01-01-2023,13459,2912.38,6.41,1329,11.6,116,Swimming,Rain,Office,Tired
4,759,01-01-2023,15378,3344.51,17.88,52,7.4,84,Swimming,Rain,Office,Neutral
...,...,...,...,...,...,...,...,...,...,...,...,...
638995,439,30-09-2024,9325,2648.95,5.45,630,11.9,135,Walking,Snow,Other,Stressed
638996,746,30-09-2024,13763,1841.06,14.36,1418,6.8,153,Gym Workout,Clear,Gym,Tired
638997,216,30-09-2024,1549,3773.45,2.79,16,11.6,133,Walking,Snow,Gym,Stressed
638998,218,30-09-2024,7313,2053.57,1.16,218,9.8,60,Gym Workout,Rain,Gym,Happy


In [60]:
Data = pd.read_csv(Dataset_Path)

# Parse the day-first date strings; anything unparseable becomes NaT.
Parsed_Dates = pd.to_datetime(Data['Datetime'], format='%d-%m-%Y', errors='coerce')

# Drop rows whose date could not be parsed and replace 'Datetime' with seconds
# since the Unix epoch. The vectorised subtraction yields the same values as
# Timestamp.timestamp() for these naive dates, but tolerates NaT instead of
# failing on it.
Data = Data.loc[Parsed_Dates.notna()].assign(
    Datetime=(Parsed_Dates - pd.Timestamp('1970-01-01')) / pd.Timedelta(seconds=1)
)

# Select the feature columns and the label
X = Data[['Datetime', 'Steps', 'Calories Burned', 'Distance', 'Sleep', 
          'Heart Rate Avg', 'Workout Type', 'Weather Conditions', 'Location', 'Activity']]
Y = Data['Mood']

# Preprocessing.
# 'Activity' holds integers in the data preview, so it is scaled as a numeric
# feature rather than one-hot encoded (which creates one column per distinct
# value) -- NOTE(review): presumably active minutes; confirm against the
# dataset description.
# handle_unknown='ignore' lets transform() accept a category that was not seen
# during fit (encoded as all zeros) instead of raising.
# sparse_threshold=0 always returns a dense array, which is what Keras is fed.
Preprocessor = ColumnTransformer(
    transformers=[
        ('Data Type Conversion', StandardScaler(), ['Datetime', 'Steps', 'Calories Burned', 
                                   'Distance', 'Sleep', 'Heart Rate Avg', 'Activity']),
        ('Categorical Encoding', OneHotEncoder(handle_unknown='ignore'), ['Workout Type', 'Weather Conditions', 
                                  'Location'])
    ],
    sparse_threshold=0)

# Split the data into a training set and a test set
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Fit the preprocessing on the training set only, then apply it to both sets
X_train = Preprocessor.fit_transform(X_train)
X_test = Preprocessor.transform(X_test)

# One-hot encode the target label (Mood)
encoder = OneHotEncoder()
Y_train = encoder.fit_transform(Y_train.values.reshape(-1, 1)).toarray()
Y_test = encoder.transform(Y_test.values.reshape(-1, 1)).toarray()

# Build the neural network with TensorFlow
Model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(Y_train.shape[1], activation='softmax')  # Output layer: softmax over the mood classes
])

# Compile the model
Model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# NOTE(review): the test set doubles as validation data here; nothing is tuned
# on it in this cell, but a separate validation split would keep it untouched.
History = Model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=5, batch_size=32)

# Step 6: evaluate the model
test_loss, test_accuracy = Model.evaluate(X_test, Y_test)
print(f"Độ chính xác trên tập kiểm tra: {test_accuracy * 100:.2f}%")


Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Độ chính xác trên tập kiểm tra: 24.85%


In [63]:
# Bước 7: Dự đoán tâm trạng mới
# Step 7: predict the mood for one new observation.
# The keys are the feature columns selected for training; 'Datetime' is given
# as seconds since the epoch, like the training column.
new_record = {
    'Datetime': pd.Timestamp('2024-11-01 08:00').timestamp(),
    'Steps': 5000,
    'Calories Burned': 300,
    'Distance': 4.5,
    'Sleep': 7,
    'Heart Rate Avg': 70,
    'Workout Type': 'Yoga',
    'Weather Conditions': 'Clear',
    'Location': 'Home',
    'Activity': 1131,
}
new_data = pd.DataFrame([new_record])

# Apply the already-fitted preprocessing to the new row
new_data_processed = Preprocessor.transform(new_data)

# Predict, then map the model's output row back to a Mood label
predicted_mood = Model.predict(new_data_processed)
predicted_mood_label = encoder.inverse_transform(predicted_mood)
print(f'Dự đoán tâm trạng: {predicted_mood_label[0][0]}')

Dự đoán tâm trạng: Neutral
