In [None]:

# --- 1. INSTALL LIBRARIES ---
print("STEP 1: Installing required libraries...")
!pip install -q tensorflow keras scikit-learn pandas joblib
print("Libraries installed successfully.")
print("-" * 30)

# --- 2. IMPORT LIBRARIES & CONFIGURE ---
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
import joblib
import os
import xml.etree.ElementTree as ET # Library to read XML files
import csv # Library to write CSV files

# --- Configuration ---
# Data files: the XML export you upload (from Kaggle) and the CSV that the
# conversion step writes for the model to read.
INPUT_XML_FILE = "559-ws-training.xml"
OUTPUT_CSV_FILE = "glucose_data.csv"

# Output artifacts: the fitted scaler and the trained model.
SCALER_SAVE_PATH = "scaler.gz"
MODEL_SAVE_PATH = "glucose_predictor.h5"

# Training hyperparameters.
LOOK_BACK = 12  # number of consecutive readings in each input window
BATCH_SIZE = 32
EPOCHS = 5

# --- 3. CONVERT XML TO CSV (This runs AFTER you upload) ---
print(f"STEP 2: Ready for data. Please upload '{INPUT_XML_FILE}' now.")

if os.path.exists(INPUT_XML_FILE):
    print(f"STEP 3: XML file found! Converting '{INPUT_XML_FILE}' to '{OUTPUT_CSV_FILE}'...")

    # Load the XML document and take its top-level element.
    root = ET.parse(INPUT_XML_FILE).getroot()

    # Write a header row, then one (ts, value) row per <event> element found
    # directly under a <glucose_level> child of the root. A missing attribute
    # comes back as None, which the csv module writes as an empty field.
    with open(OUTPUT_CSV_FILE, 'w', newline='') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(['timestamp', 'glucose_value'])
        writer.writerows(
            [reading.get('ts'), reading.get('value')]
            for reading in root.findall('./glucose_level/event')
        )

    print("Conversion complete!")
    print("-" * 30)

    # --- 4. PREPARE THE DATA from our NEW CSV file ---
    def create_sequences(dataset, look_back):
        """Slice a series into sliding input windows and next-step targets.

        Args:
            dataset: 2-D array indexed as ``dataset[row, column]``; only
                column 0 is read.
            look_back: number of consecutive rows in each input window.

        Returns:
            A tuple ``(dataX, dataY)`` of NumPy arrays. ``dataX`` has shape
            ``(n_windows, look_back)`` and ``dataY`` has shape
            ``(n_windows,)``, where ``dataY[i]`` is the value immediately
            following window ``dataX[i]`` and
            ``n_windows = max(len(dataset) - look_back, 0)``.
        """
        # A window starting at row i needs rows i .. i + look_back (the last
        # one being the target), so the final valid start index is
        # len(dataset) - look_back - 1. The previous loop bound stopped one
        # short of that and silently discarded the last training pair.
        n_windows = max(len(dataset) - look_back, 0)
        if n_windows == 0:
            # Keep the 2-D / 1-D shapes so callers can still read .shape[1].
            return np.empty((0, look_back)), np.empty((0,))

        dataX = [dataset[start:start + look_back, 0] for start in range(n_windows)]
        dataY = [dataset[start + look_back, 0] for start in range(n_windows)]
        return np.array(dataX), np.array(dataY)

    print("STEP 4: Loading and preparing data from the new CSV file...")
    # Only the glucose column is needed; rows without a reading are dropped.
    df = pd.read_csv(OUTPUT_CSV_FILE, usecols=['glucose_value']).dropna().reset_index(drop=True)
    dataset = df.values.astype('float32')
    # Fail early with an actionable message instead of letting the scaler
    # reject an empty array with a less specific error.
    if len(dataset) == 0:
        raise ValueError(
            f"No glucose readings found in '{OUTPUT_CSV_FILE}'. "
            f"Check that '{INPUT_XML_FILE}' contains glucose_level events."
        )
    # Scale readings into [0, 1]; the fitted scaler is saved later so the same
    # transform can be reused with the trained model.
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset_scaled = scaler.fit_transform(dataset)
    trainX, trainY = create_sequences(dataset_scaled, LOOK_BACK)
    # With too few readings no window can be built, and the reshape below
    # would otherwise fail with an obscure error.
    if trainX.size == 0:
        raise ValueError(
            f"Not enough glucose readings ({len(dataset)}) to build a "
            f"training window of {LOOK_BACK} readings."
        )
    # Add a trailing feature axis: (samples, time steps) -> (samples, time steps, 1).
    trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
    print("Data preparation complete.")
    print("-" * 30)

    # --- 5. BUILD AND TRAIN THE MODEL ---
    print("STEP 5: Building and training the LSTM model...")
    # One 16-unit LSTM layer reads each (LOOK_BACK, 1) window; a single-unit
    # Dense layer produces the one output value per window.
    model = Sequential([
        LSTM(16, input_shape=(LOOK_BACK, 1)),
        Dense(1),
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(trainX, trainY, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=1)
    print("Model training complete.")
    print("-" * 30)

    # --- 6. SAVE THE RESULTS ---
    print("STEP 6: Saving the trained model and scaler...")
    # Write the trained network to MODEL_SAVE_PATH.
    model.save(MODEL_SAVE_PATH)
    # Write the fitted scaler to SCALER_SAVE_PATH alongside the model, so the
    # scaling fitted on the training data is kept with it.
    joblib.dump(scaler, SCALER_SAVE_PATH)
    print(f"Artifacts saved: '{MODEL_SAVE_PATH}' and '{SCALER_SAVE_PATH}'")
    print("\n--- ALL DONE! You can now download your files from the file pane. ---")
else:
    # Reached when INPUT_XML_FILE does not exist at the given relative path:
    # nothing is converted, trained, or saved in that case.
    print(f"\nERROR: Could not find the file. Please upload '{INPUT_XML_FILE}' and then run this cell again.")