<a href="https://colab.research.google.com/github/michaelthephoenix/3Dfolio-2019/blob/master/notebook5bbefc9ce3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read() call while streaming a download.
CHUNK_SIZE = 40960
# Comma-separated list of "<directory>:<percent-encoded download URL>" entries;
# each entry is downloaded and unpacked into KAGGLE_INPUT_PATH/<directory>.
# NOTE(review): the URL carries a signed, time-limited query string
# (X-Goog-Date=20241008T094104Z, X-Goog-Expires=259200), so it presumably stops
# working after that window and must be regenerated -- confirm before re-running.
DATA_SOURCE_MAPPING = 'brist1d:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-competitions-data%2Fkaggle-v2%2F82611%2F9553358%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20241008%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20241008T094104Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D95d55018ee1b5d03f94b1ae0798d1f875da0a0e158f6bd65d59be8b339008eb1f23487a0ef2291f47115a064d709cd23886f38f74b21ae424a684f375a3d5a8f84da47d34f770f86559b62bd07fa6c0c5abfe25c8c1064546d8dc994d589efb9fc4bd9f754de1caaab28e7da6df0c013f73abd1f14b2182f97e2f97fb851842df3bd9aafe63c4190fdef533ab13fa6bde6f886ae2f5a1eabb54b631c6e4cbbbb18822589e0f85a05d2e1f454972f89e5ad386d6fedda6ea0cc0612a365a63314ee871e7b4656c82d9956d034ad9875425be52733023ffa342baff3fcd64c4631319586593783f680d7676de7c3d29d111320acf8ddc8be9175d2012430928332'

# Directory that downloaded data sources are extracted into.
KAGGLE_INPUT_PATH='/kaggle/input'
# Working directory created alongside the input directory.
KAGGLE_WORKING_PATH='/kaggle/working'
# NOTE(review): not referenced anywhere in the visible notebook -- possibly unused.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every "<directory>:<percent-encoded URL>" entry of DATA_SOURCE_MAPPING
# into a temporary file and unpack it under KAGGLE_INPUT_PATH/<directory>.
# Failures for one source are reported and the loop moves on to the next one.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    if not data_source_mapping:
        # An empty mapping or a stray comma yields '' entries; nothing to fetch.
        continue
    # Split on the first ':' only so an unexpected literal colon later in the
    # entry cannot break the two-value unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # Content-Length is optional (e.g. chunked responses); without it
            # the progress bar simply stays empty instead of raising on int(None).
            content_length = (total_length or '').strip()
            expected_bytes = int(content_length) if content_length.isdigit() else 0
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                # Cap at 50 so a body longer than advertised cannot overrun the bar.
                done = min(50, int(50 * dl / expected_bytes)) if expected_bytes else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # Push buffered bytes to disk: the tar branch reopens the file by
            # name and would otherwise risk reading a truncated archive.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing `tarfile` here would
                # overwrite the imported module for the rest of the notebook.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading brist1d, 9652764 bytes compressed
Downloaded and uncompressed: brist1d
Data source import complete.


In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below /kaggle/input.
for folder, _, files_in_folder in os.walk('/kaggle/input'):
    for file_name in files_in_folder:
        file_path = os.path.join(folder, file_name)
        print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/brist1d/train.csv
/kaggle/input/brist1d/activities.txt
/kaggle/input/brist1d/sample_submission.csv
/kaggle/input/brist1d/test.csv


In [3]:
pip install autogluon



In [4]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
import numpy as np
from autogluon.tabular import TabularPredictor

In [5]:
# Read the competition train and test CSVs; low_memory=False makes pandas
# parse each file in one pass rather than in chunks.
train, test = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in (
        '/kaggle/input/brist1d/train.csv',
        '/kaggle/input/brist1d/test.csv',
    )
)

In [6]:
# Parse the HH:MM:SS strings in `time` for both frames first ...
for frame in (train, test):
    frame['time'] = pd.to_datetime(frame['time'], format='%H:%M:%S')

# ... then replace the parsed column with separate `hour` and `minute` features.
# NOTE: the frames are modified in place, so this cell cannot be re-run without
# reloading the data (the `time` column no longer exists afterwards).
for frame in (train, test):
    parsed_time = frame['time']
    frame['hour'] = parsed_time.dt.hour
    frame['minute'] = parsed_time.dt.minute
    frame.drop(columns='time', inplace=True)

In [7]:
# Numerical features: every float64/int64 column of the training frame except
# the prediction target 'bg+1:00'.
numerical_cols = list(train.select_dtypes(include=['float64', 'int64']).columns)
numerical_cols.remove('bg+1:00')

# Categorical features: every column whose name contains 'activity'.
categorical_cols = list(filter(lambda name: 'activity' in name, train.columns))

In [8]:

# Replace missing numerical values with the constant 0. The imputer is fitted on
# the training frame only and then applied to train and test alike.
imputer = SimpleImputer(strategy='constant', fill_value=0).fit(train[numerical_cols])
for frame in (train, test):
    frame[numerical_cols] = imputer.transform(frame[numerical_cols])

In [9]:
# Turn each activity column into integer codes. Missing entries become the
# literal category 'None', and the encoder is fitted on train and test together
# so that both frames share a single label-to-code mapping per column.
for activity_col in categorical_cols:
    for frame in (train, test):
        frame[activity_col] = frame[activity_col].fillna('None')

    all_values = pd.concat([train[activity_col], test[activity_col]], axis=0)
    le = LabelEncoder().fit(all_values)

    for frame in (train, test):
        frame[activity_col] = le.transform(frame[activity_col])

In [10]:
# Name of the column AutoGluon is asked to predict.
label = 'bg+1:00'

# Modelling frames: copies of train/test without the 'id' and 'p_num' columns.
train_data, test_data = (
    frame.drop(columns=['id', 'p_num']) for frame in (train, test)
)

In [None]:

# Fit an AutoGluon tabular predictor for `label`, scored by RMSE, with default
# settings (no presets, no time limit). In the log recorded below, two of the
# model fits took about 2179 s and 2645 s each, so expect a long-running cell.
predictor = TabularPredictor(label=label, eval_metric='root_mean_squared_error')
predictor = predictor.fit(train_data)


No path specified. Models will be saved in: "AutogluonModels/ag-20241008_152837"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP PREEMPT_DYNAMIC Thu Jun 27 21:05:47 UTC 2024
CPU Count:          2
Memory Avail:       9.57 GB / 12.67 GB (75.5%)
Disk Space Avail:   60.20 GB / 107.72 GB (55.9%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets.
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='best_quality'   : Maximize accuracy. Default time_limit=3600.
	presets='high_quality'   : Strong accuracy with fast inference speed. Default time_limit=3600.
	presets='good_quality'   : Good accuracy with very fast inference speed. Default time_limit=3600.
	presets='medium_quality' : Fast training time, ideal for initial prototyping.
	Co

[1000]	valid_set's rmse: 1.7759
[2000]	valid_set's rmse: 1.69588
[3000]	valid_set's rmse: 1.64256
[4000]	valid_set's rmse: 1.60162
[5000]	valid_set's rmse: 1.56567
[6000]	valid_set's rmse: 1.53613
[7000]	valid_set's rmse: 1.51446
[8000]	valid_set's rmse: 1.49355
[9000]	valid_set's rmse: 1.47179
[10000]	valid_set's rmse: 1.45429


	-1.4543	 = Validation score   (-root_mean_squared_error)
	2178.79s	 = Training   runtime
	5.11s	 = Validation runtime
Fitting model: LightGBM ...


[1000]	valid_set's rmse: 1.76302
[2000]	valid_set's rmse: 1.67491
[3000]	valid_set's rmse: 1.61508
[4000]	valid_set's rmse: 1.5673
[5000]	valid_set's rmse: 1.52903
[6000]	valid_set's rmse: 1.49903
[7000]	valid_set's rmse: 1.47236
[8000]	valid_set's rmse: 1.4508
[9000]	valid_set's rmse: 1.43335
[10000]	valid_set's rmse: 1.41638


	-1.4164	 = Validation score   (-root_mean_squared_error)
	2645.44s	 = Training   runtime
	4.23s	 = Validation runtime
Fitting model: RandomForestMSE ...
