# Predict Bike Sharing Demand with AutoGluon

In [None]:
import pandas as pd
from autogluon.tabular import TabularPredictor
import autogluon
import matplotlib.pyplot as plt
# Print the installed AutoGluon version, read from the autogluon.common package.
print('Autogluon Version: ', autogluon.common.version.__version__)
import numpy as np

## Step-1: Load and explore the data

In [None]:
# Load the training split and convert the `datetime` strings into real
# timestamps so the `.dt` accessor can be used on the column.
train = pd.read_csv('bike-sharing-demand/train.csv')
train['datetime'] = pd.to_datetime(train['datetime'])
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appends a stray "None" line.
train.info()
# Last expression of the cell: the notebook renders the first rows.
train.head()

In [None]:
# Load the test split and convert the `datetime` strings into real
# timestamps, mirroring what is done for the training split.
test = pd.read_csv('bike-sharing-demand/test.csv')
test['datetime'] = pd.to_datetime(test['datetime'])
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appends a stray "None" line.
test.info()
# Last expression of the cell: the notebook renders the first rows.
test.head()

In [None]:
# Load the sample submission file; its `count` column is overwritten with
# model predictions before a submission CSV is written.
submission = pd.read_csv('bike-sharing-demand/sampleSubmission.csv')
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appends a stray "None" line.
submission.info()
# Last expression of the cell: the notebook renders the last rows.
submission.tail()

## Step-2: Train a Model Using AutoGluon's Tabular Prediction

In [None]:
# Baseline model: regress `count` on the training frame as it stands at this
# point in the notebook, with `casual` and `registered` removed.
# NOTE(review): presumably those two columns are dropped because they are not
# available at prediction time and would leak the target — confirm against the
# dataset description.
predictor = TabularPredictor(
    problem_type='regression',
    label='count',
).fit(
    presets='high_quality',
    time_limit=300,  # fit budget; AutoGluon documents this value as seconds
    train_data=train.drop(columns=['casual', 'registered']),
)

In [None]:
# Show AutoGluon's summary of the models produced by the baseline fit.
predictor.fit_summary()

In [None]:
# Predict the `count` label for every row of the test frame with the baseline predictor.
predictions = predictor.predict(test)

### Check for negative predictions and set them to 0

In [None]:
# Report how many predictions are below zero, then floor them at zero.
# NOTE(review): presumably negative values are invalid for a count target and
# for the competition metric — confirm.
print(f'Number of negative predictions: {(predictions < 0).sum()}')
predictions = predictions.clip(lower=0)
# Round to the nearest integer before casting: a bare astype('int') truncates
# toward zero, which would push every prediction down by up to one.
submission['count'] = predictions.round().astype('int')
# Write the submission without the DataFrame index column.
submission.to_csv('bike-sharing-demand/submission.csv', index=False)

## Feature Engineering with EDA

In [None]:
# Draw one histogram per column of the training frame that pandas can plot.
train.hist(figsize=(15,12))

In [None]:
# Derive calendar features (month, day of month, hour) from the parsed
# `datetime` column, identically for the training and the test frame.
for frame in (train, test):
    stamps = frame['datetime'].dt
    for part in ('month', 'day', 'hour'):
        frame[part] = getattr(stamps, part)

In [None]:
# Re-type `season` and `weather` as pandas categoricals in both frames.
# NOTE(review): presumably these arrive from the CSV as integer codes and
# should not be treated as ordered numbers — confirm.
for frame in (train, test):
    for column in ('season', 'weather'):
        frame[column] = pd.Categorical(frame[column])

In [None]:
# Engineered flag `outdoor_w`: 1 when all of the following hold, else 0:
#   14 < atemp < 30, humidity < 80, windspeed < 13.
# The same rule is applied to the training and the test frame.
for frame in (train, test):
    mild_temperature = (frame['atemp'] > 14) & (frame['atemp'] < 30)
    moderate_humidity = frame['humidity'] < 80
    light_wind = frame['windspeed'] < 13
    frame['outdoor_w'] = (mild_temperature & moderate_humidity & light_wind).astype('int')

In [None]:
# Re-draw the per-column histograms now that the engineered columns have been added.
train.hist(figsize=(10,12))

In [None]:
# Second model: same AutoGluon settings as the baseline fit, but trained on the
# frame that now carries the engineered columns. `casual` and `registered`
# are still removed from the training data.
predictor_new_features = TabularPredictor(
    problem_type='regression',
    label='count',
).fit(
    presets='high_quality',
    time_limit=300,  # fit budget; AutoGluon documents this value as seconds
    train_data=train.drop(columns=['casual', 'registered']),
)

In [None]:
# Predict `count` for the test frame with the engineered-feature predictor.
predictions_new_features = predictor_new_features.predict(test)
# Report how many predictions are below zero, then floor them at zero.
# NOTE(review): presumably negative values are invalid for a count target and
# for the competition metric — confirm.
print(f'Number of negative predictions: {(predictions_new_features < 0).sum()}')
predictions_new_features = predictions_new_features.clip(lower=0)
# Round to the nearest integer before casting: a bare astype('int') truncates
# toward zero, which would push every prediction down by up to one.
submission['count'] = predictions_new_features.round().astype('int')
# Write the submission without the DataFrame index column.
submission.to_csv('bike-sharing-demand/submission_new_features.csv', index=False)

In [None]:
# Upload the engineered-feature submission CSV to the `bike-sharing-demand`
# competition through the Kaggle CLI, labelled with the message given to -m.
!kaggle competitions submit -c bike-sharing-demand -f bike-sharing-demand/submission_new_features.csv -m "new feature: outdoor weather"

In [None]:
# List this competition's submissions and keep only the first 6 lines of output.
# NOTE(review): `tail -n +1` starts at line 1, i.e. it passes the listing
# through unchanged — confirm whether a different offset was intended.
!kaggle competitions submissions -c bike-sharing-demand | tail -n +1 | head -n 6

In [None]:
from autogluon.tabular import models

In [None]:
# Third model, trained on the engineered training frame without `casual` and
# `registered`.
# NOTE(review): despite the variable name, only label, problem_type,
# train_data, time_limit and presets are set here; neither `hyperparameters`
# nor `hyperparameter_tune_kwargs` is passed to fit() — confirm whether tuning
# options were meant to be added.
predictor_hyper_opt = TabularPredictor(
    problem_type='regression',
    label='count',
).fit(
    presets='high_quality',
    time_limit=300,  # fit budget; AutoGluon documents this value as seconds
    train_data=train.drop(columns=['casual', 'registered']),
)