## Importando Bibliotecas

In [None]:
import pandas as pd
import numpy as np

## Importando arquivos

In [None]:
# Load the training and test CSVs; parse_dates=[0] parses the first column of
# each file as datetimes.
TRAIN_CSV = '../input/train.csv'
TEST_CSV = '../input/test.csv'
df = pd.read_csv(TRAIN_CSV, parse_dates=[0])
test = pd.read_csv(TEST_CSV, parse_dates=[0])

## Mudando o Nome da Coluna count

In [None]:
# Rename the 'count' column to 'rentals' (rebinding df instead of mutating it in place).
df = df.rename(columns={'count': 'rentals'})

## Aplicando log(x + 1) às colunas que iremos prever

In [None]:
# Replace each target column with log(x + 1).
# NOTE: this cell is not idempotent; re-running it applies the log a second time.
for col in ('rentals', 'registered', 'casual'):
    shifted = df[col] + 1
    df[col] = np.log(shifted)

## Pré-processamento dos Dados

Juntando os `DataFrames`

In [None]:
# Stack the training rows and the test rows into a single frame so that the
# feature engineering below runs on both at once.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent. sort=False keeps df's existing column order.
df = pd.concat([df, test], sort=False)

Tratamento de datas

In [None]:
# Expand the 'datetime' column into one calendar-component column per name
# below; each column is named after the .dt attribute it is read from.
datetime_parts = df['datetime'].dt
for part in ('year', 'month', 'day', 'dayofweek', 'hour'):
    df[part] = getattr(datetime_parts, part)

Criando a coluna `rolling_temp`

In [None]:
# Use 'datetime' as the index (rebinding df instead of mutating it in place).
df = df.set_index('datetime')

In [None]:
# Order the rows by index so the rolling window below runs over sorted rows.
df = df.sort_index()

In [None]:
# Mean of 'temp' over the current row and up to three preceding rows;
# min_periods=1 lets the first rows use a shorter window instead of yielding NaN.
temp_window = df['temp'].rolling(window=4, min_periods=1)
df['rolling_temp'] = temp_window.mean()

In [None]:
# Move the index back into a regular column and restore a default integer index.
df = df.reset_index()

Separando os `DataFrames`:

In [None]:
# Rows with no 'rentals' value form the test set.
test = df.loc[df['rentals'].isna()]

In [None]:
# Keep only the rows that have a 'rentals' value.
df = df.loc[df['rentals'].notna()]

## Selecionando as Colunas que Iremos Usar no Modelo

In [None]:
# Columns excluded from the model inputs: the three log-transformed target
# columns and the raw 'datetime' column.
removed_cols = [
    'rentals',
    'casual',
    'registered',
    'datetime',
]

In [None]:
# Feature list: every column of df that is not in removed_cols, in df's column order.
feats = [name for name in df.columns if name not in removed_cols]

## Criando um Set de Validação Melhor

In [None]:
# Split by day of month: days up to and including 15 go to train, later days to valid.
day_of_month = df['day']
train = df[day_of_month <= 15]
valid = df[day_of_month > 15]

In [None]:
# Display the (rows, columns) shape of each split.
train.shape, valid.shape

In [None]:
# Display the number of training rows per day of month, ordered by day.
train['day'].value_counts().sort_index()

In [None]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
# First model: a 200-tree random forest with a depth cap. random_state is
# fixed so repeated runs build the same forest.
rf = RandomForestRegressor(
    n_estimators=200,
    max_depth=17,
    max_features=0.9,
    min_samples_split=4,
    oob_score=True,
    n_jobs=-1,
    random_state=42,
)

In [None]:
# Fit on the training split; the target is the log-transformed 'rentals' column.
rf.fit(X=train[feats], y=train['rentals'])

In [None]:
# Predict the (log-scale) target for the validation split.
preds = rf.predict(X=valid[feats])

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# Root of the mean squared error on the validation split. The target was
# transformed with log(x + 1) earlier, so this RMSE is on the log scale.
valid_mse = mean_squared_error(valid['rentals'], preds)
valid_mse ** 0.5

In [None]:
# Drop the 'day' column from the feature list.
feats = [name for name in feats if name != 'day']

In [None]:
# Try again without the 'day' column: a fresh 200-tree forest, this time
# regularised through min_samples_leaf rather than a depth cap.
rf = RandomForestRegressor(
    n_estimators=200,
    min_samples_leaf=10,
    max_features=0.9,
    oob_score=True,
    n_jobs=-1,
    random_state=42,
)

In [None]:
# Fit the current model on the training split using the current feature list.
rf.fit(X=train[feats], y=train['rentals'])

In [None]:
# Predict the (log-scale) target for the validation split with the current model.
preds = rf.predict(X=valid[feats])

In [None]:
# Validation RMSE (log scale, since the target was log-transformed) for the current model.
valid_mse = mean_squared_error(valid['rentals'], preds)
valid_mse ** 0.5

In [None]:
# Predict on the training split itself, for comparison with the validation error.
train_preds = rf.predict(X=train[feats])

In [None]:
# Training RMSE (log scale, since the target was log-transformed).
train_mse = mean_squared_error(train['rentals'], train_preds)
train_mse ** 0.5