## Importando Bibliotecas

In [None]:
import pandas as pd
import numpy as np

## Importando arquivos

In [None]:
# parse_dates=[0] asks pandas to convert the first column of each file to datetime on load.
read_opts = {'parse_dates': [0]}
df = pd.read_csv('../input/train.csv', **read_opts)
test = pd.read_csv('../input/test.csv', **read_opts)

## Mudando o Nome da Coluna count

In [None]:
# Rename the 'count' column to 'rentals'. rename() returns a new frame, so rebind df.
df = df.rename(columns={'count': 'rentals'})

## Pegando o log das colunas rentals, registered e casual:

Somar 1 antes de aplicar o logaritmo serve para evitarmos a operação `log(0)`, que resultaria em um valor infinito negativo.

In [None]:
# Log-transform the count columns. np.log1p(x) computes log(1 + x), so zero
# counts map to 0 instead of hitting log(0), and it is the numerically
# accurate form of log(x + 1) for small x.
for col in ['rentals', 'registered', 'casual']:
    df[col] = np.log1p(df[col])

## Pré-processamento dos Dados

Juntando os `DataFrames`

In [None]:
# Stack the train and test rows into one frame so feature engineering is applied
# to both at once. DataFrame.append was deprecated in pandas 1.4 and removed in
# 2.0; pd.concat is the supported equivalent. Columns absent from `test` are
# filled with NaN by concat's default outer join on columns.
df = pd.concat([df, test])

Tratamento de datas

In [None]:
# Expand the timestamp into one column per calendar component, in this order.
for part in ('year', 'month', 'day', 'dayofweek', 'hour'):
    df[part] = getattr(df['datetime'].dt, part)

Criando a coluna `rolling_temp`

In [None]:
# Use the timestamp as the index so the rows can be ordered by it.
df = df.set_index('datetime')

In [None]:
# Preview the first five rows of the frame.
df.head(n=5)

In [None]:
# Order rows by index so the rolling window below runs over consecutive rows.
df = df.sort_index()

In [None]:
# Mean temperature over the current row and up to three preceding rows.
# min_periods=1 lets the first rows use a shorter window instead of yielding NaN.
temp_window = df['temp'].rolling(window=4, min_periods=1)
df['rolling_temp'] = temp_window.mean()

In [None]:
# Move the index back into a regular column and restore a default integer index.
df = df.reset_index()

Separando os `DataFrames`:

In [None]:
# Rows with no 'rentals' value are taken as the test set.
test = df.loc[df['rentals'].isna()]

In [None]:
# Keep only the rows that have a 'rentals' value as the training data.
df = df.loc[df['rentals'].notna()]

## Selecionando as Colunas que Iremos Usar no Modelo

In [None]:
# Columns excluded from the feature list built from df.columns.
removed_cols = [
    'rentals',
    'casual',
    'registered',
    'datetime',
]

In [None]:
# Feature list: every column of df except those in removed_cols, in original order.
# errors='ignore' mirrors the membership filter, which tolerates absent names.
feats = df.columns.drop(removed_cols, errors='ignore').tolist()

## Instanciando o Modelo

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [None]:
# Random train/validation split using scikit-learn's default proportions;
# the fixed seed makes the split repeatable.
split_seed = 42
train, valid = train_test_split(df, random_state=split_seed)

In [None]:
# Random forest with library-default hyperparameters; only the seed is fixed.
forest_params = {'random_state': 42}
rf = RandomForestRegressor(**forest_params)

In [None]:
# Fit on the training split: the feature columns as inputs, 'rentals' as the target.
X_train = train[feats]
y_train = train['rentals']
rf.fit(X_train, y_train)

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# Root mean squared error on the validation split. Arguments follow
# scikit-learn's (y_true, y_pred) order. 'rentals' was log-transformed earlier
# in the notebook, so this error is measured in log space.
valid_pred = rf.predict(valid[feats])
np.sqrt(mean_squared_error(valid['rentals'], valid_pred))

In [None]:
# Bar chart of the fitted forest's feature importances, smallest to largest.
importances = pd.Series(rf.feature_importances_, index=feats)
importances.sort_values().plot.bar()