In [2]:
# 1. Autenticação
from google.colab import auth
auth.authenticate_user()
print("Autenticação concluída.")


Autenticação concluída.


In [3]:
# 2. Configuração do Projeto
project_id = 'snappy-spanner-474217-i9'

In [4]:
import pandas as pd
import pandas_gbq

query = """
SELECT
  TIMESTAMP_TRUNC(Time, HOUR) as DataHora,
  Appliance as Aparelho,

  -- Coluna correta confirmada: Consumption_Watts
  AVG(CAST(Consumption_Watts AS FLOAT64)) as Consumo_Medio_Hora,

  -- Features temporais extraídas via SQL
  EXTRACT(HOUR FROM Time) as Hora,
  EXTRACT(DAYOFWEEK FROM Time) as DiaSemana,
  EXTRACT(MONTH FROM Time) as Mes
FROM
  `tcc_consumo_energia.dados_limpos_house2`
GROUP BY
  1, 2, 4, 5, 6
ORDER BY
  1
"""

print("Executando query no BigQuery...")
df = pandas_gbq.read_gbq(query, project_id=project_id)

print(f"Dados carregados. Linhas: {df.shape[0]}")
df.head()

Executando query no BigQuery...
Downloading: 100%|[32m██████████[0m|
Dados carregados. Linhas: 102186


Unnamed: 0,DataHora,Aparelho,Consumo_Medio_Hora,Hora,DiaSemana,Mes
0,2013-09-17 22:00:00+00:00,Microwave,0.0,22,3,9
1,2013-09-17 22:00:00+00:00,Television,0.0,22,3,9
2,2013-09-17 22:00:00+00:00,Hi_Fi,0.0,22,3,9
3,2013-09-17 22:00:00+00:00,Washing_Machine,0.0,22,3,9
4,2013-09-17 22:00:00+00:00,Dishwasher,0.0,22,3,9


In [6]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error

# 1. Preparação
df['Aparelho_Code'] = df['Aparelho'].astype('category').cat.codes

# 2. Definir Features (X) e Alvo (y)
features = ['Hora', 'DiaSemana', 'Mes', 'Aparelho_Code']
target = 'Consumo_Medio_Hora'

X = df[features]
y = df[target]

# 3. Divisão Treino (80%) e Teste (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# 4. Regressão Linear (Baseline)
lr = LinearRegression()
lr.fit(X_train, y_train)
pred_lr = lr.predict(X_test)

# 5. Random Forest (Modelo Principal)
rf = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
rf.fit(X_train, y_train)
pred_rf = rf.predict(X_test)

# 6. Mostrar Métricas
r2_rf = r2_score(y_test, pred_rf)
mae_rf = mean_absolute_error(y_test, pred_rf)

print("\n" + "="*40)
print(" RESULTADOS: ")
print("="*40)
print("\n"f"1. R² (Random Forest): {r2_rf:.4f}")
print("\n"f"2. MAE (Erro Médio):   {mae_rf:.2f} Watts")
print("="*40 + "\n")


 RESULTADOS: 

1. R² (Random Forest): -0.1638

2. MAE (Erro Médio):   23.16 Watts



In [7]:
# 1. Tabela final com as previsões
df_resultado = df.loc[X_test.index].copy()
df_resultado['Consumo_Real'] = y_test
df_resultado['Previsao_Regressao'] = pred_lr
df_resultado['Previsao_RandomForest'] = pred_rf

# Limpa colunas auxiliares que não precisam ir pro BI
df_export = df_resultado[['DataHora', 'Aparelho', 'Consumo_Real', 'Previsao_Regressao', 'Previsao_RandomForest']]

# 2. Envia para o BigQuery
tabela_destino = 'tcc_consumo_energia.tabela_previsoes_finais'

print(f"Salvando resultados em: {tabela_destino}...")

pandas_gbq.to_gbq(df_export,
                  tabela_destino,
                  project_id=project_id,
                  if_exists='replace')
print("\nTabela criada no BigQuery")

Salvando resultados em: tcc_consumo_energia.tabela_previsoes_finais...


100%|██████████| 1/1 [00:00<00:00, 12905.55it/s]


Tabela criada no BigQuery



