# Databricks notebook source
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the silver-layer parquet data, convert it to pandas and order the
# rows by the "data" column.
df_silver = spark.read.parquet("/mnt/silver/economia_brasileira")
df_gold = df_silver.toPandas().sort_values(by="data")

# Pairwise correlation between the three indicator columns.
correlacao = df_gold.loc[:, ["ipca", "selic", "ibov"]].corr()

# Draw the correlation matrix as an annotated heatmap on its own axes.
fig, ax = plt.subplots()
sns.heatmap(data=correlacao, annot=True, cmap="Blues", ax=ax)
ax.set_title("Matriz de Correlação")
plt.show()

# Persist the sorted dataset to the gold layer without the pandas index.
# NOTE(review): the file is named "economia_correlacao" but this writes the
# full dataset, not the correlation matrix - confirm this is intended.
df_gold.to_csv(path_or_buf="/dbfs/mnt/gold/economia_correlacao.csv", index=False)


# COMMAND ----------

# Write the gold DataFrame to a CSV file under /tmp, omitting the index.
df_gold.to_csv(path_or_buf="/tmp/economia_correlacao.csv", index=False)


# COMMAND ----------

# Write the CSV to the driver's local /tmp first, then copy it into DBFS
# FileStore with dbutils (the "file:" scheme addresses the local filesystem).
local_csv = "/tmp/economia_correlacao.csv"
source_uri = "file:/tmp/economia_correlacao.csv"
target_uri = "dbfs:/FileStore/economia_correlacao.csv"

df_gold.to_csv(local_csv, index=False)
dbutils.fs.cp(source_uri, target_uri)


# COMMAND ----------

import pandas as pd
import matplotlib.pyplot as plt

# Reload the silver dataset into pandas, ordered by the "data" column so the
# lines are drawn in sequence along the x axis.
df = (
    spark.read.parquet("/mnt/silver/economia_brasileira")
    .toPandas()
    .sort_values(by="data")
)

# One chart with both series, each drawn against the same "data" axis.
fig, ax = plt.subplots(figsize=(10, 4))
for coluna, rotulo in (("ipca", "IPCA"), ("selic", "SELIC")):
    ax.plot(df["data"], df[coluna], label=rotulo)
ax.set_title("Evolução IPCA e SELIC (2009-2024)")
ax.set_xlabel("Data")
ax.set_ylabel("Percentual (%)")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()


# COMMAND ----------

# Plot the "ibov" column against "data" on its own chart.
# Uses the `df` DataFrame prepared by an earlier cell.
plt.figure(figsize=(10, 4))
plt.plot(df["data"], df["ibov"], label="IBOVESPA", color="green")
plt.title("Evolução IBOVESPA (2009-2024)")
plt.xlabel("Data")
plt.ylabel("Pontos")
# Without a legend call the label= passed to plot() is never displayed;
# this also keeps the chart consistent with the IPCA/SELIC chart.
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


# COMMAND ----------

import seaborn as sns

# Scatter plot of "selic" vs "ibov" with a fitted regression line in red,
# drawn on an explicitly created 6x4 figure.
fig, ax = plt.subplots(figsize=(6, 4))
sns.regplot(x="selic", y="ibov", data=df, line_kws={"color": "red"}, ax=ax)
ax.set_title("SELIC vs IBOVESPA")
fig.tight_layout()
plt.show()


# COMMAND ----------

# Register the silver dataset as a temp view BEFORE the SQL cell below runs:
# the %sql queries reference `silver_economia`, which only exists in the
# Spark session after createOrReplaceTempView has been called.
df_silver = spark.read.parquet("/mnt/silver/economia_brasileira")
df_silver.createOrReplaceTempView("silver_economia")


# COMMAND ----------

# MAGIC %sql
# MAGIC -- Total number of records
# MAGIC SELECT COUNT(*) FROM silver_economia;
# MAGIC
# MAGIC -- Average of each indicator
# MAGIC SELECT AVG(ipca), AVG(selic), AVG(ibov) FROM silver_economia;
# MAGIC
# MAGIC -- Rows for the most recent year (2024)
# MAGIC SELECT * FROM silver_economia WHERE YEAR(data) = 2024;
# MAGIC


# COMMAND ----------

# MAGIC %sql
# MAGIC -- Number of rows in the silver_economia temp view
# MAGIC SELECT COUNT(*) FROM silver_economia
# MAGIC

# COMMAND ----------

# Read the silver parquet data again and (re)register it under the view name
# that the SQL cells query; createOrReplaceTempView overwrites any existing
# view with the same name.
df_silver = spark.read.format("parquet").load("/mnt/silver/economia_brasileira")
df_silver.createOrReplaceTempView("silver_economia")


# COMMAND ----------

# MAGIC %sql
# MAGIC -- Number of rows in the silver_economia temp view
# MAGIC SELECT COUNT(*) FROM silver_economia;
# MAGIC

# COMMAND ----------

# MAGIC %sql
# MAGIC -- Average of each indicator column across all rows of the view
# MAGIC SELECT AVG(ipca), AVG(selic), AVG(ibov) FROM silver_economia;
# MAGIC

# COMMAND ----------

import matplotlib.pyplot as plt
import seaborn as sns


# COMMAND ----------

# MAGIC %sql
# MAGIC -- NOTE: the cell magic appears once, as the %sql line above. Any further
# MAGIC -- magic-style line inside the cell is passed to the SQL parser and fails.
# MAGIC
# MAGIC -- Create the database (optional) and make it the current one
# MAGIC CREATE DATABASE IF NOT EXISTS mvp_economia;
# MAGIC USE mvp_economia;
# MAGIC
# MAGIC -- Create the time dimension table
# MAGIC CREATE TABLE IF NOT EXISTS dim_tempo (
# MAGIC   data_id INT,
# MAGIC   data DATE,
# MAGIC   ano INT,
# MAGIC   mes INT
# MAGIC );
# MAGIC
# MAGIC -- Other tables...
# MAGIC

# COMMAND ----------

# DDL for the time dimension table. IF NOT EXISTS makes the statement a
# no-op when the table is already present in the current database.
dim_tempo_ddl = """
CREATE TABLE IF NOT EXISTS dim_tempo (
  data_id INT,
  data DATE,
  ano INT,
  mes INT
)
"""
spark.sql(dim_tempo_ddl)


# COMMAND ----------

# MAGIC %sql
# MAGIC -- Create the mvp_economia database and all of its tables.
# MAGIC -- Every statement uses IF NOT EXISTS, so existing objects are left as-is.
# MAGIC CREATE DATABASE IF NOT EXISTS mvp_economia;
# MAGIC USE mvp_economia;
# MAGIC
# MAGIC -- Time dimension: one id per date, with year and month split out
# MAGIC CREATE TABLE IF NOT EXISTS dim_tempo (
# MAGIC   data_id INT,
# MAGIC   data DATE,
# MAGIC   ano INT,
# MAGIC   mes INT
# MAGIC );
# MAGIC
# MAGIC -- IPCA dimension: id plus the IPCA value
# MAGIC CREATE TABLE IF NOT EXISTS dim_ipca (
# MAGIC   ipca_id INT,
# MAGIC   ipca_valor FLOAT
# MAGIC );
# MAGIC
# MAGIC -- SELIC dimension: id plus the SELIC value
# MAGIC CREATE TABLE IF NOT EXISTS dim_selic (
# MAGIC   selic_id INT,
# MAGIC   selic_valor FLOAT
# MAGIC );
# MAGIC
# MAGIC -- IBOV dimension: id plus the IBOV value (stored as an integer)
# MAGIC CREATE TABLE IF NOT EXISTS dim_ibov (
# MAGIC   ibov_id INT,
# MAGIC   ibov_valor INT
# MAGIC );
# MAGIC
# MAGIC -- Fact table: one id column per dimension table above.
# MAGIC -- No key constraints are declared; the ids are matched only by the joins
# MAGIC -- in the query cell.
# MAGIC CREATE TABLE IF NOT EXISTS fato_economia (
# MAGIC   data_id INT,
# MAGIC   ipca_id INT,
# MAGIC   selic_id INT,
# MAGIC   ibov_id INT
# MAGIC );
# MAGIC

# COMMAND ----------

# MAGIC %sql
# MAGIC -- Insert one sample row into each dimension and one matching fact row.
# MAGIC -- NOTE(review): INSERT INTO appends and the tables declare no keys, so
# MAGIC -- re-running this cell adds the same rows again.
# MAGIC USE mvp_economia;
# MAGIC
# MAGIC -- Populate the dimensions
# MAGIC INSERT INTO dim_tempo VALUES (1, '2024-12-01', 2024, 12);
# MAGIC INSERT INTO dim_ipca VALUES (1, 0.47);
# MAGIC INSERT INTO dim_selic VALUES (1, 11.75);
# MAGIC INSERT INTO dim_ibov VALUES (1, 130800);
# MAGIC
# MAGIC -- Populate the fact table (ids refer to the dimension rows above)
# MAGIC INSERT INTO fato_economia VALUES (1, 1, 1, 1);
# MAGIC

# COMMAND ----------

# MAGIC %sql
# MAGIC -- Join the fact table to each dimension by id to show the date parts
# MAGIC -- alongside the three indicator values.
# MAGIC -- NOTE(review): there is no USE statement here; presumably this relies on
# MAGIC -- the current database having been set to mvp_economia by an earlier cell.
# MAGIC SELECT 
# MAGIC   t.data,
# MAGIC   t.ano,
# MAGIC   t.mes,
# MAGIC   i.ipca_valor,
# MAGIC   s.selic_valor,
# MAGIC   b.ibov_valor
# MAGIC FROM fato_economia f
# MAGIC JOIN dim_tempo t ON f.data_id = t.data_id
# MAGIC JOIN dim_ipca i ON f.ipca_id = i.ipca_id
# MAGIC JOIN dim_selic s ON f.selic_id = s.selic_id
# MAGIC JOIN dim_ibov b ON f.ibov_id = b.ibov_id;
# MAGIC