# GET request using the API key

In [0]:
API_ENDPOINT = "https://api.freecurrencyapi.com/v1/latest?apikey="
API_KEY = "fca_live_SSQfvo0UbcsIHFN55nuixubaI3rQI3jhe94SjWfR"

In [0]:
import requests

def get_rates():
  response = requests.get(API_ENDPOINT + API_KEY)
  return response.json()['data']

In [0]:
req = get_rates()

In [0]:
# Get the BRL convertion rate.
brl = req['BRL']

In [0]:
brl

5.4570407161

# Read the previous final dataset

In [0]:
from pyspark.sql.types import *
from pyspark.sql.functions import *

In [0]:
# Read the table.
df_full_cleaned_orders = "/Volumes/zubale/challenge1/output/order_full_information.csv"

In [0]:
# Schema for the products table.
orders_schema= StructType([
StructField('order_id',StringType(),nullable=True),
StructField('order_created_date',DateType(),nullable=True ),
StructField('product_name',StringType(),nullable=True),
StructField('quantity',IntegerType(),nullable=True),
StructField('total_price',FloatType(),nullable=True),
])

In [0]:
df = spark.read.schema(orders_schema).csv(df_full_cleaned_orders, sep=",", header=True)

In [0]:
df.limit(5).display()

order_id,order_created_date,product_name,quantity,total_price
1,2024-12-01,Product_11,1,69.06
2,2024-12-01,Product_17,2,197.02
3,2024-12-01,Product_19,1,51.94
4,2024-12-01,Product_12,1,50.99
5,2024-12-01,Product_11,2,138.12


In [0]:
df_transformed = df.withColumn("total_price_us", col("total_price") / brl).select(
    col("order_created_date"),
    col("order_id"),
    col("product_name"),
    col("quantity"),
    col("total_price").alias('total_price_br'),
    col("total_price_us")
)

In [0]:
# Save it to a csv.
df_transformed.write.mode('overwrite').csv("/Volumes/zubale/challenge2/output/fixed_order_full_information.csv", header=True)


# Exploratory

### 1.	Date where we create the max amount of orders. 

In [0]:
from pyspark.sql.functions import count, desc

# Date where we created the max amount of orders.
df_transformed_max_orders = df_transformed.groupBy("order_created_date").agg(
    count("order_id").alias("order_count")
).orderBy(
    desc("order_count")
).limit(1)
df_transformed_max_orders.display()

order_created_date,order_count
2024-12-06,10


### 2.	Most demanded product and the total sell price. 

In [0]:
# 2. Most demanded product and the total sell price.
df_transformed_demanded_products = df_transformed.groupBy("product_name").agg(
    sum("quantity").alias("sum_quantity"),
    sum("total_price_us").alias("total_price_us"),
    sum("total_price_br").alias("total_price_br")
).orderBy("sum_quantity", ascending=False).limit(1)
df_transformed_demanded_products.display()

product_name,sum_quantity,total_price_us,total_price_br
Product_5,20,163.42190818466176,891.8000068664551


### 3.	The top 3 most demanded categories.

Since we now require to add the column for categgory of the product, we have to go back to the bronze layer, to get that column back on our final table.
This is not optimal and we will have to use both raw tables, because the right way to do the join is with the product_id, and we dont have that now, It is not right to use the product name to do a join.

In [0]:
df_products_catalog = spark.table("zubale.bronze.zubaleproducts")
df_orders = spark.table("zubale.bronze.zubaleorders")

In [0]:
# Join the categories and the quantity.
df_products = df_products_catalog.join(df_orders, df_products_catalog.id == df_orders.product_id, "left").select(df_products_catalog.category, df_orders.quantity).groupBy("category").agg(sum("quantity").alias("quantity")).orderBy("quantity", ascending=False).limit(3)
df_products.display()


category,quantity
Shirts,50
Jackets,30
Pants,29


### Store the results in a single CSV file named: kpi_product_orders.csv

It is not possible to store more than one page inside a CSV like an Excel would.

I would save it to a single excel file, due to limitations of the Databricks free edition I am using I am not able to access dbfs.

I will save every one of the results KPIs tables to a csv, one for each.

In [0]:
# df_products, df_transformed_demanded_products and df_transformed_max_orders saved into 3 separate csv in the volume/zubale/challenge2/ouput one for each.
df_products.write.mode('overwrite').csv("/Volumes/zubale/challenge2/output/demanded-categories.csv", header=True)
df_transformed_demanded_products.write.mode('overwrite').csv("/Volumes/zubale/challenge2/output/demanded_products.csv", header=True)
df_transformed_max_orders.write.mode('overwrite').csv("/Volumes/zubale/challenge2/output/max_orders.csv", header=True)

In [0]:
# Saving the tables into golden.
df_products.write.mode('overwrite').saveAsTable("zubale.golden.demanded_categories")
df_transformed_demanded_products.write.mode('overwrite').saveAsTable("zubale.golden.demanded_products")
df_transformed_max_orders.write.mode('overwrite').saveAsTable("zubale.golden.max_orders")