## Aqui, trabalharemos nas nossas análises

In [1]:
%defaultDatasource jdbc:h2:mem:db

Tabela das Receitas

In [2]:
-- Remove any earlier copy of Recipes so the notebook can be re-run from the top.
DROP TABLE IF EXISTS Recipes;

In [3]:
-- Load the recipe details CSV; the "Recipe ID" header is exposed as recipe_id.
CREATE TABLE Recipes AS
SELECT
    "Recipe ID" AS recipe_id,
    Cuisine,
    Title
FROM CSVREAD('../data/raw/01_Recipe_Details.csv');

In [4]:
-- Index Recipes by recipe_id.
CREATE INDEX index_RA ON Recipes (recipe_id);

Tabela dos Ingredientes

In [5]:
-- Remove any earlier copy of Ingredients_to_Foodgroup before it is rebuilt.
DROP TABLE IF EXISTS Ingredients_to_Foodgroup;

In [6]:
-- Load the ingredient ID -> food group mapping from the interim CSV.
CREATE TABLE Ingredients_to_Foodgroup AS
SELECT
    ID,
    food_group
FROM CSVREAD('../data/interim/Ingredients_to_Foodb_Groups.csv');

In [7]:
-- Index the mapping by ingredient ID (the column Recipes_Ingredients is joined on later).
CREATE INDEX index_IF ON Ingredients_to_Foodgroup (ID);

Tabela de ligação entre receitas e ingredientes

In [8]:
-- Remove any earlier copy of Recipes_Ingredients before it is rebuilt.
DROP TABLE IF EXISTS Recipes_Ingredients;

In [9]:
-- Load the recipe <-> ingredient link rows; the CSV headers "Recipe ID" and
-- "Entity ID" are exposed as recipe_id and ingredient_id.
CREATE TABLE Recipes_Ingredients AS
SELECT
    "Recipe ID" AS recipe_id,
    "Entity ID" AS ingredient_id
FROM CSVREAD('../data/raw/04_Recipe-Ingredients_Aliases.csv');

In [10]:
-- Index the link table by recipe_id.
CREATE INDEX index_RI ON Recipes_Ingredients (recipe_id);

Visão (view) de ligação entre país e região

In [11]:
-- Countries is created as a VIEW in the next cell, so it is dropped with
-- DROP VIEW (not DROP TABLE) to keep the drop/create pair consistent.
DROP VIEW IF EXISTS Countries;

In [12]:
-- View over the countries CSV mapping each Country to its "World Region"
-- (exposed as region). Being a view, the SELECT over CSVREAD is not
-- materialised into a table here.
CREATE VIEW Countries AS
SELECT
    Country,
    "World Region" AS region
FROM CSVREAD('../data/interim/CountriesTable.csv');

Criando a tabela de preços dos alimentos (food prices)

In [13]:
-- Remove any earlier copy of Prices before it is rebuilt.
DROP TABLE IF EXISTS Prices;

In [14]:
-- Load the food price data, keeping the country, the series (category) and the
-- "2017 [YR2017]" column, exposed as price.
CREATE TABLE Prices AS
SELECT
    "Country Name"  AS country,
    "Series Name"   AS category,
    "2017 [YR2017]" AS price
FROM CSVREAD('../data/raw/Food_Prices_Data.csv');

Atualizando os valores dos preços para float e colocando como 0 os que não têm dados

In [15]:
-- Normalise the price column in place. Every CASE branch yields text so the
-- result has a single type (price comes from CSVREAD; presumably a character
-- column -- confirm against the H2 CSVREAD documentation).
--   '..'           -> '0'   (the marker is replaced by zero)
--   plain decimal  -> text form of its FLOAT value
--   anything else  -> left untouched
-- The numeric test uses H2's REGEXP operator: LIKE has no character-class
-- syntax, so a bracket pattern such as '%[^0-9.]%' is compared as literal text
-- and does not restrict the value to digits and dots.
-- Intentionally no WHERE clause: every row is inspected.
UPDATE Prices
SET price = CASE
                WHEN price = '..' THEN '0'
                WHEN price REGEXP '^([0-9]+(\.[0-9]*)?|\.[0-9]+)$'
                    THEN CAST(CAST(price AS FLOAT) AS VARCHAR)
                ELSE price
            END;


Criando uma tabela de preços de categoria por região

In [16]:
-- Remove any earlier copy of Prices_per_Region before it is rebuilt.
DROP TABLE IF EXISTS Prices_per_Region;

In [17]:
-- Average price per (region, category): each Prices row is matched to its
-- country's region through Countries, then averaged within the group.
-- NULL prices are counted as 0 in the average (COALESCE).
-- GROUP BY already yields one row per (region, category), so no DISTINCT is
-- needed; the join is written explicitly instead of as a comma join.
CREATE TABLE Prices_per_Region AS
SELECT
    C.region,
    AVG(COALESCE(CAST(P.price AS FLOAT), 0)) AS average_price,
    P.category
FROM Countries AS C
INNER JOIN Prices AS P
    ON C.Country = P.country
GROUP BY C.region, P.category
ORDER BY C.region;

Criando tabela de Receitas por Região

In [18]:
-- Remove any earlier copy of Recipes_per_Region before it is rebuilt.
DROP TABLE IF EXISTS Recipes_per_Region;

In [19]:
-- Pair each recipe with a region by matching the recipe's Cuisine against the
-- region names present in Prices_per_Region.
-- NOTE(review): Prices_per_Region holds one row per (region, category), so a
-- recipe is emitted once for every category row of its region. The row counts
-- used as ratio denominators further down include this multiplication; do not
-- de-duplicate here without updating those denominators as well.
CREATE TABLE Recipes_per_Region AS
SELECT
    R.recipe_id,
    PR.region
FROM Recipes AS R
INNER JOIN Prices_per_Region AS PR
    ON R.Cuisine = PR.region;

In [20]:
-- Index Recipes_per_Region by recipe_id (the column it is joined on later).
CREATE INDEX index_RP ON Recipes_per_Region (recipe_id);

Carregando a tabela de Foodb_to_Prices

In [21]:
-- Drop first so this cell can be re-run, like the other table-building cells.
DROP TABLE IF EXISTS Foodb_to_Prices;

-- Load the food group -> price series mapping. All CSV columns are kept; later
-- cells read food_group and series_name from this table.
CREATE TABLE Foodb_to_Prices AS
SELECT *
FROM CSVREAD('../data/interim/Foodb_to_Food_Prices.csv');

Criando uma tabela que, para cada ingrediente, para cada receita, temos a categoria do ingrediente (para podermos analisar o seu preço)

In [22]:
-- Drop first so this cell can be re-run, like the other table-building cells.
DROP TABLE IF EXISTS Ingredients_Series_per_Recipes;

-- One row per (recipe ingredient, matching price series): each ingredient of a
-- recipe is mapped to its food group, and the food group to a price series.
-- The mapping table alias is IFG rather than IF, because IF is an H2 keyword.
CREATE TABLE Ingredients_Series_per_Recipes AS
SELECT
    RI.recipe_id,
    FP.series_name
FROM Recipes_Ingredients AS RI
INNER JOIN Ingredients_to_Foodgroup AS IFG
    ON RI.ingredient_id = IFG.id
INNER JOIN Foodb_to_Prices AS FP
    ON IFG.food_group = FP.food_group;

In [23]:
-- Composite index on (recipe_id, series_name).
CREATE INDEX index_ISR ON Ingredients_Series_per_Recipes (recipe_id, series_name);

Criando tabela com a razão entre o número de ocorrências de cada categoria e o número de receitas da região "Africa".

In [24]:
-- Drop first so this cell can be re-run, like the other table-building cells.
DROP TABLE IF EXISTS Ingredients_Ratio_Africa;

-- For region 'Africa': occurrences of each price series among the region's
-- recipe rows, divided by the number of Recipes_per_Region rows for the region.
-- The denominator is computed from Recipes_per_Region (the same count the
-- recipe-count query reports) instead of being pasted in as a constant, so it
-- cannot go stale when the input data changes. NULLIF guards the division
-- if the region has no rows.
CREATE TABLE Ingredients_Ratio_Africa AS
SELECT
    ISR.series_name,
    COUNT(ISR.series_name)
        / NULLIF(CAST((SELECT COUNT(T.recipe_id)
                       FROM Recipes_per_Region AS T
                       WHERE T.region = 'Africa') AS FLOAT), 0) AS ratio
FROM Ingredients_Series_per_Recipes AS ISR
INNER JOIN Recipes_per_Region AS RR
    ON ISR.recipe_id = RR.recipe_id
WHERE RR.region = 'Africa'
GROUP BY ISR.series_name
ORDER BY ratio;

In [25]:
-- Export every row of Ingredients_Ratio_Africa to the processed-data folder.
CALL CSVWRITE(
    '../data/processed/Ingredients_Ratio_Africa.csv',
    'SELECT * FROM Ingredients_Ratio_Africa'
);

6

Consultando o número de receitas da região "Africa" (valor usado como denominador na razão acima).

In [26]:
-- Number of Recipes_per_Region rows (with a non-NULL recipe_id) for 'Africa'.
SELECT
    COUNT(recipe_id)
FROM
    Recipes_per_Region
WHERE
    region = 'Africa'
GROUP BY
    region;

3906

Criando tabela com a razão entre o número de ocorrências de cada categoria e o número de receitas da região "France".

In [27]:
-- Drop first so this cell can be re-run, like the other table-building cells.
DROP TABLE IF EXISTS Ingredients_Ratio_France;

-- For region 'France': occurrences of each price series among the region's
-- recipe rows, divided by the number of Recipes_per_Region rows for the region.
-- The denominator is computed from Recipes_per_Region (the same count the
-- recipe-count query reports) instead of being pasted in as a constant, so it
-- cannot go stale when the input data changes. NULLIF guards the division
-- if the region has no rows.
CREATE TABLE Ingredients_Ratio_France AS
SELECT
    ISR.series_name,
    COUNT(ISR.series_name)
        / NULLIF(CAST((SELECT COUNT(T.recipe_id)
                       FROM Recipes_per_Region AS T
                       WHERE T.region = 'France') AS FLOAT), 0) AS ratio
FROM Ingredients_Series_per_Recipes AS ISR
INNER JOIN Recipes_per_Region AS RR
    ON ISR.recipe_id = RR.recipe_id
WHERE RR.region = 'France'
GROUP BY ISR.series_name
ORDER BY ratio;

Consultando o número de receitas da região "France" (valor usado como denominador na razão acima).

In [28]:
-- Number of Recipes_per_Region rows (with a non-NULL recipe_id) for 'France'.
SELECT
    COUNT(recipe_id)
FROM
    Recipes_per_Region
WHERE
    region = 'France'
GROUP BY
    region;

16218

In [30]:
-- Export every row of Ingredients_Ratio_France to the processed-data folder.
CALL CSVWRITE(
    '../data/processed/Ingredients_Ratio_France.csv',
    'SELECT * FROM Ingredients_Ratio_France'
);


6