# Wrangling dataframe for 1998 and 2021 Export / Import / Domestic Consumption / Production comparisons Worldwide


In [61]:
# Load Packages
library(readr)
library(dplyr)

In [2]:
# Loading in dataframe
USDA <- read.csv("../Data/USDA_Production_Supply_And_Distribution.csv")

In [3]:
# Filtering for only the years 1998 and 2021

USDA9821 <- filter(USDA, Year == 2021 | Year == 1998)

unique(USDA9821$Year)

In [5]:
unique(USDA9821$Commodity_Description)

In [9]:
# Filtering only highly-correlated commodities to Honey

USDA9821Filtered <- filter(USDA9821, Commodity_Description == 'Almonds, Shelled Basis' |
                           Commodity_Description == 'Apples, Fresh' |
                           Commodity_Description == 'Corn' |
                           Commodity_Description == 'Grapefruit, Fresh' |
                           Commodity_Description == 'Meat, Swine' |
                           Commodity_Description == 'Barley' |
                           Commodity_Description == 'Oats' |
                           Commodity_Description == 'Rye' |
                           Commodity_Description == 'Oil, Soybean' |
                           Commodity_Description == 'Oil, Rapeseed' |
                           Commodity_Description == 'Walnuts, Inshell Basis' |
                           Commodity_Description == 'Coffee, Green' |
                           Commodity_Description == 'Dairy, Milk, Fluid')

unique(USDA9821Filtered$Commodity_Description)  

In [10]:
# Deleteing null values
USDA9821Filtered <- na.omit(USDA9821Filtered)

In [27]:
USDA9821Filtered <- data.frame(USDA9821Filtered)

USDA9821Filtered1 <- data.frame(USDA9821Filtered)


In [32]:
# Converting all the possible Measurements into common MT (or Metric Ton)


# Convert the 1000 MT to Tons

USDA9821Filtered$Value[USDA9821Filtered$Unit_Description == '(1000 MT)'] <- USDA9821Filtered$Value * 1000
USDA9821Filtered$Unit_Description[USDA9821Filtered$Unit_Description == '(1000 MT)'] <- '(MT)'

# Convert the (1000 60 KG BAGS) to Tons

USDA9821Filtered$Value[USDA9821Filtered$Unit_Description == '(1000 60 KG BAGS)'] <- USDA9821Filtered$Value * 1000 * .001 * 60
USDA9821Filtered$Unit_Description[USDA9821Filtered$Unit_Description == '(1000 60 KG BAGS)'] <- '(MT)'

# Convert the (1000 MT CWE) to Tons

USDA9821Filtered$Value[USDA9821Filtered$Unit_Description == '(1000 MT CWE)'] <- USDA9821Filtered$Value * 1000
USDA9821Filtered$Unit_Description[USDA9821Filtered$Unit_Description == '(1000 MT CWE)'] <- '(MT)'

# Convert the (1000 HA) to Tons

USDA9821Filtered$Value[USDA9821Filtered$Unit_Description == '(1000 HA)'] <- USDA9821Filtered$Value * 1000 * 12.55
USDA9821Filtered$Unit_Description[USDA9821Filtered$Unit_Description == '(1000 HA)'] <- '(MT)'

# Convert the (1000 MT CWE) to Tons

USDA9821Filtered$Value[USDA9821Filtered$Unit_Description == '(MT/HA)'] <- USDA9821Filtered$Value * 12.55
USDA9821Filtered$Unit_Description[USDA9821Filtered$Unit_Description == '(MT/HA)'] <- '(MT)'

# Convert the (KG) to Tons

USDA9821Filtered$Value[USDA9821Filtered$Unit_Description == '(KG)'] <- USDA9821Filtered$Value * 1000 * .001
USDA9821Filtered$Unit_Description[USDA9821Filtered$Unit_Description == '(KG)'] <- '(MT)'


View(USDA9821Filtered)

“number of items to replace is not a multiple of replacement length”


Unnamed: 0_level_0,X,Commodity_Description,Country_Code,Country_Name,Year,Attribute_Description,Unit_Description,Value,Seen_On
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<dbl>,<int>
1,120,"Almonds, Shelled Basis",GR,Greece,1998,Beginning Stocks,(MT),4523,199900
2,121,"Almonds, Shelled Basis",GR,Greece,1998,Domestic Consumption,(MT),14000,199900
3,122,"Almonds, Shelled Basis",GR,Greece,1998,Ending Stocks,(MT),3623,199900
4,123,"Almonds, Shelled Basis",GR,Greece,1998,Exports,(MT),1500,199900
5,124,"Almonds, Shelled Basis",GR,Greece,1998,Imports,(MT),2600,199900
6,125,"Almonds, Shelled Basis",GR,Greece,1998,Production,(MT),12000,199900
7,126,"Almonds, Shelled Basis",GR,Greece,1998,Total Distribution,(MT),19123,199900
8,127,"Almonds, Shelled Basis",GR,Greece,1998,Total Supply,(MT),19123,199900
9,704,"Almonds, Shelled Basis",IT,Italy,1998,Beginning Stocks,(MT),2000,199900
10,705,"Almonds, Shelled Basis",IT,Italy,1998,Domestic Consumption,(MT),22593,199900


In [36]:
# Looking at unique Units of Measurements

unique(USDA9821Filtered$Unit_Description)

In [35]:
# Deleteing the rows for units that can't be converted into MT

USDA9821Filtered <- USDA9821Filtered[USDA9821Filtered$Unit_Description != '(PERCENT)', ] 

USDA9821Filtered <- USDA9821Filtered[USDA9821Filtered$Unit_Description != '(1000 HEAD)', ] 


In [41]:
USDA9821Filtered

Unnamed: 0_level_0,X,Commodity_Description,Country_Code,Country_Name,Year,Attribute_Description,Unit_Description,Value,Seen_On
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<dbl>,<int>
1,120,"Almonds, Shelled Basis",GR,Greece,1998,Beginning Stocks,(MT),4523,199900
2,121,"Almonds, Shelled Basis",GR,Greece,1998,Domestic Consumption,(MT),14000,199900
3,122,"Almonds, Shelled Basis",GR,Greece,1998,Ending Stocks,(MT),3623,199900
4,123,"Almonds, Shelled Basis",GR,Greece,1998,Exports,(MT),1500,199900
5,124,"Almonds, Shelled Basis",GR,Greece,1998,Imports,(MT),2600,199900
6,125,"Almonds, Shelled Basis",GR,Greece,1998,Production,(MT),12000,199900
7,126,"Almonds, Shelled Basis",GR,Greece,1998,Total Distribution,(MT),19123,199900
8,127,"Almonds, Shelled Basis",GR,Greece,1998,Total Supply,(MT),19123,199900
9,704,"Almonds, Shelled Basis",IT,Italy,1998,Beginning Stocks,(MT),2000,199900
10,705,"Almonds, Shelled Basis",IT,Italy,1998,Domestic Consumption,(MT),22593,199900


In [47]:
# Dropping unnecessary values & Merging Attribute Descriptions


# Converting different types of Imports to Imports
# 'Bean Imports', 'Roast & Ground Imports', 'TY Imports, Soluble Imports

# Convert the Bean Imports to Imports

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Bean Imports'] <- 'Imports'

# Convert the Roast & Ground Imports to Imports

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Roast & Ground Imports'] <- 'Imports'

# Convert the TY Imports to Imports

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'TY Imports'] <- 'Imports'

# Convert the Soluble Imports to Imports

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Soluble Imports'] <- 'Imports'


# Converting different types of Exports to Exports
# 'Bean Exports', 'Roast & Ground Exports', 'TY Exports', 'Soluble Exports'


# Convert the Bean Exports to Exports

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Bean Exports'] <- 'Exports'

# Convert the Roast & Ground Exports to Exports

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Roast & Ground Exports'] <- 'Exports'

# Convert the TY Exports to Exports

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'TY Exports'] <- 'Exports'

# Convert the Soluble Exports to Exports

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Soluble Exports'] <- 'Exports'



In [56]:
# Converting different types of Production to Production
# Arabica Production, Cows Milk Production, Non-Comm. Production, Robusta Production, Other Milk Production, Area Harvested

# Convert the Arabica Production to Production

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Arabica Production'] <- 'Production'


# Convert the Cows Milk Production to Production

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Cows Milk Production'] <- 'Production'


# Convert the Non-Comm. Production to Production

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Non-Comm. Production'] <- 'Production'


# Convert the Robusta Production to Production

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Robusta Production'] <- 'Production'


# Convert the Other Milk Production to Production

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Other Milk Production'] <- 'Production'


# Convert the Other Area Harvested to Production

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Area Harvested'] <- 'Production'


# Convert the Other Production to Production

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Other Production'] <- 'Production'



In [51]:
# Converting different types of Domestic Consumptions to Domestic Consumption
# Soluble Dom. Cons., Rst,Ground Dom. Consum, Fluid Use Dom. Consum., FSI Consumption, Feed Dom. Consumption, 
# Industrial Dom. Cons, Food Use Dom. Cons., Fresh Dom. Consumption, Industrial Dom. Cons.

# Convert the Soluble Dom. Cons. to Domestic Consumption

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Soluble Dom. Cons.'] <- 'Domestic Consumption'


# Convert the Rst,Ground Dom. Consum. to Domestic Consumption

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Rst,Ground Dom. Consum'] <- 'Domestic Consumption'


# Convert the Fluid Use Dom. Consum. to Domestic Consumption

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Fluid Use Dom. Consum.'] <- 'Domestic Consumption'


# Convert the FSI Consumption to Domestic Consumption

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'FSI Consumption'] <- 'Domestic Consumption'


# Convert the Feed Dom. Consumption to Domestic Consumption

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Feed Dom. Consumption'] <- 'Domestic Consumption'


# Convert the Industrial Dom. Cons to Domestic Consumption

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Industrial Dom. Cons'] <- 'Domestic Consumption'


# Convert the Food Use Dom. Cons. to Domestic Consumption

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Food Use Dom. Cons.'] <- 'Domestic Consumption'


# Convert the Fresh Dom. Consumption to Domestic Consumption

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Fresh Dom. Consumption'] <- 'Domestic Consumption'


# Convert the Industrial Dom. Cons. to Domestic Consumption

USDA9821Filtered$Attribute_Description[USDA9821Filtered$Attribute_Description == 'Industrial Dom. Cons.'] <- 'Domestic Consumption'


In [60]:
# Filtering the data set and keeping only 'Production', 'Imports', 'Exports', 'Domestic Consumption'

USDAFil <- filter(USDA9821Filtered, Attribute_Description == 'Production' |
                           Attribute_Description == 'Imports' |
                           Attribute_Description == 'Exports' |
                           Attribute_Description == 'Domestic Consumption')

unique(USDAFil$Attribute_Description)

USDAFilCopy <- data.frame(USDAFil)

In [62]:
# Removing unnecessary columns

USDAFil <- subset(USDAFil, select = -c(X, Seen_On))
USDAFil

Unnamed: 0_level_0,Commodity_Description,Country_Code,Country_Name,Year,Attribute_Description,Unit_Description,Value
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<dbl>
1,"Almonds, Shelled Basis",GR,Greece,1998,Domestic Consumption,(MT),14000
2,"Almonds, Shelled Basis",GR,Greece,1998,Exports,(MT),1500
3,"Almonds, Shelled Basis",GR,Greece,1998,Imports,(MT),2600
4,"Almonds, Shelled Basis",GR,Greece,1998,Production,(MT),12000
5,"Almonds, Shelled Basis",IT,Italy,1998,Domestic Consumption,(MT),22593
6,"Almonds, Shelled Basis",IT,Italy,1998,Exports,(MT),1188
7,"Almonds, Shelled Basis",IT,Italy,1998,Imports,(MT),13781
8,"Almonds, Shelled Basis",IT,Italy,1998,Production,(MT),9000
9,"Almonds, Shelled Basis",MO,Morocco,1998,Domestic Consumption,(MT),8200
10,"Almonds, Shelled Basis",MO,Morocco,1998,Exports,(MT),0


In [65]:
# Picking the maximum Value for each Commodity since there seems to be many duplicate entries.

Global98_21 = USDAFil %>% group_by(Commodity_Description, Year, Attribute_Description, Country_Code, Country_Name) %>% 
  summarise(Value = max(Value), Unit_Description = "(MT)",
            .groups = 'drop')

# Deleting the Records where Value = 0

Global98_21 <- Global98_21[Global98_21$Value != 0, ] 

# Recoding from tibble to dataframe

Global1998and2021 <- as.data.frame(Global98_21)

View(Global1998and2021)

Commodity_Description,Year,Attribute_Description,Country_Code,Country_Name,Value,Unit_Description
<chr>,<int>,<chr>,<chr>,<chr>,<dbl>,<chr>
"Almonds, Shelled Basis",1998,Domestic Consumption,GR,Greece,14000,(MT)
"Almonds, Shelled Basis",1998,Domestic Consumption,IT,Italy,22593,(MT)
"Almonds, Shelled Basis",1998,Domestic Consumption,MO,Morocco,8200,(MT)
"Almonds, Shelled Basis",1998,Domestic Consumption,SP,Spain,29600,(MT)
"Almonds, Shelled Basis",1998,Domestic Consumption,TU,Turkey,13800,(MT)
"Almonds, Shelled Basis",1998,Domestic Consumption,US,United States,88414,(MT)
"Almonds, Shelled Basis",1998,Exports,GR,Greece,1500,(MT)
"Almonds, Shelled Basis",1998,Exports,IT,Italy,1188,(MT)
"Almonds, Shelled Basis",1998,Exports,SP,Spain,40700,(MT)
"Almonds, Shelled Basis",1998,Exports,TU,Turkey,200,(MT)


In [67]:
# Writing the CSV file

write_csv(Global1998and2021, "../Data/Global1998and2021.csv")