Skip to content

Commit

Permalink
added -s flag to product load to skip the CategoryId lookup
Browse files Browse the repository at this point in the history
  • Loading branch information
mvonbodun committed May 7, 2022
1 parent b93be68 commit 17ffa50
Show file tree
Hide file tree
Showing 3 changed files with 175 additions and 23 deletions.
8 changes: 8 additions & 0 deletions impex/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,8 @@ The CSV file used to load products follows the VTEX API but has two additional f
- CategoryUniqueIdentifier - the link to the category the product belongs to
- BrandName - the name of the brand (used to generate the Brand File)
Note: If you have an integer-based CategoryId and are using that instead of the CategoryUniqueIdentifier, you can populate the CategoryId column in the Product.csv file and then pass the "-s" ("skip_cat_lookup") parameter set to "1". This prevents the program from building the CategoryId lookup table at the start of the load, which can save a great deal of time if you have a large category tree.
The format of the file looks like the following:
|Id |Name |DepartmentId|CategoryId|CategoryUniqueIdentifier|BrandId|BrandName |LinkId |RefId |IsVisible|Description |DescriptionShort|ReleaseDate |KeyWords |Title |IsActive|TaxCode|MetaTagDescription |SupplierId|ShowWithoutStock|AdWordsRemarketingCode|LomadeeCampaignCode|Score|
|---|-----------------------------------------|------------|----------|------------------------|-------|-------------------|----------------------------------------------------|----------|---------|---------------------------------------------------------------------------------------------------------------|----------------|-------------------|----------------------------------------------|-----------------------------------------|--------|-------|-----------------------------------------|----------|----------------|----------------------|-------------------|-----|
Expand All @@ -465,6 +467,12 @@ You should see output like the following:
34.546 [INFO] - Finished data load
```
To run a product import and skip the CategoryId lookup (because you already populated the CategoryId column):
```
RUST_LOG=info ./vtex_impex product -a import -f data/Products.csv -s 1
```
Note: The -s parameter defaults to 0, which builds the CategoryId lookup.
## SKU
The CSV file to load SKUs follows the VTEX API but has two additional columns:
- ProductRefId - used instead of ProductId
Expand Down
24 changes: 23 additions & 1 deletion impex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ struct Command {
sku_file: String,
concurrency: usize,
rate_limit: NonZeroU32,
skip_cat_lookup: usize
}

arg_enum! {
Expand Down Expand Up @@ -99,7 +100,8 @@ arg_enum! {
#[derive(Debug)]
#[allow(non_camel_case_types)]
enum ProductActions {
import
import,
update
}
}

Expand Down Expand Up @@ -370,6 +372,12 @@ impl Command {
.value_name("RATELIMIT")
.help("Sets the rate limit value (how many calls per second) - default is 40")
.takes_value(true))
.arg(Arg::with_name("SKIPCATLOOKUP")
.short("s")
.long("skip_cat_lookup")
.value_name("SKIPCATLOOKUP")
.help("If you pass in the category_id it will skip building the category id lookups")
.takes_value(true))
)
.subcommand(SubCommand::with_name("sku")
.about("actions on the sku into VTEX")
Expand Down Expand Up @@ -668,6 +676,7 @@ impl Command {
sku_file: "".to_string(),
concurrency: 1,
rate_limit: NonZeroU32::new(1).unwrap(),
skip_cat_lookup: 0
};

match matches.subcommand() {
Expand Down Expand Up @@ -756,6 +765,7 @@ impl Command {
debug!("input_file: {}", command.input_file);
command.concurrency = m.value_of("CONCURRENCY").unwrap_or("1").parse::<usize>().expect("CONCURRENCY must be a positive integer between 1 and 24. Default is 1 - Recommended");
command.rate_limit = m.value_of("RATE_LIMIT").unwrap_or("40").parse::<NonZeroU32>().expect("RATE_LIMIT must be a positive integer between 1 and 200. Default is 40 - Recommended");
command.skip_cat_lookup = m.value_of("SKIPCATLOOKUP").unwrap_or("0").parse::<usize>().expect("SKIPCATLOOKUP must be a 0 or 1. Default is 0 - perform category lookup, 1 will skip the category lookup");
}
("sku", Some(m)) => {
command.object = "sku".to_string();
Expand Down Expand Up @@ -987,6 +997,18 @@ pub async fn run() -> Result<(), Box<dyn Error>> {
environment,
cmd.concurrency,
cmd.rate_limit,
cmd.skip_cat_lookup
)
.await?;
} else if cmd.action.eq("update") {
products::update_products(
cmd.input_file.to_string(),
&client,
account_name,
environment,
cmd.concurrency,
cmd.rate_limit,
cmd.skip_cat_lookup
)
.await?;
}
Expand Down
166 changes: 144 additions & 22 deletions impex/src/products.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,44 @@ use futures::{executor::block_on, stream, StreamExt};
use governor::{Jitter, Quota, RateLimiter};
use log::*;
use reqwest::{Client, StatusCode};
use std::collections::HashMap;
use std::fs::File;
use std::num::NonZeroU32;
use std::sync::Arc;
use std::{error::Error, time::Duration};
use vtex::model::Product;
use vtex::utils;

// use crate::categories;

pub async fn load_products(
file_path: String,
client: &Client,
account_name: String,
environment: String,
concurrent_requests: usize,
rate_limit: NonZeroU32,
skip_cat_lookup: usize
) -> Result<(), Box<dyn Error>> {
info!("Starting load of products");
// Read in the category tree and store in a HashMap for lookup
let categories = utils::get_vtex_category_tree(client, &account_name, &environment).await;
let category_lookup = utils::parse_category_tree(categories);
debug!("category_lookup: {:?}", category_lookup.len());

// Get a lookup for the category name of a category by GroupIdentifier
let category_identifier_name_lookup =
utils::create_category_name_lookup(client, &account_name, &environment).await;
debug!(
"category_identifier_name_lookup: {:?}",
category_identifier_name_lookup.len()
);
let mut categories = Vec::new();
let mut category_lookup: HashMap<String, i32> = HashMap::new();
let mut category_identifier_name_lookup: HashMap<String, String> = HashMap::new();

if skip_cat_lookup == 0 {
categories = utils::get_vtex_category_tree(client, &account_name, &environment).await;
category_lookup = utils::parse_category_tree(categories);
debug!("category_lookup: {:?}", category_lookup.len());

// Get a lookup for the category name of a category by GroupIdentifier
category_identifier_name_lookup =
utils::create_category_name_lookup(client, &account_name, &environment).await;
debug!(
"category_identifier_name_lookup: {:?}",
category_identifier_name_lookup.len()
);
}

// Get a lookup for the brand_id by brand name
let brand_id_lookup = utils::create_brand_lookup(client, &account_name, &environment).await;
Expand All @@ -46,18 +56,128 @@ pub async fn load_products(
for line in rdr.deserialize() {
let mut record: Product = line?;

// look up the category name
let cat_unique_identifier = record.category_unique_identifier.as_ref().unwrap();
let parent_cat_name = category_identifier_name_lookup
.get(cat_unique_identifier)
.unwrap();
// Look up the VTEX Category Id
if skip_cat_lookup == 0 {
// look up the category name
let cat_unique_identifier = record.category_unique_identifier.as_ref().unwrap();
let parent_cat_name = category_identifier_name_lookup
.get(cat_unique_identifier)
.unwrap();
// Look up the VTEX Category Id
debug!(
"ref_id: {:?} parent_cat_name: {:?}",
&record.ref_id, &parent_cat_name
);
let vtex_cat_id = category_lookup.get(&parent_cat_name.clone()).unwrap();
record.category_id = Some(*vtex_cat_id);
}
// Look up the brand_id
let brand_name = record
.brand_name
.as_ref()
.unwrap_or_else(|| panic!("BrandName missing in CSV for SKU Ref: {:?}", record.ref_id));
let brand_id = brand_id_lookup.get(brand_name).unwrap_or_else(|| panic!("Brand Name: {:?} not found in lookup table. Make sure Brand Name in the BrandName column in Products.csv matches Brand Name in the Name column of the Brands.csv file. The values are case sensitive.", record.brand_name));
record.brand_id = Some(*brand_id);

product_recs.push(record);
}

let lim = Arc::new(RateLimiter::direct(Quota::per_second(rate_limit)));

let bodies = stream::iter(product_recs)
.map(|record| {
let client = &client;
let url = &url;
let lim = Arc::clone(&lim);
async move {
block_on(lim.until_ready_with_jitter(Jitter::up_to(Duration::from_millis(100))));

let response = client.post(url).json(&record).send().await?;

info!(
"product: {:?}: response: {:?}",
record.ref_id,
response.status()
);
if response.status() == StatusCode::TOO_MANY_REQUESTS {
info!("headers: {:?}", response.headers());
}
response.text().await
}
})
.buffer_unordered(concurrent_requests);
bodies
.for_each(|b| async {
match b {
Ok(b) => info!("output: {:?}", b),
Err(e) => error!("error: {:?}", e),
}
})
.await;

info!("finished loading products");

Ok(())
}

pub async fn update_products(
file_path: String,
client: &Client,
account_name: String,
environment: String,
concurrent_requests: usize,
rate_limit: NonZeroU32,
skip_cat_lookup: usize
) -> Result<(), Box<dyn Error>> {
info!("Starting load of products");
// Read in the category tree and store in a HashMap for lookup
let mut categories = Vec::new();
let mut category_lookup: HashMap<String, i32> = HashMap::new();
let mut category_identifier_name_lookup: HashMap<String, String> = HashMap::new();

debug!("skip_cat_lookup={}", skip_cat_lookup);
if skip_cat_lookup == 0 {
categories = utils::get_vtex_category_tree(client, &account_name, &environment).await;
category_lookup = utils::parse_category_tree(categories);
debug!("category_lookup: {:?}", category_lookup.len());

// Get a lookup for the category name of a category by GroupIdentifier
category_identifier_name_lookup =
utils::create_category_name_lookup(client, &account_name, &environment).await;
debug!(
"ref_id: {:?} parent_cat_name: {:?}",
&record.ref_id, &parent_cat_name
"category_identifier_name_lookup: {:?}",
category_identifier_name_lookup.len()
);
let vtex_cat_id = category_lookup.get(&parent_cat_name.clone()).unwrap();
record.category_id = Some(*vtex_cat_id);
}

// Get a lookup for the brand_id by brand name
let brand_id_lookup = utils::create_brand_lookup(client, &account_name, &environment).await;
debug!("brand_id_lookup: {}", brand_id_lookup.len());

let url = "https://{accountName}.{environment}.com.br/api/catalog/pvt/product/{productId}"
.replace("{accountName}", &account_name)
.replace("{environment}", &environment);
let input = File::open(file_path)?;
let mut rdr = csv::Reader::from_reader(input);

let mut product_recs: Vec<Product> = Vec::new();

for line in rdr.deserialize() {
let mut record: Product = line?;

if skip_cat_lookup == 0 {
// look up the category name
let cat_unique_identifier = record.category_unique_identifier.as_ref().unwrap();
let parent_cat_name = category_identifier_name_lookup
.get(cat_unique_identifier)
.unwrap();
// Look up the VTEX Category Id
debug!(
"ref_id: {:?} parent_cat_name: {:?}",
&record.ref_id, &parent_cat_name
);
let vtex_cat_id = category_lookup.get(&parent_cat_name.clone()).unwrap();
record.category_id = Some(*vtex_cat_id);
}
// Look up the brand_id
let brand_name = record
.brand_name
Expand All @@ -78,8 +198,10 @@ pub async fn load_products(
let lim = Arc::clone(&lim);
async move {
block_on(lim.until_ready_with_jitter(Jitter::up_to(Duration::from_millis(100))));
let url_with_product_id =
url.replace("{productId}", record.id.unwrap().to_string().as_str());

let response = client.post(url).json(&record).send().await?;
let response = client.put(url_with_product_id).json(&record).send().await?;

info!(
"product: {:?}: response: {:?}",
Expand Down

0 comments on commit 17ffa50

Please sign in to comment.