From 17ffa5018bcaef2b102542f3d2c5772d17e8f80c Mon Sep 17 00:00:00 2001 From: Michael von Bodungen Date: Sat, 7 May 2022 08:21:16 -0500 Subject: [PATCH] added -s flag to product load to skip the CategoryId lookup --- impex/README.md | 8 ++ impex/src/lib.rs | 24 +++++- impex/src/products.rs | 166 ++++++++++++++++++++++++++++++++++++------ 3 files changed, 175 insertions(+), 23 deletions(-) diff --git a/impex/README.md b/impex/README.md index 029e24a..fcc735a 100644 --- a/impex/README.md +++ b/impex/README.md @@ -441,6 +441,8 @@ The CSV file used to load products follows the VTEX API but has two additional f - CategoryUniqueIdentifier - the link to the category the product belongs to - BrandName - the name of the brand (used to generate the Brand File) +Note: If you have an integer-based CategoryId and are using that instead of the CategoryUniqueIdentifier, you can populate the CategoryId column in the Products.csv file and then use the "-s" parameter for "skip_cat_lookup" and set it to "1". This will prevent the program from creating the CategoryId lookup table at the start of the load and can save a great deal of time if you have a large category tree. 
+ The format of the file looks like the following: |Id |Name |DepartmentId|CategoryId|CategoryUniqueIdentifier|BrandId|BrandName |LinkId |RefId |IsVisible|Description |DescriptionShort|ReleaseDate |KeyWords |Title |IsActive|TaxCode|MetaTagDescription |SupplierId|ShowWithoutStock|AdWordsRemarketingCode|LomadeeCampaignCode|Score| |---|-----------------------------------------|------------|----------|------------------------|-------|-------------------|----------------------------------------------------|----------|---------|---------------------------------------------------------------------------------------------------------------|----------------|-------------------|----------------------------------------------|-----------------------------------------|--------|-------|-----------------------------------------|----------|----------------|----------------------|-------------------|-----| @@ -465,6 +467,12 @@ You should see output like the following: 34.546 [INFO] - Finished data load ``` +To run a product import and skip the CategoryId lookup (because you already populated the CategoryId column): +``` +RUST_LOG=info ./vtex_impex product -a import -f data/Products.csv -s 1 +``` +Note: -s parameter defaults to 0 which builds the CategoryId lookup. + ## SKU The CSV file to load SKUs follows the VTEX API but has two additonal columns: - ProductRefId - used instead of ProductId diff --git a/impex/src/lib.rs b/impex/src/lib.rs index 83806ab..f325f5b 100644 --- a/impex/src/lib.rs +++ b/impex/src/lib.rs @@ -49,6 +49,7 @@ struct Command { sku_file: String, concurrency: usize, rate_limit: NonZeroU32, + skip_cat_lookup: usize } arg_enum! { @@ -99,7 +100,8 @@ arg_enum! 
{ #[derive(Debug)] #[allow(non_camel_case_types)] enum ProductActions { - import + import, + update } } @@ -370,6 +372,12 @@ impl Command { .value_name("RATELIMIT") .help("Sets the rate limit value (how many calls per second) - default is 40") .takes_value(true)) + .arg(Arg::with_name("SKIPCATLOOKUP") + .short("s") + .long("skip_cat_lookup") + .value_name("SKIPCATLOOKUP") + .help("If you pass in the category_id it will skip building the category id lookups") + .takes_value(true)) ) .subcommand(SubCommand::with_name("sku") .about("actions on the sku into VTEX") @@ -668,6 +676,7 @@ impl Command { sku_file: "".to_string(), concurrency: 1, rate_limit: NonZeroU32::new(1).unwrap(), + skip_cat_lookup: 0 }; match matches.subcommand() { @@ -756,6 +765,7 @@ impl Command { debug!("input_file: {}", command.input_file); command.concurrency = m.value_of("CONCURRENCY").unwrap_or("1").parse::().expect("CONCURRENCY must be a positive integer between 1 and 24. Default is 1 - Recommended"); command.rate_limit = m.value_of("RATE_LIMIT").unwrap_or("40").parse::().expect("RATE_LIMIT must be a positive integer between 1 and 200. Default is 40 - Recommended"); + command.skip_cat_lookup = m.value_of("SKIPCATLOOKUP").unwrap_or("0").parse::().expect("SKIPCATLOOKUP must be a 0 or 1. 
Default is 0 - perform category lookup, 1 will skip the category lookup"); } ("sku", Some(m)) => { command.object = "sku".to_string(); @@ -987,6 +997,18 @@ pub async fn run() -> Result<(), Box> { environment, cmd.concurrency, cmd.rate_limit, + cmd.skip_cat_lookup + ) + .await?; + } else if cmd.action.eq("update") { + products::update_products( + cmd.input_file.to_string(), + &client, + account_name, + environment, + cmd.concurrency, + cmd.rate_limit, + cmd.skip_cat_lookup ) .await?; } diff --git a/impex/src/products.rs b/impex/src/products.rs index 2430292..5bc43f8 100644 --- a/impex/src/products.rs +++ b/impex/src/products.rs @@ -2,6 +2,7 @@ use futures::{executor::block_on, stream, StreamExt}; use governor::{Jitter, Quota, RateLimiter}; use log::*; use reqwest::{Client, StatusCode}; +use std::collections::HashMap; use std::fs::File; use std::num::NonZeroU32; use std::sync::Arc; @@ -9,6 +10,8 @@ use std::{error::Error, time::Duration}; use vtex::model::Product; use vtex::utils; +// use crate::categories; + pub async fn load_products( file_path: String, client: &Client, @@ -16,20 +19,27 @@ pub async fn load_products( environment: String, concurrent_requests: usize, rate_limit: NonZeroU32, + skip_cat_lookup: usize ) -> Result<(), Box> { info!("Starting load of products"); // Read in the category tree and store in a HashMap for lookup - let categories = utils::get_vtex_category_tree(client, &account_name, &environment).await; - let category_lookup = utils::parse_category_tree(categories); - debug!("category_lookup: {:?}", category_lookup.len()); - - // Get a lookup for the cateogory name of a category by GroupIdentifier - let category_identifier_name_lookup = - utils::create_category_name_lookup(client, &account_name, &environment).await; - debug!( - "category_identifier_name_lookup: {:?}", - category_identifier_name_lookup.len() - ); + let mut categories = Vec::new(); + let mut category_lookup: HashMap = HashMap::new(); + let mut category_identifier_name_lookup: 
HashMap = HashMap::new(); + + if skip_cat_lookup == 0 { + categories = utils::get_vtex_category_tree(client, &account_name, &environment).await; + category_lookup = utils::parse_category_tree(categories); + debug!("category_lookup: {:?}", category_lookup.len()); + + // Get a lookup for the cateogory name of a category by GroupIdentifier + category_identifier_name_lookup = + utils::create_category_name_lookup(client, &account_name, &environment).await; + debug!( + "category_identifier_name_lookup: {:?}", + category_identifier_name_lookup.len() + ); + } // Get a lookup for the brand_id by brand name let brand_id_lookup = utils::create_brand_lookup(client, &account_name, &environment).await; @@ -46,18 +56,128 @@ pub async fn load_products( for line in rdr.deserialize() { let mut record: Product = line?; - // look up the category name - let cat_unique_identifier = record.category_unique_identifier.as_ref().unwrap(); - let parent_cat_name = category_identifier_name_lookup - .get(cat_unique_identifier) - .unwrap(); - // Look up the VTEX Category Id + if skip_cat_lookup == 0 { + // look up the category name + let cat_unique_identifier = record.category_unique_identifier.as_ref().unwrap(); + let parent_cat_name = category_identifier_name_lookup + .get(cat_unique_identifier) + .unwrap(); + // Look up the VTEX Category Id + debug!( + "ref_id: {:?} parent_cat_name: {:?}", + &record.ref_id, &parent_cat_name + ); + let vtex_cat_id = category_lookup.get(&parent_cat_name.clone()).unwrap(); + record.category_id = Some(*vtex_cat_id); + } + // Look up the brand_id + let brand_name = record + .brand_name + .as_ref() + .unwrap_or_else(|| panic!("BrandName missing in CSV for SKU Ref: {:?}", record.ref_id)); + let brand_id = brand_id_lookup.get(brand_name).unwrap_or_else(|| panic!("Brand Name: {:?} not found in lookup table. Make sure Brand Name in the BrandName column in Products.csv matches Brand Name in the Name column of the Brands.csv file. 
The values are case sensitive.", record.brand_name)); + record.brand_id = Some(*brand_id); + + product_recs.push(record); + } + + let lim = Arc::new(RateLimiter::direct(Quota::per_second(rate_limit))); + + let bodies = stream::iter(product_recs) + .map(|record| { + let client = &client; + let url = &url; + let lim = Arc::clone(&lim); + async move { + block_on(lim.until_ready_with_jitter(Jitter::up_to(Duration::from_millis(100)))); + + let response = client.post(url).json(&record).send().await?; + + info!( + "product: {:?}: response: {:?}", + record.ref_id, + response.status() + ); + if response.status() == StatusCode::TOO_MANY_REQUESTS { + info!("headers: {:?}", response.headers()); + } + response.text().await + } + }) + .buffer_unordered(concurrent_requests); + bodies + .for_each(|b| async { + match b { + Ok(b) => info!("output: {:?}", b), + Err(e) => error!("error: {:?}", e), + } + }) + .await; + + info!("finished loading products"); + + Ok(()) +} + +pub async fn update_products( + file_path: String, + client: &Client, + account_name: String, + environment: String, + concurrent_requests: usize, + rate_limit: NonZeroU32, + skip_cat_lookup: usize +) -> Result<(), Box> { + info!("Starting load of products"); + // Read in the category tree and store in a HashMap for lookup + let mut categories = Vec::new(); + let mut category_lookup: HashMap = HashMap::new(); + let mut category_identifier_name_lookup: HashMap = HashMap::new(); + + debug!("skip_cat_lookup={}", skip_cat_lookup); + if skip_cat_lookup == 0 { + categories = utils::get_vtex_category_tree(client, &account_name, &environment).await; + category_lookup = utils::parse_category_tree(categories); + debug!("category_lookup: {:?}", category_lookup.len()); + + // Get a lookup for the cateogory name of a category by GroupIdentifier + category_identifier_name_lookup = + utils::create_category_name_lookup(client, &account_name, &environment).await; debug!( - "ref_id: {:?} parent_cat_name: {:?}", - &record.ref_id, 
&parent_cat_name + "category_identifier_name_lookup: {:?}", + category_identifier_name_lookup.len() ); - let vtex_cat_id = category_lookup.get(&parent_cat_name.clone()).unwrap(); - record.category_id = Some(*vtex_cat_id); + } + + // Get a lookup for the brand_id by brand name + let brand_id_lookup = utils::create_brand_lookup(client, &account_name, &environment).await; + debug!("brand_id_lookup: {}", brand_id_lookup.len()); + + let url = "https://{accountName}.{environment}.com.br/api/catalog/pvt/product/{productId}" + .replace("{accountName}", &account_name) + .replace("{environment}", &environment); + let input = File::open(file_path)?; + let mut rdr = csv::Reader::from_reader(input); + + let mut product_recs: Vec = Vec::new(); + + for line in rdr.deserialize() { + let mut record: Product = line?; + + if skip_cat_lookup == 0 { + // look up the category name + let cat_unique_identifier = record.category_unique_identifier.as_ref().unwrap(); + let parent_cat_name = category_identifier_name_lookup + .get(cat_unique_identifier) + .unwrap(); + // Look up the VTEX Category Id + debug!( + "ref_id: {:?} parent_cat_name: {:?}", + &record.ref_id, &parent_cat_name + ); + let vtex_cat_id = category_lookup.get(&parent_cat_name.clone()).unwrap(); + record.category_id = Some(*vtex_cat_id); + } // Look up the brand_id let brand_name = record .brand_name @@ -78,8 +198,10 @@ pub async fn load_products( let lim = Arc::clone(&lim); async move { block_on(lim.until_ready_with_jitter(Jitter::up_to(Duration::from_millis(100)))); + let url_with_product_id = + url.replace("{productId}", record.id.unwrap().to_string().as_str()); - let response = client.post(url).json(&record).send().await?; + let response = client.put(url_with_product_id).json(&record).send().await?; info!( "product: {:?}: response: {:?}",