Validation (#5)

* Update to 2024.0 (#4)
* Update to 2024.0
* Changes for reviews round 1
* empty line in env yml file
* Changes for reviews round 2
* fix environment
oneapi-src · Feb 2, 2024 · 4b43109 · 4b43109
1 parent b940a94
commit 4b43109
Show file tree

Hide file tree

Showing 23 changed files with 715 additions and 483 deletions.
diff --git a/LICENSE b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2023, Intel Corporation
+Copyright (c) 2024, Intel Corporation
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

diff --git a/README.md b/README.md
diff --git a/assets/e2e-embedding-original.png b/assets/e2e-embedding-original.png
diff --git a/assets/e2e-embedding-reranking.png b/assets/e2e-embedding-reranking.png
diff --git a/assets/embedding_flow.png b/assets/embedding_flow.png
diff --git a/assets/offline-embedding-relative-perf.png b/assets/offline-embedding-relative-perf.png
diff --git a/assets/real_time_flow.png b/assets/real_time_flow.png
diff --git a/assets/realtime-search-relative-perf.png b/assets/realtime-search-relative-perf.png
diff --git a/assets/reranker.png b/assets/reranker.png
diff --git a/data/README.md b/data/README.md
diff --git a/data/download_data.py b/data/download_data.py
@@ -1,7 +1,7 @@
 # !/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-# Copyright (C) 2022 Intel Corporation
+# Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: BSD-3-Clause
 
 # pylint: disable=C0415,E0401,R0914

diff --git a/env/intel/intel.yml b/env/intel/intel.yml
diff --git a/env/intel_env.yml b/env/intel_env.yml
@@ -0,0 +1,18 @@
+name: vertical_search_intel
+channels:
+  - intel
+  - conda-forge
+dependencies:
+  - python=3.9
+  - intel-extension-for-pytorch==2.0.100
+  - pandas=1.5.3
+  - cpuonly=1.0
+  - neural-compressor=2.3.1
+  - opencv=4.8.1
+  - scipy=1.10.1
+  - datasets=2.16.1
+  - gperftools=2.10
+  - psutil=5.8.0
+  - transformers=4.37.1
+  - sentence-transformers=2.3.1
+
diff --git a/env/stock/stock.yml b/env/stock/stock.yml
diff --git a/setupenv.sh b/setupenv.sh
diff --git a/src/configs/vse_config_base.yml b/src/configs/vse_config_base.yml
@@ -12,4 +12,4 @@ model:
 inference:
   top_k : 5
   score_function : dot # cos_sim, dot
-  corpus_embeddings_path : ../saved_output/embeddings.pkl
+  corpus_embeddings_path : output/embeddings.pkl
diff --git a/src/configs/vse_config_inc.yml b/src/configs/vse_config_inc.yml
@@ -9,10 +9,10 @@ model:
   max_seq_length: 128
 
   # inc required config parameters
-  path: ../saved_models/inc_int8
+  path: output/models/inc_int8
 
 # inference config
 inference:
   top_k : 5
   score_function : dot # cos_sim, dot
-  corpus_embeddings_path : ../saved_output/embeddings.pkl
+  corpus_embeddings_path : output/embeddings.pkl
diff --git a/src/display_rankings.py b/src/display_rankings.py
@@ -1,7 +1,7 @@
 # !/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: BSD-3-Clause
 
 # pylint: disable=C0415,E0401,R0914

diff --git a/src/run_document_embedder.py b/src/run_document_embedder.py
@@ -1,7 +1,7 @@
 # !/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: BSD-3-Clause
 
 # pylint: disable=C0415,E0401,R0914
@@ -195,9 +195,9 @@ def main(flags):
     max_sequence_length = conf['model']['max_seq_length']
 
     # use IPEX to optimize model
-    if flags.intel:
-        import intel_extension_for_pytorch as ipex
-        embedder = ipex.optimize(embedder, dtype=torch.float32)
+
+    import intel_extension_for_pytorch as ipex
+    embedder = ipex.optimize(embedder, dtype=torch.float32)
 
     sample_inputs = tokenizer.batch_decode([
         random.sample(
@@ -283,13 +283,6 @@ def main(flags):
                         default=100
                         )
 
-    parser.add_argument('--intel',
-                        required=False,
-                        help="use intel pytorch extension to optimize model",
-                        action="store_true",
-                        default=False
-                        )
-
     FLAGS = parser.parse_args()
 
     main(FLAGS)
diff --git a/src/run_quantize_inc.py b/src/run_quantize_inc.py
@@ -1,7 +1,7 @@
 # !/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: BSD-3-Clause
 
 # pylint: disable=C0415,E0401,R0914

diff --git a/src/run_query_search.py b/src/run_query_search.py
@@ -1,7 +1,7 @@
 # !/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: BSD-3-Clause
 
 # pylint: disable=C0415,E0401,R0914
@@ -233,9 +233,11 @@ def main(flags):
     embedder.eval()
     max_sequence_length = conf['model']['max_seq_length']
 
-    if flags.intel:
-        import intel_extension_for_pytorch as ipex
-        embedder = ipex.optimize(embedder, dtype=torch.float32)
+    # use IPEX to optimize model
+
+    import intel_extension_for_pytorch as ipex
+    embedder = ipex.optimize(embedder, dtype=torch.float32)
+
     sample_inputs = tokenizer.batch_decode([
         random.sample(
             range(tokenizer.vocab_size), max_sequence_length) for
@@ -348,13 +350,6 @@ def main(flags):
                         default=100
                         )
 
-    parser.add_argument('--intel',
-                        required=False,
-                        help="use intel pytorch extension to optimize model",
-                        action="store_true",
-                        default=False
-                        )
-
     FLAGS = parser.parse_args()
 
     main(FLAGS)
diff --git a/src/utils/dataloader.py b/src/utils/dataloader.py
@@ -1,7 +1,7 @@
 # !/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: BSD-3-Clause
 
 # pylint: disable=C0415,E0401,R0914

diff --git a/src/utils/embed.py b/src/utils/embed.py
@@ -1,7 +1,7 @@
 # !/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: BSD-3-Clause
 
 # pylint: disable=C0415,E0401,R0914