diff --git a/bin/mindee.rb b/bin/mindee.rb index 6b6b59ab..0622f4c3 100755 --- a/bin/mindee.rb +++ b/bin/mindee.rb @@ -97,12 +97,6 @@ sync: false, async: true, }, - "fr-carte-vitale" => { - description: "FR Carte Vitale", - doc_class: Mindee::Product::FR::CarteVitale::CarteVitaleV1, - sync: true, - async: false, - }, "fr-id-card" => { description: "FR ID Card", doc_class: Mindee::Product::FR::IdCard::IdCardV2, diff --git a/docs/code_samples/carte_vitale_v1.txt b/docs/code_samples/carte_vitale_v1.txt deleted file mode 100644 index b6932419..00000000 --- a/docs/code_samples/carte_vitale_v1.txt +++ /dev/null @@ -1,19 +0,0 @@ -require 'mindee' - -# Init a new client -mindee_client = Mindee::Client.new(api_key: 'my-api-key') - -# Load a file from disk -input_source = mindee_client.source_from_path('/path/to/the/file.ext') - -# Parse the file -result = mindee_client.parse( - input_source, - Mindee::Product::FR::CarteVitale::CarteVitaleV1 -) - -# Print a full summary of the parsed data in RST format -puts result.document - -# Print the document-level parsed data -# puts result.document.inference.prediction diff --git a/lib/mindee/input/sources/url_input_source.rb b/lib/mindee/input/sources/url_input_source.rb index ad107600..5b4a9b13 100644 --- a/lib/mindee/input/sources/url_input_source.rb +++ b/lib/mindee/input/sources/url_input_source.rb @@ -1,5 +1,9 @@ # frozen_string_literal: true +require 'net/http' +require 'uri' +require 'fileutils' + module Mindee module Input module Source @@ -13,6 +17,110 @@ def initialize(url) @url = url end + + # Downloads the file from the URL and saves it to the specified path. + # + # @param path [String] Path to save the file to. + # @param filename [String, nil] Optional name to give to the file. + # @param username [String, nil] Optional username for authentication. + # @param password [String, nil] Optional password for authentication. + # @param token [String, nil] Optional token for JWT-based authentication. + # @param max_redirects [Integer] Maximum amount of redirects to follow. + # @return [String] The full path of the saved file. + def save_to_file(path, filename: nil, username: nil, password: nil, token: nil, max_redirects: 3) + response_body = fetch_file_content(username: username, password: password, token: token, + max_redirects: max_redirects) + + filename = fill_filename(filename) + + full_path = File.join(path.chomp('/'), filename) + File.write(full_path, response_body) + + full_path + end + + # Downloads the file from the url, and returns a BytesInputSource wrapper object for it. + # + # @param filename [String, nil] Optional name to give to the file. + # @param username [String, nil] Optional username for authentication. + # @param password [String, nil] Optional password for authentication. + # @param token [String, nil] Optional token for JWT-based authentication. + # @param max_redirects [Integer] Maximum amount of redirects to follow. + # @return [BytesInputSource] The full path of the saved file. + def as_local_input_source(filename: nil, username: nil, password: nil, token: nil, max_redirects: 3) + filename = fill_filename(filename) + response_body = fetch_file_content(username: username, password: password, token: token, + max_redirects: max_redirects) + bytes = StringIO.new(response_body) + + BytesInputSource.new(bytes.read, filename) + end + + # Fetches the file content from the URL. + # + # @param username [String, nil] Optional username for authentication. + # @param password [String, nil] Optional password for authentication. + # @param token [String, nil] Optional token for JWT-based authentication. + # @param max_redirects [Integer] Maximum amount of redirects to follow. + # @return [String] The downloaded file content. + def fetch_file_content(username: nil, password: nil, token: nil, max_redirects: 3) + uri = URI.parse(@url) + request = Net::HTTP::Get.new(uri) + + request['Authorization'] = "Bearer #{token}" if token + request.basic_auth(username, password) if username && password + + response = make_request(uri, request, max_redirects) + if response.code.to_i > 299 + raise "Failed to download file: HTTP status code #{response.code}" + elsif response.code.to_i < 200 + raise "Failed to download file: Invalid response code #{response.code}." + end + + response.body + end + + private + + def extract_filename_from_url(uri) + filename = File.basename(uri.path) + filename.empty? ? '' : filename + end + + def fill_filename(filename) + filename ||= extract_filename_from_url(URI.parse(@url)) + if filename.empty? || File.extname(filename).empty? + filename = generate_file_name(extension: get_file_extension(filename)) + end + filename + end + + def make_request(uri, request, max_redirects) + Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http| + response = http.request(request) + if response.is_a?(Net::HTTPRedirection) && max_redirects.positive? + location = response['location'] + raise 'No location in redirection header.' if location.nil? + + new_uri = URI.parse(location) + request = Net::HTTP::Get.new(new_uri) + make_request(new_uri, request, max_redirects - 1) + else + response + end + end + end + + def get_file_extension(filename) + ext = File.extname(filename) + ext.empty? ? nil : ext.downcase + end + + def generate_file_name(extension: nil) + extension ||= '.tmp' + random_string = Array.new(8) { rand(36).to_s(36) }.join + "mindee_temp_#{Time.now.strftime('%Y-%m-%d_%H-%M-%S')}_#{random_string}#{extension}" + end end end end diff --git a/lib/mindee/product.rb b/lib/mindee/product.rb index 8b6d5051..66225cf3 100644 --- a/lib/mindee/product.rb +++ b/lib/mindee/product.rb @@ -13,7 +13,6 @@ require_relative 'product/fr/bank_account_details/bank_account_details_v2' require_relative 'product/fr/bank_statement/bank_statement_v1' require_relative 'product/fr/carte_grise/carte_grise_v1' -require_relative 'product/fr/carte_vitale/carte_vitale_v1' require_relative 'product/fr/id_card/id_card_v1' require_relative 'product/fr/id_card/id_card_v2' require_relative 'product/fr/energy_bill/energy_bill_v1' diff --git a/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb b/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb deleted file mode 100644 index c38e1e74..00000000 --- a/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +++ /dev/null @@ -1,41 +0,0 @@ -# frozen_string_literal: true - -require_relative '../../../parsing' -require_relative 'carte_vitale_v1_document' -require_relative 'carte_vitale_v1_page' - -module Mindee - module Product - module FR - # Carte Vitale module. - module CarteVitale - # Carte Vitale API version 1 inference prediction. - class CarteVitaleV1 < Mindee::Parsing::Common::Inference - @endpoint_name = 'carte_vitale' - @endpoint_version = '1' - - # @param prediction [Hash] - def initialize(prediction) - super - @prediction = CarteVitaleV1Document.new(prediction['prediction'], nil) - @pages = [] - prediction['pages'].each do |page| - if page.key?('prediction') && !page['prediction'].nil? && !page['prediction'].empty? - @pages.push(CarteVitaleV1Page.new(page)) - end - end - end - - class << self - # Name of the endpoint for this product. - # @return [String] - attr_reader :endpoint_name - # Version for this product. - # @return [String] - attr_reader :endpoint_version - end - end - end - end - end -end diff --git a/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb b/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb deleted file mode 100644 index 98aa4c6e..00000000 --- a/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb +++ /dev/null @@ -1,52 +0,0 @@ -# frozen_string_literal: true - -require_relative '../../../parsing' - -module Mindee - module Product - module FR - module CarteVitale - # Carte Vitale API version 1.1 document data. - class CarteVitaleV1Document < Mindee::Parsing::Common::Prediction - include Mindee::Parsing::Standard - # The given name(s) of the card holder. - # @return [Array] - attr_reader :given_names - # The date the card was issued. - # @return [Mindee::Parsing::Standard::DateField] - attr_reader :issuance_date - # The Social Security Number (Numéro de Sécurité Sociale) of the card holder - # @return [Mindee::Parsing::Standard::StringField] - attr_reader :social_security - # The surname of the card holder. - # @return [Mindee::Parsing::Standard::StringField] - attr_reader :surname - - # @param prediction [Hash] - # @param page_id [Integer, nil] - def initialize(prediction, page_id) - super() - @given_names = [] - prediction['given_names'].each do |item| - @given_names.push(StringField.new(item, page_id)) - end - @issuance_date = DateField.new(prediction['issuance_date'], page_id) - @social_security = StringField.new(prediction['social_security'], page_id) - @surname = StringField.new(prediction['surname'], page_id) - end - - # @return [String] - def to_s - given_names = @given_names.join("\n #{' ' * 15}") - out_str = String.new - out_str << "\n:Given Name(s): #{given_names}".rstrip - out_str << "\n:Surname: #{@surname}".rstrip - out_str << "\n:Social Security Number: #{@social_security}".rstrip - out_str << "\n:Issuance Date: #{@issuance_date}".rstrip - out_str[1..].to_s - end - end - end - end - end -end diff --git a/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb b/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb deleted file mode 100644 index 0a9ba2b2..00000000 --- a/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb +++ /dev/null @@ -1,34 +0,0 @@ -# frozen_string_literal: true - -require_relative '../../../parsing' -require_relative 'carte_vitale_v1_document' - -module Mindee - module Product - module FR - module CarteVitale - # Carte Vitale API version 1.1 page data. - class CarteVitaleV1Page < Mindee::Parsing::Common::Page - # @param prediction [Hash] - def initialize(prediction) - super(prediction) - @prediction = CarteVitaleV1PagePrediction.new( - prediction['prediction'], - prediction['id'] - ) - end - end - - # Carte Vitale V1 page prediction. - class CarteVitaleV1PagePrediction < CarteVitaleV1Document - # @return [String] - def to_s - out_str = String.new - out_str << "\n#{super}" - out_str - end - end - end - end - end -end diff --git a/spec/document/fr/carte_vitale_v1_spec.rb b/spec/document/fr/carte_vitale_v1_spec.rb deleted file mode 100644 index 2d4aa3e5..00000000 --- a/spec/document/fr/carte_vitale_v1_spec.rb +++ /dev/null @@ -1,32 +0,0 @@ -# frozen_string_literal: true - -require 'json' -require 'mindee/product' -require 'mindee/parsing' - -require_relative '../../data' - -DIR_FR_CARTE_VITALE_V1 = File.join(DATA_DIR, 'products', 'carte_vitale', 'response_v1').freeze - -describe Mindee::Product::FR::CarteVitale::CarteVitaleV1 do - context 'A Carte Vitale V1' do - it 'should load an empty document prediction' do - response = load_json(DIR_FR_CARTE_VITALE_V1, 'empty.json') - inference = Mindee::Parsing::Common::Document.new( - Mindee::Product::FR::CarteVitale::CarteVitaleV1, - response['document'] - ).inference - expect(inference.product.type).to eq('standard') - end - - it 'should load a complete document prediction' do - to_string = read_file(DIR_FR_CARTE_VITALE_V1, 'summary_full.rst') - response = load_json(DIR_FR_CARTE_VITALE_V1, 'complete.json') - document = Mindee::Parsing::Common::Document.new( - Mindee::Product::FR::CarteVitale::CarteVitaleV1, - response['document'] - ) - expect(document.to_s).to eq(to_string) - end - end -end diff --git a/spec/input/sources_spec.rb b/spec/input/sources_spec.rb index 4041198a..72c2244b 100644 --- a/spec/input/sources_spec.rb +++ b/spec/input/sources_spec.rb @@ -50,18 +50,6 @@ end end - context 'A remote url file' do - it 'should not send an invalid URL' do - expect do - Mindee::Input::Source::UrlInputSource.new('http://invalid-url') - end.to raise_error 'URL must be HTTPS' - end - it 'should send a valid URL' do - input = Mindee::Input::Source::UrlInputSource.new('https://platform.mindee.com') - expect(input.url).to eq('https://platform.mindee.com') - end - end - context 'A broken fixable PDF' do mindee_client = Mindee::Client.new(api_key: 'invalid-api-key') it 'Should not raise a mime error' do diff --git a/spec/input/url_input_source_integration.rb b/spec/input/url_input_source_integration.rb new file mode 100644 index 00000000..0719d875 --- /dev/null +++ b/spec/input/url_input_source_integration.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +require 'rspec' +require 'mindee' + +describe Mindee::Input::Source::UrlInputSource do + it 'retrieves response from a remote file' do + api_key = ENV.fetch('MINDEE_API_KEY', nil) + client = Mindee::Client.new(api_key: api_key) + remote_input = Mindee::Input::Source::UrlInputSource.new('https://github.com/mindee/client-lib-test-data/blob/main/products/invoice_splitter/invoice_5p.pdf?raw=true') + + local_input = remote_input.as_local_input_source + expect(local_input.filename).to eq('invoice_5p.pdf') + + result = client.parse(local_input, Mindee::Product::Invoice::InvoiceV4) + expect(result.document.n_pages).to eq(5) + end +end diff --git a/spec/input/url_input_source_spec.rb b/spec/input/url_input_source_spec.rb new file mode 100644 index 00000000..12cc4b8d --- /dev/null +++ b/spec/input/url_input_source_spec.rb @@ -0,0 +1,109 @@ +# frozen_string_literal: true + +require 'rspec' +require 'mindee' +require_relative '../http/mock_http_response' + +RSpec.describe Mindee::Input::Source::UrlInputSource do + let(:valid_url) { 'https://validurl/some/file.jpg' } + let(:valid_url_no_filename) { 'https://validurl/some/' } + let(:invalid_url) { 'http://invalidurl/some/file.jpg' } + let(:output_dir) { "#{DATA_DIR}/output/" } + + describe '#initialize' do + context 'with valid URL' do + it 'creates a new instance' do + input = described_class.new('https://platform.mindee.com') + expect(input.url).to eq('https://platform.mindee.com') + end + end + + context 'with invalid URL' do + it 'raises an error for invalid URLs' do + expect { described_class.new(invalid_url) }.to raise_error('URL must be HTTPS') + end + end + end + + describe '#as_local_input_source' do + let(:url_input_source) { described_class.new(valid_url) } + let(:url_input_source_no_filename) { described_class.new(valid_url_no_filename) } + + before do + allow(Net::HTTP).to receive(:start).and_return(mock_response) + end + + context 'when download is successful' do + let(:mock_response) { MockHTTPResponse.new('1.1', '200', 'OK', 'file content') } + + it 'returns a BytesInputSource' do + result = url_input_source.as_local_input_source(filename: 'file.pdf') + expect(result).to be_a(Mindee::Input::Source::BytesInputSource) + expect(result.filename).to eq('file.pdf') + expect(result.io_stream).to be_a(StringIO) + expect(result.io_stream.read).to eq('file content') + end + + it 'uses a custom filename when provided' do + result = url_input_source.as_local_input_source(filename: 'custom.pdf') + expect(result.filename).to eq('custom.pdf') + end + + it 'handles authentication' do + result = url_input_source.as_local_input_source(username: 'user', password: 'pass') + expect(result).to be_a(Mindee::Input::Source::BytesInputSource) + end + end + + context 'when download fails' do + let(:mock_response) { MockHTTPResponse.new('1.1', '404', 'Not Found', '') } + + it 'raises an error' do + expect { url_input_source.as_local_input_source }.to raise_error(RuntimeError, %r{Failed to download file}) + end + end + end + + describe '#save_to_file' do + let(:url_input_source) { described_class.new(valid_url) } + let(:url_input_source_no_filename) { described_class.new(valid_url_no_filename) } + + before do + allow(Net::HTTP).to receive(:start).and_return(mock_response) + allow(File).to receive(:write) + end + + context 'when download is successful' do + let(:mock_response) { MockHTTPResponse.new('1.1', '200', 'OK', 'file content') } + + it 'generates a valid filename when not provided' do + output_file_path = url_input_source_no_filename.save_to_file(output_dir) + expect(output_file_path).to match(%r{mindee_temp_\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}_[a-z0-9]{8}\.tmp}) + end + + it 'saves the file with the provided filename' do + result = url_input_source.save_to_file('/tmp', filename: 'file.pdf') + expect(result).to eq('/tmp/file.pdf') + expect(File).to have_received(:write).with('/tmp/file.pdf', 'file content') + end + + it 'uses a custom filename when provided' do + result = url_input_source.save_to_file('/tmp', filename: 'custom.pdf') + expect(result).to eq('/tmp/custom.pdf') + end + + it 'handles authentication' do + result = url_input_source_no_filename.save_to_file('/tmp', username: 'user', password: 'pass') + expect(result).to match(%r{/tmp/mindee_temp_\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}_[a-z0-9]{8}\.tmp}) + end + end + + context 'when download fails' do + let(:mock_response) { MockHTTPResponse.new('1.1', '404', 'Not Found', '') } + + it 'raises an error' do + expect { url_input_source.save_to_file('/tmp') }.to raise_error(RuntimeError, %r{Failed to download file}) + end + end + end +end