In [18]:
import re
import json

# Sample input text for demonstration (assumes some values might not be explicitly stated)
# Mohammed Mahadi to Try differnt API Responses for TESTING
input_text = """
Annual Sales Volume is 45 million SAR. The Growth ratio approximately  is 74%. The Market Gap stands at 10 million units, which is about 5% of the total market. 
Relative costs of raw materials to selling price are pegged at 12%. The demographic of Saudi Arabia shows that 55% are men, 45% are female, 50% are youth, 30% are elders, and 20% are children.
The demand and supply analysis reveals a demand of 500 million against a supply of 435 million. 
The Monthly Average income and expenditure per capita citizen in Saudi Arabia is 14,000 SAR."""

# Define patterns for numbers, amounts, and percentages
# 1. My Idea is to generate a JSON Response file which can serve both web & mobile applications
# 2. Mohammed Gaid (The App Developer) can create a backened dynamic table to be updated regularly by App Admins for more and new patterns
# 3. Default values can be altered by the same app window on 2.
# 4. then the text below would be a direct query from that backened table 
number_pattern = r"(\d+(\.\d+)?( million)?( SAR)?)"
percentage_pattern = r"(\d+(\.\d+)?%)"

# Define key words, their associated patterns, and default values
key_words_patterns_defaults = {
    "sales volume": {"pattern": number_pattern, "default": "200 million SAR"},
    "growth": {"pattern": percentage_pattern, "default": "70%"},
    "market gap": {"pattern": number_pattern, "default": "10 million units"},
    "relative cost": {"pattern": percentage_pattern, "default": "12%"},
    # Add more entries as needed
}

# Function to find fuzzy matches with default values
def find_fuzzy_matches_with_defaults(text, key_words_patterns_defaults):
    extracted_info = {}
    for key_word, info in key_words_patterns_defaults.items():
        pattern, default_value = info["pattern"], info["default"]
        # Construct regex pattern for key word followed by number/percentage
        regex = rf"{key_word}.*?{pattern}"
        match = re.search(regex, text, re.IGNORECASE | re.DOTALL)
        if match:
            # Store the first matching value
            extracted_info[key_word] = match.group().strip()
        else:
            # Use the default value if no match is found
            extracted_info[key_word] = default_value
    return extracted_info

# Find matches and prepare the output in JSON format
extracted_info = find_fuzzy_matches_with_defaults(input_text, key_words_patterns_defaults)

# Convert the extracted information to JSON, ensuring only the first occurrence is captured
json_output = json.dumps(extracted_info, indent=4, ensure_ascii=False)
print(json_output)


{
    "sales volume": "Sales Volume is 45 million SAR",
    "growth": "Growth ratio approximately  is 74%",
    "market gap": "Market Gap stands at 10 million",
    "relative cost": "Relative costs of raw materials to selling price are pegged at 12%"
}


## Test with new pattern (average per capita)

In [19]:
import re
import json

# Sample input text for demonstration (assumes some values might not be explicitly stated)
# Mohammed Mahadi to Try differnt API Responses 
input_text = """
As of my last update in April 2023, specific and current figures regarding the average per capita spending in Jeddah, Saudi Arabia, were not provided in my training data. Consumer spending can vary widely based on numerous factors including but not limited to economic conditions, inflation rates, consumer confidence, and changes in disposable income.

Per capita spending in a city like Jeddah, which is one of Saudi Arabia's major urban centers, would be influenced by its economic activities, demographics, and the standard of living. Jeddah, being a significant commercial hub as well as a gateway for pilgrimages to the Islamic holy cities Mecca and Medina, might exhibit different spending patterns compared to other cities, potentially showing higher per capita spending in certain categories such as hospitality, retail, and services.

For the most accurate and recent data on average per capita spending in Jeddah, it would be advisable to consult:
- Official reports from Saudi Arabian government departments, such as the Saudi Arabian Monetary Authority (SAMA) or the General Authority for Statistics.
- Research and market analysis reports by financial institutions, research firms, and consultancy agencies specializing in the Saudi Arabian market.
- Publications and studies from economic research institutes and universities that focus on the Saudi Arabian economy.

These sources may provide detailed insights into consumer spending trends, economic forecasts, and demographic analyses that can help to estimate average per capita spending in Jeddah more accurately.ccommodations, utilities, food, transportation, and leisure activities, it's beneficial to consult localized resources and current living cost guides【13†source】【14†source】."""

# Define patterns for numbers, amounts, and percentages
# 1. My Idea is to generate a JSON Response file which can serve both web & mobile applications
# 2. Mohammed Gaid (The App Developer) can create a backened dynamic table to be updated regularly by App Admins for more and new patterns
# 3. Default values can be altered by the same app window on 2.
# 4. then the text below would be a direct query from that backened table 
# 5. each pattern should be linked to a specific question 

number_pattern = r"SAR \d+(,\d+)?(\.\d+)?"
percentage_pattern = r"\d+(\.\d+)?%"

# Define key words, their associated patterns, and default values
key_words_patterns_defaults = {
    "sales volume": {"pattern": number_pattern, "default": "200 million SAR"},
    "growth": {"pattern": percentage_pattern, "default": "100%"},
    "market gap": {"pattern": number_pattern, "default": "10 million units"},
    "relative cost": {"pattern": percentage_pattern, "default": "12%"},
    "average per capita": {"pattern": number_pattern, "default": "3 thousands SAR"},

    
    # Add more entries as needed
}

def find_fuzzy_matches_with_defaults(text, key_words_patterns_defaults):
    extracted_info = {}
    for key_word, info in key_words_patterns_defaults.items():
        pattern, default_value = info["pattern"], info["default"]
        regex = rf"{key_word}.*?({pattern})"
        match = re.search(regex, text, re.IGNORECASE | re.DOTALL)
        if match:
            extracted_info[key_word] = match.group(1).strip()  # Adjusted to capture the numerical value directly
        else:
            extracted_info[key_word] = default_value
    return extracted_info

extracted_info = find_fuzzy_matches_with_defaults(input_text, key_words_patterns_defaults)
json_output = json.dumps(extracted_info, indent=4, ensure_ascii=False)
print(json_output)


{
    "sales volume": "200 million SAR",
    "growth": "100%",
    "market gap": "10 million units",
    "relative cost": "12%",
    "average per capita": "3 thousands SAR"
}


## Test with new pattern (raw material to the selling price)

In [22]:
import re
import json

# Sample input text for demonstration (assumes some values might not be explicitly stated)
# Mohammed Mahadi to Try differnt API Responses for Testing
input_text = """
As of the last update in April 2023, I don't have access to real-time databases or the ability to fetch live data, so I can't provide the most current specific ratio of raw material costs to the selling price in the food industry in Saudi Arabia. However, I can offer some general insights that might help you understand the dynamics of this ratio within the food or restaurant industry.

In the food industry, the cost of raw materials relative to the selling price, often referred to as the Cost of Goods Sold (COGS), is a critical measure that significantly impacts profitability. This ratio can vary widely depending on several factors, including the type of cuisine, sourcing practices, and market positioning of the establishment.

General Guidelines:
Fast Food and Quick Service Restaurants (QSRs): These establishments typically have a higher COGS ratio, ranging from 88% to 35% of the selling price, due to the emphasis on convenience and speed, which often requires pre-processed or ready-to-cook ingredients that can be more expensive.

Casual Dining: For casual dining restaurants, the COGS ratio often ranges between 25% and 30% of the selling price. These restaurants balance between the quality of ingredients and cost-efficiency, aiming to offer a more affordable dining experience compared to fine dining.

Fine Dining: Fine dining restaurants, which focus on the quality of ingredients, unique menus, and a higher level of service, can have a COGS ratio that is lower, around 20% to 30% of the selling price, due to higher menu prices. However, the absolute cost of raw materials in fine dining can be significantly higher because of the premium ingredients used.

Specifics for Saudi Arabia:
Local vs. Imported: The ratio can also be affected by the origin of the raw materials. Imported ingredients might increase the COGS due to shipping and import taxes, a factor that can be significant in Saudi Arabia, where many food items are imported.

Market Trends: The food industry in Saudi Arabia is influenced by local dietary preferences, international cuisine's popularity, and governmental policies on food importation and sustainability. These factors can lead to fluctuations in raw material costs."""
# Define patterns for numbers, amounts, and percentages
# 1. My Idea is to generate a JSON Response file which can serve both web & mobile applications
# 2. Mohammed Gaid (The App Developer) can create a backened dynamic table to be updated regularly by App Admins for more and new patterns
# 3. Default values can be altered by the same app window on 2.
# 4. then the text below would be a direct query from that backened table 
# 5. each pattern should be linked to a specific question 

number_pattern = r"SAR \d+(,\d+)?(\.\d+)?"
percentage_pattern = r"\d+(\.\d+)?%"

# Define key words, their associated patterns, and default values
key_words_patterns_defaults = {
    "sales volume": {"pattern": number_pattern, "default": "200 million SAR"},
    "growth": {"pattern": percentage_pattern, "default": "20%"},
    "market gap": {"pattern": number_pattern, "default": "10 million units"},
    "relative cost": {"pattern": percentage_pattern, "default": "12%"},
    "average per capita": {"pattern": number_pattern, "default": "3 thousands SAR"},
    "raw material": {"pattern": percentage_pattern, "default": "45%"},

    

    
    # Add more entries as needed
}

def find_fuzzy_matches_with_defaults(text, key_words_patterns_defaults):
    extracted_info = {}
    for key_word, info in key_words_patterns_defaults.items():
        pattern, default_value = info["pattern"], info["default"]
        regex = rf"{key_word}.*?({pattern})"
        match = re.search(regex, text, re.IGNORECASE | re.DOTALL)
        if match:
            extracted_info[key_word] = match.group(1).strip()  # Adjusted to capture the numerical value directly
        else:
            extracted_info[key_word] = default_value
    return extracted_info

extracted_info = find_fuzzy_matches_with_defaults(input_text, key_words_patterns_defaults)
json_output = json.dumps(extracted_info, indent=4, ensure_ascii=False)
print(json_output)


{
    "sales volume": "200 million SAR",
    "growth": "20%",
    "market gap": "10 million units",
    "relative cost": "12%",
    "average per capita": "3 thousands SAR",
    "raw material": "88%"
}
