In [None]:
import json
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Step 1: Load JSON data
# JSON is UTF-8 by specification; pass the encoding explicitly so the platform
# default (e.g. cp1252 on Windows) is never used to decode non-ASCII names.
with open('cbdc.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Step 2: Prepare and preprocess data
# Every record contributes four categorical features (in this fixed order)
# and one status label read from the 'CBCD_status' key.
FEATURE_KEYS = ('country', 'central_bank', 'digital_currency', 'type_')

features = [[entry[key] for key in FEATURE_KEYS] for entry in data]
labels = [entry['CBCD_status'] for entry in data]

# One-hot encode the categorical features. With handle_unknown='ignore',
# categories not seen during fit are encoded as all-zero columns at transform time.
encoder = OneHotEncoder(handle_unknown='ignore')
features_encoded = encoder.fit_transform(features).toarray()

# Status label -> integer class id (ids must stay contiguous from 0 because the
# output layer size is derived from len(label_dict)).
label_dict = {'Research': 0, 'Pilot': 1, 'Proof of concept': 2,'Launched':3,'Cancelled':4}

# Fail with a descriptive message instead of a bare KeyError on the first bad row.
unknown_statuses = sorted({repr(label) for label in labels if label not in label_dict})
if unknown_statuses:
    raise KeyError(f"Unmapped CBCD_status values in cbdc.json: {', '.join(unknown_statuses)}")

labels_encoded = np.array([label_dict[label] for label in labels])

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(features_encoded, labels_encoded, test_size=0.2, random_state=42)

# Build TensorFlow model
# Seed Python, NumPy and TensorFlow RNGs so weight initialisation and batch
# shuffling are reproducible across Restart & Run All (the data split above is
# already seeded). Note: this resets the global `random` / `np.random` state.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Declare the input with an explicit Input layer; passing `input_shape=` to the
# first Dense layer is the deprecated form under Keras 3.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(len(label_dict), activation='softmax')  # Use softmax for multiclass classification
])

# Labels are integer class ids, hence the sparse variant of the loss.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train model (20% of the training split is held out for validation each epoch)
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate model on the untouched test split
test_loss, test_acc = model.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)

# Persist the trained classifier in the native Keras format.
# NOTE(review): the fitted `encoder` is not saved alongside the model, so the
# saved file alone cannot preprocess new inputs — confirm whether that is intended.
model.save('cbdc_model.keras')



In [57]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import json
import difflib
# Load the saved model
# NOTE(review): this rebinds the notebook-global name `model`, which the CBDC
# training and prediction cells also use — confirm the intended execution order.
model = tf.keras.models.load_model("tensor_models/firms/firm_registration_model.keras")

# Load the word index from the tokenizer
# (explicit UTF-8: JSON is UTF-8 by specification, the platform default may not be)
with open('tensor_models/firms/word_index.json', 'r', encoding='utf-8') as f:
    word_index = json.load(f)

# Load the firm data
with open('tensor_models/firms/firms_data.json', 'r', encoding='utf-8') as f:
    firms_data = json.load(f)

# Function to check if a firm is registered and return registration status with date
def check_registration(firm_name, threshold=0.5):
    """Return the registration status and date for a firm name.

    The name is tokenized with the saved ``word_index``, padded to length 10
    and passed to the global ``model``. The first output value decides which
    lookup strategy is applied to ``firms_data``:

    * value >= ``threshold``: fuzzy match (difflib, cutoff 0.6) against the
      lower-cased "Firm Name" of every entry;
    * otherwise: case-insensitive substring match, first hit wins.

    Args:
        firm_name: Firm name to look up.
        threshold: Cut-off applied to ``prediction[0][0]`` (default 0.5).
            Presumably the model emits a single probability — confirm.

    Returns:
        ``("Registered", <entry["Date"]>)`` when a matching entry is found,
        otherwise ``("Not Registered", None)``. A blank or non-string name
        always yields ``("Not Registered", None)``.
    """
    # Guard: a blank query would match every firm in the substring branch,
    # because `"" in name` (and `" " in "Some Firm"`) is always True.
    if not isinstance(firm_name, str) or not firm_name.strip():
        return "Not Registered", None

    # Normalised form used for all name comparisons.
    query = firm_name.strip().lower()

    # Tokenize the input with the vocabulary the model was trained on.
    tokenizer = tf.keras.preprocessing.text.Tokenizer()
    tokenizer.word_index = word_index
    query_seq = tokenizer.texts_to_sequences([firm_name])
    query_padded = pad_sequences(query_seq, maxlen=10, padding='post')

    # Make prediction
    prediction = model.predict(query_padded)

    # Determine the registration status based on the prediction
    if prediction[0][0] >= threshold:
        # Index entries by lower-cased name; setdefault keeps the first entry
        # for duplicate names, matching a front-to-back scan of firms_data.
        entries_by_name = {}
        for entry in firms_data:
            entries_by_name.setdefault(entry["Firm Name"].lower(), entry)

        # Find the closest matching firm name
        matches = difflib.get_close_matches(query, list(entries_by_name), n=1, cutoff=0.6)
        if matches:
            return "Registered", entries_by_name[matches[0]]["Date"]
    else:
        # Check for partial matches
        for entry in firms_data:
            if query in entry["Firm Name"].lower():
                return "Registered", entry["Date"]

    return "Not Registered", None

# Example usage
firm_name = "Revolut Ltd"

# check_registration always returns a (status, date) tuple, so there is no
# "no result" case to handle; the date is None when the firm is not registered.
result = check_registration(firm_name)
status, date = result

if status == "Registered":
    print(f"{firm_name} is {status} with date {date}")
else:
    print(f"{firm_name} is {status}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 196ms/step
Revolut Ltd is Registered with date 26 Sept 2022


In [None]:
# Example user input
user_input = "united states of america"

# Preprocess the user input country
country, additional_keywords = preprocess_user_input(user_input)

# Query the entry by country
# (the lookup signals a miss with the sentinel string "Country not found")
country_entry = query_entry_by_country(country)

if country_entry != "Country not found":
    # Format the entry details into HTML
    formatted_details_html = format_entry_details_html(country_entry)

    # Get CBDC news for the country
    # NOTE(review): the helper is named get_country_name but its result is
    # printed as CBDC news — confirm this is the intended function.
    cbdc_news_html = get_country_name(country)

    # Preprocess the user input and make prediction using the model.
    # Only the country is known here; the other features are left blank.
    input_data = {'country': country, 'central_bank': '', 'digital_currency': '', 'type_': ''}
    preprocessed_input = preprocess_input(input_data)

    # NOTE(review): `model` must be the CBDC classifier here, but the cell that
    # loads firm_registration_model.keras rebinds the same global name —
    # confirm execution order or give the two models distinct names.
    prediction = model.predict(preprocessed_input)

    # label_dict maps status -> class id in insertion order, so the position
    # of a key equals its class id.
    status_names = list(label_dict)
    predicted_status = status_names[int(np.argmax(prediction))]

    # Single-sample check: 100 when the prediction equals the recorded status,
    # 0 otherwise. This gives the same value as model.evaluate(...)[1] * 100 on
    # one row, without a second forward pass or reliance on metric ordering.
    recorded_status = country_entry['CBCD_status']
    accuracy = 100.0 if predicted_status == recorded_status else 0.0

    print("Details for", user_input + ":")
    print("Predicted CBDC Status:", predicted_status)
    print("Model Accuracy: {:.2f}%".format(accuracy))
    print(cbdc_news_html)
    print(formatted_details_html)
else:
    print("Country not found")


In [15]:
{'qatar', 'rwanda', 'nepal', 'russian federation', 'curacao', 'india', 'denmark', 'spain', 'france', 'china', 'namibia', 'bangladesh', 'oman', 'morocco', 'kuwait', 'trinidad and tobago', 'south korea', 'jamaica', 'switzerland', "cote d'ivoire", 'republic of palau', 'indonesia', 'zimbabwe', 'mexico', 'palestine', 'thailand ', 'kenya', 'united states of america', 'nigeria', 'czech republic', 'bahamas', 'iran', 'azerbaijan', 'laos', 'kazakhstan', 'saudi arabia', 'honduras', 'peru', 'georgia', 'algeria', 'south africa', 'tunisia', 'japan', 'pakistan', 'england', 'lebanon', 'thailand', 'eswatini', 'montenegro', 'eastern caribbean economic and currency union (oecs/eccu)', 'iraq', 'philippines', 'chile', 'singapore', 'malaysia', 'madagascar', 'ecuador', 'zambia', 'macau', 'bahrain', 'sri lanka', 'uk', 'argentina', 'yemen', 'vietnam', 'colombia', 'euro area', 'turkey', 'vanuatu', 'australia', 'brazil', 'bhutan', 'tanzania', 'israel, hong kong', 'finland', 'uruguay', 'mongolia', 'united arab emirates', 'belarus', 'united kingdom', 'uganda', 'mauritius', 'new zealand', 'taiwan', 'mauritania', 'iceland', 'canada', 'egypt', 'sudan', 'austria', 'ukraine', 'ghana', 'sweden', 'hungary', 'hong kong', 'jordan', 'solomon islands', 'dominican republic', 'haiti', 'norway', 'poland', 'israel', 'tonga', 'fiji', 'guatemala'}
# Free-text query to scan for a known country name (previously duplicated inline).
user_query = 'i want to know about united kingdom'

print(country_names)

# Country names that occur verbatim inside the query. Pick the longest (ties
# broken alphabetically) so the result does not depend on set iteration order,
# and fall back to None so the final print cannot hit an unbound name.
mentioned_countries = [country for country in country_names if country in user_query]
country_name = max(mentioned_countries, key=lambda name: (len(name), name)) if mentioned_countries else None

# NOTE(review): this tests whether the whole query is a substring of a country
# name, which can only hold for very short queries — confirm whether the
# containment was meant to be the other way round.
similar_countries = [country for country in country_names if user_query in country]
if similar_countries:
    print('Similar countries found:', similar_countries)
else:
    print('No similar countries found',country_name)

{'vanuatu', 'malaysia', 'euro area', 'taiwan', 'algeria', 'haiti', 'brazil', 'pakistan', 'japan', 'jamaica', 'chile', 'egypt', 'china', 'trinidad and tobago', 'turkey', 'iraq', 'norway', 'hungary', 'united arab emirates', 'czech republic', 'mongolia', 'india', 'azerbaijan', 'peru', 'israel', 'bahamas', 'united kingdom', 'colombia', 'kenya', 'eastern caribbean economic and currency union (oecs/eccu)', 'solomon islands', 'united states of america', 'lebanon', 'singapore', 'ghana', 'qatar', 'ukraine', 'zimbabwe', 'uruguay', 'mauritius', 'hong kong', 'iceland', 'iran', 'saudi arabia', 'france', 'guatemala', 'tonga', 'australia', 'nigeria', 'rwanda', 'denmark', 'belarus', 'indonesia', 'curacao', 'madagascar', 'austria', 'uganda', 'poland', 'spain', 'england', 'montenegro', 'oman', 'fiji', 'morocco', 'bahrain', 'switzerland', 'argentina', 'kuwait', 'sri lanka', 'canada', 'bhutan', 'kazakhstan', 'thailand', 'south korea', 'eswatini', 'zambia', 'palestine', 'finland', 'sweden', 'republic of pa