### Notebook for testing the AssetAnalyzer framework

In [None]:
from src.AssetAnalyzer.asset_analyzer import AssetAnalyzer
import pandas as pd
import os
import json
import openai

In [None]:
# Load settings from the config.json file (path is relative to the working directory).
# JSON is UTF-8 by specification, so the encoding is passed explicitly instead of
# relying on the platform's locale default (which is not UTF-8 on every system).
with open('config.json', encoding='utf-8') as config_file:
    config = json.load(config_file)

# OpenAI API access key, read from the "OPENAI_KEY" entry of the config
# (a missing entry raises KeyError here rather than failing later on an API call)
openai.api_key = config['OPENAI_KEY']

In [None]:
# Input file locations: entities_fp is handed to AssetAnalyzer further down,
# assets_fp holds the assets that are going to be analyzed.
entities_fp = "data/isin_companies.csv"
assets_fp = "data/asset_data.csv"

# keep_default_na=False stops pandas from converting its default NA markers
# (empty cells, "NA", ...) into NaN, so those cells are kept as plain strings.
assets_df = pd.read_csv(
    assets_fp,
    keep_default_na=False,
)

# Preview the first rows of the assets to analyze (shown as the cell output)
assets_df.head()

In [None]:
# Run the matching pipeline once per asset row and record the outcome in
# new columns of assets_df ("Potential matches", "LLM match", "ISIN match").
for row_label, asset_row in assets_df.iterrows():
    # Build a fresh AssetAnalyzer from the entities file plus this asset's
    # name, ownership name and country
    asset_analyzer = AssetAnalyzer(
        entities_fp,
        asset_row["name"],
        asset_row["asset_ownership_name"],
        asset_row["country"],
    )

    # Preprocessing steps, in the same order as before
    asset_analyzer.format_country_names()
    asset_analyzer.remove_special_characters()

    # Fuzzy match against the 'company_name' column to build the shortlist
    # NOTE(review): 60 is presumably the minimum fuzzy score - confirm in AssetAnalyzer
    asset_analyzer.check_fuzzy_entity_matches('company_name', 60)

    # The shortlist is always recorded (as its string form), even when empty
    shortlist = asset_analyzer.potential_matches
    assets_df.loc[row_label, "Potential matches"] = str(shortlist)

    # Nothing shortlisted: skip the LLM step, the remaining columns stay unset
    if not shortlist:
        continue

    # Ask the LLM for the closest match among the shortlisted candidates
    llm_match = asset_analyzer.check_llm_match()
    assets_df.loc[row_label, "LLM match"] = str(llm_match)

    # Look up the ISIN belonging to the matched company
    matched_isin = asset_analyzer.match_company_to_isin(
        'Entity ISIN', 'company_name', llm_match
    )
    assets_df.loc[row_label, "ISIN match"] = str(matched_isin)


In [None]:
# Write assets_df to a UTF-8 encoded CSV file; the row index is written too,
# since to_csv's default (index=True) is left in place.
output_fp = "data/assets_with_matches.csv"
assets_df.to_csv(output_fp, encoding="utf-8")