## Moody's Historical Loader


This parser reads XML files of historical Moody's rating data and uploads the data to BigQuery.  Currently, we only use Moody's long ratings.  Presumably the Moody's historical files also contain other ratings, such as credit watch and credit outlook; those may be in the institutional or issuer files.  This loader only looks for credit ratings by security (CUSIP or instrument).  Most of the ratings are long ratings, but there are also a few short ratings.

In [1]:
import gcsfs
import warnings
import pandas as pd
import pickle
import os
from google.cloud import storage
# Cloud Storage client bound to the module-level name `client`.
# NOTE(review): the BigQuery load cell later rebinds `client` to a
# bigquery.Client(), so this object is only reachable until that cell runs.
client = storage.Client()
import numpy as np  
import gzip
from google.cloud import bigquery
import os, csv, xmltodict, json
import zipfile36 as zipfile

In [2]:
# Point GOOGLE_APPLICATION_CREDENTIALS at the service-account JSON key file.
# NOTE(review): the first cell already constructs a storage.Client() before
# this variable is set — confirm that client picks up the intended credentials.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/home/jupyter/creds.json"

In [3]:
# gcsfs filesystem handle for the project.
# NOTE(review): `fs` is not read by any of the visible cells — confirm it is
# still needed.
fs = gcsfs.GCSFileSystem(project='eng-reactor-287421')

In [4]:
from google.cloud import storage
from zipfile import ZipFile
from zipfile import is_zipfile
import io

def zipextract(bucketname, zipfilename_with_path):
    """Download a zip archive from Cloud Storage and return its first member.

    The archive is held entirely in memory; nothing is written to disk or
    back to the bucket. The list of member names is printed as a diagnostic.

    Args:
        bucketname: Name of the GCS bucket that holds the archive.
        zipfilename_with_path: Object path of the zip file inside the bucket.

    Returns:
        The raw bytes of the first file listed in the archive, or ``None``
        when the downloaded object is not a zip archive or the archive has
        no members.
    """
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucketname)
    blob = bucket.blob(zipfilename_with_path)

    # download_as_bytes() is the current name of the deprecated
    # download_as_string() alias; both return the object's content as bytes.
    zipbytes = io.BytesIO(blob.download_as_bytes())

    if not is_zipfile(zipbytes):
        return None

    with ZipFile(zipbytes, 'r') as myzip:
        member_names = myzip.namelist()
        print(member_names)
        if not member_names:
            return None
        # Only the first member is read; any further members are ignored.
        return myzip.read(member_names[0])

In [9]:
import xmltodict
# Fetch one weekly baseline part file from the "moodys_ratings" bucket;
# zipextract() returns the bytes of the first file inside the archive.
xml = zipextract("moodys_ratings","gil@ficc.ai/PFG_Instrument_History_Rating_baseline_weekly/pfg_inst_history_rating_Utf8_Baseline_Weekly_20220619_023000_part10_100.Zip")
# Parse the XML document into nested dicts/lists; the result is what the
# create_df cell below works on.
instrument_dict = xmltodict.parse(xml)

['pfg_inst_history_rating_Utf8_Baseline_Weekly_20220619_023000_part10_100.xml']


In [69]:
# Sentinel "valid to" date used for ratings whose termination date is xsi:nil
# (i.e. the rating has not been terminated).
_OPEN_ENDED_RATING_DATE = "2100-01-01"

# Output column order of create_df().
_RATING_COLUMNS = [
    "cusip",
    "rating",
    "rating_type",
    "rating_valid_from_date",
    "rating_valid_to_date",
    "rating_direction",
    "rating_reason",
]


def _is_nil(value):
    """Return True when *value* is xmltodict's form of an ``xsi:nil`` element."""
    return isinstance(value, dict) and value.get("@xsi:nil") == "true"


def _as_list(node):
    """Return *node* as a list.

    xmltodict yields a bare item for a single child element and a list for
    repeated ones; ``None`` (an empty element) becomes an empty list.
    """
    if node is None:
        return []
    if isinstance(node, list):
        return node
    return [node]


def _text(value):
    """Return *value* unchanged, or ``None`` when it is an ``xsi:nil`` marker."""
    return None if _is_nil(value) else value


def _to_timestamp(value):
    """Parse *value* with pandas; nil/empty/unparsed-dict values become NaT."""
    if value is None or isinstance(value, dict):
        return pd.NaT
    return pd.to_datetime(value)


def _rating_row(cusip, rating):
    """Build one output row (a dict keyed by column name) for a single rating."""
    termination = rating["Rating_Termination_Date"]
    if _is_nil(termination):
        # No termination date: the rating is still in force.
        valid_to = pd.to_datetime(_OPEN_ENDED_RATING_DATE)
    else:
        valid_to = _to_timestamp(termination)
    return {
        "cusip": cusip,
        "rating": _text(rating["Rating_Text"]),
        "rating_type": _text(rating["Rating_Type_Text"]),
        "rating_valid_from_date": _to_timestamp(rating["Rating_Local_Date"]),
        "rating_valid_to_date": valid_to,
        "rating_direction": _text(rating["Rating_Direction_Text"]),
        "rating_reason": _text(rating["Rating_Reason_Text"]),
    }


def _instrument_rows(instrument):
    """Return the list of rating rows for one ``Instrument_Root`` entry."""
    identifiers = instrument["Instrument_Identifiers"]
    if not isinstance(identifiers, dict) or "Instrument_Identifier" not in identifiers:
        # Instruments without identifiers are reported and skipped.
        print(instrument)
        print("no key")
        return []

    identifier = identifiers["Instrument_Identifier"]
    if isinstance(identifier, dict):
        # A lone identifier is used as-is, whatever its ID type.
        cusip = identifier["Instrument_ID_Value"]
    elif isinstance(identifier, list):
        # Several identifiers: keep the CUSIP one (the last, if repeated);
        # stays None when the instrument has no CUSIP identifier.
        cusip = None
        for entry in identifier:
            if entry["ID_Type_Text"] == "CUSIP":
                cusip = entry["Instrument_ID_Value"]
    else:
        return []

    ratings_node = instrument["Instrument_Ratings"]
    ratings = (
        ratings_node.get("Instrument_Rating")
        if isinstance(ratings_node, dict)
        else None
    )
    return [_rating_row(cusip, rating) for rating in _as_list(ratings)]


def create_df(xml):
    """Flatten Moody's instrument rating history into a DataFrame.

    Produces one row per rating of every instrument that has an identifier,
    so the full rating history of each instrument is kept. Rows are
    collected in a list and the frame is built once at the end, which
    avoids the quadratic cost of concatenating frames inside the loop.

    Args:
        xml: Either the already-parsed document (the nested dict returned by
            ``xmltodict.parse``) or the raw XML as ``str``/``bytes``, which is
            parsed here.

    Returns:
        A DataFrame with the columns ``cusip``, ``rating``, ``rating_type``,
        ``rating_valid_from_date``, ``rating_valid_to_date``,
        ``rating_direction`` and ``rating_reason``. A nil termination date is
        stored as 2100-01-01; other nil fields are stored as missing values.

    Raises:
        KeyError: If an instrument or rating lacks one of the expected
            elements (for example ``Rating_Text``).

    The number of rows is printed before returning; instruments without an
    ``Instrument_Identifier`` are printed followed by "no key" and skipped.
    """
    if isinstance(xml, (str, bytes)):
        # Imported here so the function works on raw XML on its own.
        import xmltodict

        parsed = xmltodict.parse(xml)
    else:
        parsed = xml

    roots = parsed["Instrument_Roots"] or {}
    rows = []
    # A file with a single instrument yields a dict instead of a list.
    for instrument in _as_list(roots.get("Instrument_Root")):
        rows.extend(_instrument_rows(instrument))

    df = pd.DataFrame(rows, columns=_RATING_COLUMNS)
    print(len(df))
    return df


In [70]:
# Keep the parsed frame: the preview and BigQuery-load cells below read `df`.
df = create_df(instrument_dict)

{'Instrument_ID': '800608995', 'Deal_number': {'@xsi:nil': 'true'}, 'Class_Code': '833', 'Class_Text': 'Revenue', 'Class_Short_Description': 'REV', 'Dated_Date': '1985-11-15T00:00:00', 'ISO_Currency_Code': 'USD', 'Currency_Multiple_Indicator': 'N', 'Maturity_Date': '2015-03-01T00:00:00', 'Sale_ID': '800135217', 'Maturity_Year': '2015', 'Sale_Date': '1985-12-12T00:00:00', 'Face_Amount_USD': {'@xsi:nil': 'true'}, 'Credit_Linked_Indicator': 'N', 'Takedown_Indicator': 'N', 'Security_Description': {'@xsi:nil': 'true'}, 'Instrument_Type_Code': '1864', 'Instrument_Type_Text': 'Public Finance Default', 'Private_Placement_Code': '1880', 'Private_Placement_Text': 'Public Finance Default', 'Coupon_Type_Code': '25', 'Coupon_Type_Text': 'Fixed', 'Coupon_Type_Short_Description': 'FIX', 'Coupon_Frequency_Code': '21', 'Coupon_Frequency_Text': 'Unknown - For Conversion Use Only', 'Coupon_Frequency_Short_Description': 'UNK', 'Coupon_Rate': '9.2500', 'Instrument_Description': 'DAUGHTERS OF CHARITY-OCONNE

KeyboardInterrupt: 

In [None]:
# Preview the first rows of df.
df.head()

In [None]:
# Number of rows in df.
len(df)

In [None]:
# BigQuery destination settings.
# NOTE(review): PROJECT and dataset are not referenced by the visible cells,
# and table_id points at the 'jesse_tests' dataset rather than
# 'reference_data' — confirm which destination is intended.
PROJECT = 'eng-reactor-287421'
dataset = 'reference_data'
# Fully-qualified destination table passed to the load job.
table_id = 'eng-reactor-287421.jesse_tests.moodys_test'

In [None]:
import gcsfs
# BigQuery client; this rebinds the module-level name `client`, which the
# first cell bound to a storage.Client().
client = bigquery.Client()
# NOTE(review): `fs` is re-created here but not used in this cell.
fs = gcsfs.GCSFileSystem(project='eng-reactor-287421')
job_config = bigquery.LoadJobConfig(
    # Partial schema: only the columns listed here get an explicit type.
    # All DataFrame columns are still written to the table.
    schema=[
        # "cusip" is given an explicit STRING type instead of relying on
        # type auto-detection.
        bigquery.SchemaField("cusip", bigquery.enums.SqlTypeNames.STRING),
        # Indexes are written if included in the schema by name.
    ],
    # WRITE_APPEND adds the loaded rows to the existing table, whereas
    # WRITE_TRUNCATE would replace the table's contents.
    # NOTE(review): re-running this cell therefore appends the rows again.
    write_disposition="WRITE_APPEND",
)

# Start the load job that uploads df to table_id.
job = client.load_table_from_dataframe(
    df, table_id, job_config=job_config
)  # Make an API request.
job.result()  # Wait for the job to complete.

# Re-read the table metadata to report its size after the load.
table = client.get_table(table_id)  # Make an API request.
print(
    "Loaded {} rows and {} columns to {}".format(
        table.num_rows, len(table.schema), table_id
    )
)