In [58]:
from google.cloud import storage
from google.cloud import bigquery
import pandas as pd
import decimal
from io import StringIO
import os
from datetime import datetime, timezone

In [2]:
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="/Users/myles/ficc_myles/eng-reactor-287421-1f272e8b3a32.json"

In [3]:
bq_client = bigquery.Client()

In [32]:
def bqa(name, variety="guess", mode="NULLABLE"):
    if variety == "guess":
        suffix = name[-2:]
        if suffix == "_c": variety = "string"
        elif suffix == "_d": variety = "date"
        elif suffix == "_f": variety = "numeric"
        elif suffix == "_l": variety = "numeric"
        elif suffix == "_i": variety = "string"
        elif suffix[0] != "_": variety = "string"
    return bigquery.SchemaField(name,variety,mode)

In [68]:
schema_dict = {'agent': [bqa("action"),
                         bqa("agent_id_l"),
                         bqa("legal_name_c")],
               
               'coupchst': [bqa("action"),
                            bqa("issue_id_l"),
                            bqa("maturity_id_l"),
                            bqa("change_date_d"),
                            bqa("new_coupon_rate_f")],
               
               'varrate': [bqa("action"),
                           bqa("issue_id_l"), 
                           bqa("maturity_id_l"),
                           bqa("initial_interest_rate_f"),
                           bqa("initial_interest_rate_date_d"),
                           bqa("current_interest_rate_f"),
                           bqa("reset_frequency_i"), 
                           bqa("next_variable_rate_date_d"), 
                           bqa("reset_day_c","string"),
                           bqa("reset_month_c"), 
                           bqa("floor_f"),
                           bqa("ceiling_f"), 
                           bqa("rate_calc_method_code_i"),
                           bqa("interest_payment_day_code_c"), 
                           bqa("note_c"), 
                           bqa("instrument_type_c"), 
                           bqa("rate_type_c"),
                           bqa("determination_date_d"), 
                           bqa("effective_date_d"),
                           bqa("valid_until_date_d"),
                           bqa("formula_c"), 
                           bqa("fix_float_ind_i"), 
                           bqa("benchmark_c")],
               
                'varhist': [bqa("action"),
                            bqa("issue_id_l"), 
                            bqa("maturity_id_l"),
                            bqa("next_variable_rate_date_d"), 
                            bqa("current_interest_rate_f"), 
                            bqa("determination_date_d"), 
                            bqa("effective_date_d"), 
                            bqa("valid_until_date_d"), 
                            bqa("formula_c"), 
                            bqa("fix_float_ind_i")],
                              
               'issuagnt': [bqa("action","string"),
                            bqa("issue_id_l"),
                            bqa("agent_id_l"), 
                            bqa("agent_role_c")],
               
                'addlcred': [bqa("action"), 
                             bqa("issue_id_l"), 
                             bqa("maturity_id_l"), 
                             bqa("addl_credit_schedule_num_l"), 
                             bqa("addl_credit_type_code_c"), 
                             bqa("row_number_l"),
                             bqa("effective_date_d"), 
                             bqa("expiration_date_d"), 
                             bqa("has_guarantees_i"), 
                             bqa("agent_id_l")],

               'bondinfo': [bqa("action"), 
                            bqa("issue_id_l"),
                            bqa("maturity_id_l"),
                            bqa("cusip_c"), 
                            bqa("coupon_f"), 
                            bqa("maturity_date_d"), 
                            bqa("settlement_date_d"),
                            bqa("maturity_amount_f"), 
                            bqa("series_code_c"), 
                            bqa("active_maturity_flag_i"), 
                            bqa("coupon_code_c"), 
                            bqa("debt_type_c"),
                            bqa("offering_price_f"), 
                            bqa("offering_yield_f"), 
                            bqa("total_maturity_offering_amt_f"), 
                            bqa("tot_mat_amt_outstanding_f"), 
                            bqa("tot_mat_amt_outstanding_date_d"), 
                            bqa("additional_credit_flag_i"), 
                            bqa("addl_credit_schedule_num_i"), 
                            bqa("series_c"), 
                            bqa("default_flag_i"), 
                            bqa("dfrd_int_cnvrsn_date_d"), 
                            bqa("put_flag_i"),
                            bqa("optional_call_flag_i"), 
                            bqa("call_schedule_number_l"), 
                            bqa("redemption_flag_i"), 
                            bqa("prtl_redemption_flag_i"),
                            bqa("reoffered_i"), 
                            bqa("reoffered_yield_f"), 
                            bqa("reoffered_date_d"), 
                            bqa("material_event_flag_i"), 
                            bqa("capital_purpose_c"), 
                            bqa("tax_code_c"), 
                            bqa("state_tax_i"), 
                            bqa("bank_qualified_i"), 
                            bqa("orig_cusip_status_i"), 
                            bqa("orig_cusip_type_i"), 
                            bqa("prior_cusip_c"), 
                            bqa("cusip_change_reason_c"), 
                            bqa("cusip_change_date_d"), 
                            bqa("project_name_c"), 
                            bqa("use_of_proceeds_c"),
                            bqa("security_code_i"), 
                            bqa("sink_fund_type_i"), 
                            bqa("super_sinker_flag_i"), 
                            bqa("registration_type_i"), 
                            bqa("average_life_date_d"), 
                            bqa("dated_date_d"), 
                            bqa("delivery_date_d"),
                            bqa("interest_calc_code_i"), 
                            bqa("first_coupon_date_d"), 
                            bqa("interest_frequency_i"), 
                            bqa("interest_accrual_date_d"), 
                            bqa("depository_type_i"),
                            bqa("denomination_amount_f"), 
                            bqa("bond_insurance_code_c"),
                            bqa("mtg_insurance_code_c"), 
                            bqa("moody_long_rating_c"), 
                            bqa("moody_long_date_d"), 
                            bqa("moody_short_rating_c"),
                            bqa("moody_short_date_d"), 
                            bqa("moody_conditional_c"), 
                            bqa("sp_long_rating_c"), 
                            bqa("sp_long_date_d"), 
                            bqa("sp_short_rating_c"), 
                            bqa("sp_short_date_d"), 
                            bqa("sp_provisional_c"), 
                            bqa("fitch_long_rating_c"),
                            bqa("fitch_long_date_d"), 
                            bqa("fitch_short_rating_c"), 
                            bqa("fitch_short_date_d"), 
                            bqa("fitch_conditional_c"), 
                            bqa("update_date_d"), 
                            bqa("note_c"), 
                            bqa("isin_c"),
                            bqa("cav_maturity_amount_f"), 
                            bqa("next_par_call_date_d"), 
                            bqa("next_par_call_price_f"), 
                            bqa("next_call_date_d"), 
                            bqa("next_call_price_f"), 
                            bqa("sf_accel_pct_f"), 
                            bqa("make_whole_call_flag_i"), 
                            bqa("make_whole_start_date_d"), 
                            bqa("make_whole_end_date_d"),
                            bqa("make_whole_spread_l"), 
                            bqa("make_whole_benchmark_c"),
                            bqa("make_whole_text_c"), 
                            bqa("redeem_method_c"),
                            bqa("sinking_fund_allocation_i"),
                            bqa("source_of_repayment_i"), 
                            bqa("next_int_pay_date_d"), 
                            bqa("last_int_pay_date_d"), 
                            bqa("rule_144a_i"), 
                            bqa("seniority_c")],
               
               'issuinfo': [bqa("action"),
                            bqa("issue_id_l"), 
                            bqa("issuer_long_name_c"), 
                            bqa("state_c"), 
                            bqa("issue_description_c"), 
                            bqa("settlement_type_c"), 
                            bqa("settlement_status_i"), 
                            bqa("gross_spread_f"), 
                            bqa("selling_concession_f"),
                            bqa("reallowance_f"), 
                            bqa("offering_type_c"), 
                            bqa("total_offering_amount_f"), 
                            bqa("offering_date_d"), 
                            bqa("first_coupon_date_d"), 
                            bqa("interest_payment_day_c"), 
                            bqa("interest_payment_month_c"), 
                            bqa("active_issue_i"), 
                            bqa("reoffered_i"), 
                            bqa("special_opt_call_code_c"), 
                            bqa("special_mand_call_code_c"), 
                            bqa("extraordinary_call_flag_i"), 
                            bqa("call_at_cav_indr_i"), 
                            bqa("whole_call_frequency_i"), 
                            bqa("part_call_frequency_i"), 
                            bqa("optional_call_note_c"), 
                            bqa("corporate_backer_l",), 
                            bqa("call_notice_days_l"), 
                            bqa("issuer_short_name_c"), 
                            bqa("issuer_note_c"), 
                            bqa("incremental_denomination_f"), 
                            bqa("total_issue_amount_out_f"), 
                            bqa("green_bond_i")],
               
               'redemptn': [bqa("action"), 
                            bqa("issue_id_l"), 
                            bqa("maturity_id_l"), 
                            bqa("redemption_type_i"), 
                            bqa("redemption_date_d"), 
                            bqa("redemption_price_f"), 
                            bqa("redemption_amt_f"), 
                            bqa("redemption_rate_at_cav_f"), 
                            bqa("refunding_cusip_c"), 
                            bqa("escrow_type_i"), 
                            bqa("calls_defeased_flag_i"), 
                            bqa("sink_defeased_flag_i"), 
                            bqa("refunding_issue_dtd_d"), 
                            bqa("ref_issue_settlement_date_d",), 
                            bqa("note_c"), 
                            bqa("escrow_percentage_c")],
               
               'partredm': [bqa("action"), 
                            bqa("issue_id_l"), 
                            bqa("maturity_id_l"), 
                            bqa("partial_call_type_i"), 
                            bqa("partial_call_date_d"), 
                            bqa("row_number_l"), 
                            bqa("partial_call_rate_f"), 
                            bqa("prtl_call_rate_at_cav_f"), 
                            bqa("prtl_call_amt_f"), 
                            bqa("prtl_call_amt_at_cav_f"), 
                            bqa("source_agent_l"), 
                            bqa("note_c")],
               
               'sinkfund': [bqa("action"), 
                            bqa("issue_id_l"), 
                            bqa("maturity_id_l"), 
                            bqa("sink_date_d"), 
                            bqa("sink_price_f"), 
                            bqa("skip_payment_i"), 
                            bqa("sink_amt_at_maturity_f"),
                            bqa("sink_amt_at_cav_f"), 
                            bqa("note_c")],
               
               'issudflt': [bqa("action"), 
                            bqa("issue_id_l"), 
                            bqa("default_date_d"), 
                            bqa("default_type_i"), 
                            bqa("default_event_type_c"), 
                            bqa("default_event_date_d"), 
                            bqa("default_status_c"), 
                            bqa("default_status_date_d"), 
                            bqa("default_source_c"), 
                            bqa("default_source_date_d"), 
                            bqa("default_note_c"), 
                            bqa("reinstated_i"), 
                            bqa("reinstated_date_d")],
                                 
               'ratingsh': [bqa("action"), 
                            bqa("issue_id_l"), 
                            bqa("maturity_id_l"),
                            bqa("cusip_c"), 
                            bqa("rating_type_c"), 
                            bqa("rating_c"), 
                            bqa("creditwatch_c"), 
                            bqa("rating_date_d"), 
                            bqa("outlook_c")],           

              'ratncode': [bqa("action"),
                           bqa("type_c"), 
                           bqa("code_c"), 
                           bqa("description_c")],
            
                 'code': [bqa("action"),
                           bqa("code_id_c"), 
                           bqa("code_col_name_c"), 
                           bqa("code_c"), 
                           bqa("code_desc_c")],
                                
              'agentadr': [bqa("action"),
                           bqa("agent_id_l"), 
                           bqa("address_id_l"), 
                           bqa("addr1_c"), 
                           bqa("addr2_c"), 
                           bqa("city_c"), 
                           bqa("state_c"), 
                           bqa("zipcode_c")],
                                
               'ratings': [bqa("action"),
                           bqa("issue_id_l"), 
                           bqa("maturity_id_l"), 
                           bqa("cusip_c"), 
                           bqa("rating_type_c"), 
                           bqa("rating_c"), 
                           bqa("creditwatch_c"), 
                           bqa("rating_date_d"), 
                           bqa("outlook_c")],
                                
               'bondaddl': [bqa("action"),
                            bqa("issue_id_l"), 
                            bqa("maturity_id_l"), 
                            bqa("next_int_pay_date_d"), 
                            bqa("last_int_pay_date_d")],
                                
               'varschd': [bqa("action"),
                           bqa("issue_id_l"), 
                           bqa("maturity_id_l"), 
                           bqa("start_date_d"), 
                           bqa("end_date_d"), 
                           bqa("formula_c")],
                                
               'callschd':[bqa("action"), 
                           bqa("issue_id_l"), 
                           bqa("maturity_id_l"), 
                           bqa("call_schedule_number_l"), 
                           bqa("call_date_d"), 
                           bqa("call_price_f")],
                                
               'putsched': [bqa("action"), 
                            bqa("issue_id_l"), 
                            bqa("maturity_id_l"), 
                            bqa("put_or_tender_type_i"), 
                            bqa("put_or_tender_date_d"), 
                            bqa("put_or_tender_price_f"), 
                            bqa("put_or_tender_frequency_i"), 
                            bqa("next_put_or_tender_date_d"), 
                            bqa("final_put_or_tender_date_d"), 
                            bqa("put_or_tender_window1_l"), 
                            bqa("put_or_tender_window2_l"), 
                            bqa("put_fee_f"), 
                            bqa("note_c")]
              }


In [55]:
def get_table_name_from_file_name(file_name):    
    start = file_name.find('/',5)
    end = file_name.find('.UPD')
    return (file_name[start+1:end].lower())

datetime.datetime(2022, 1, 11, 13, 48, 35, 609590, tzinfo=datetime.timezone.utc)

In [102]:
def main(list_file_name):
    for file_name in list_file_name:
        if file_name.find('.zip') == -1:
            storage_client = storage.Client()
            bucket = storage_client.get_bucket('mergent_data_test')

            blob = bucket.blob(file_name)
            data_str = blob.download_as_string()
            data = str(data_str,'utf-8')
            data = StringIO(data)
            table_name = get_table_name_from_file_name(file_name)
            col_names = [schema_dict[table_name][i].name for i in range(len(schema_dict[table_name]))]
            col_names.append('Junk')
            df=pd.read_csv(data, sep='|',names=col_names,index_col=False, usecols=col_names[:-1])
            
            dt = datetime.now().date()
            df['upload_date_d'] = dt.strftime("%Y%m%d") 
            table_schema = schema_dict[table_name] + [bqa("upload_date_d")]

            for col in table_schema:
                print(col)
                if col.field_type.upper() == 'NUMERIC':
                    df[col.name] = df[col.name].astype(str).map(decimal.Decimal)
                elif col.field_type.upper() == 'DATE':
                    df[col.name] = pd.to_datetime(df[col.name].astype(str),format='%Y%m%d').dt.date
                if col.field_type.upper() == 'STRING':
                    df[col.name] = df[col.name].astype(str)

            job_config = bigquery.LoadJobConfig(
            schema=table_schema,
            write_disposition="WRITE_APPEND")
            
            job = bq_client.load_table_from_dataframe(df, 'eng-reactor-287421.mergent_test.' + table_name.upper(), job_config=job_config) 
# 
            job.result()


In [103]:
name_list = ['ADDLCRED.UPD',
             'BONDINFO.UPD',
             'ISSUDFLT.UPD',
             'RATINGS.UPD',
             'VARHIST.UPD',
             'AGENT.UPD',
             'CALLSCHD.UPD',
             'ISSUINFO.UPD',
             'RATINGSH.UPD',
             'VARRATE.UPD',
             'AGENTADR.UPD',
             'COUPCHST.UPD',
             'PARTREDM.UPD',
             'REDEMPTN.UPD',
             'BONDADDL.UPD',
             'ISSUAGNT.UPD',
             'PUTSCHED.UPD',
             'SINKFUND.UPD']

In [106]:
t = main(name_list)

SchemaField('action', 'STRING', 'NULLABLE', None, (), None)
SchemaField('issue_id_l', 'NUMERIC', 'NULLABLE', None, (), None)
SchemaField('maturity_id_l', 'NUMERIC', 'NULLABLE', None, (), None)
SchemaField('addl_credit_schedule_num_l', 'NUMERIC', 'NULLABLE', None, (), None)
SchemaField('addl_credit_type_code_c', 'STRING', 'NULLABLE', None, (), None)
SchemaField('row_number_l', 'NUMERIC', 'NULLABLE', None, (), None)
SchemaField('effective_date_d', 'DATE', 'NULLABLE', None, (), None)
SchemaField('expiration_date_d', 'DATE', 'NULLABLE', None, (), None)
SchemaField('has_guarantees_i', 'STRING', 'NULLABLE', None, (), None)
SchemaField('agent_id_l', 'NUMERIC', 'NULLABLE', None, (), None)
SchemaField('upload_date_d', 'DATE', 'NULLABLE', None, (), None)
SchemaField('action', 'STRING', 'NULLABLE', None, (), None)
SchemaField('issue_id_l', 'NUMERIC', 'NULLABLE', None, (), None)
SchemaField('maturity_id_l', 'NUMERIC', 'NULLABLE', None, (), None)
SchemaField('cusip_c', 'STRING', 'NULLABLE', None, (

SchemaField('action', 'STRING', 'NULLABLE', None, (), None)
SchemaField('issue_id_l', 'NUMERIC', 'NULLABLE', None, (), None)
SchemaField('maturity_id_l', 'NUMERIC', 'NULLABLE', None, (), None)
SchemaField('cusip_c', 'STRING', 'NULLABLE', None, (), None)
SchemaField('rating_type_c', 'STRING', 'NULLABLE', None, (), None)
SchemaField('rating_c', 'STRING', 'NULLABLE', None, (), None)
SchemaField('creditwatch_c', 'STRING', 'NULLABLE', None, (), None)
SchemaField('rating_date_d', 'DATE', 'NULLABLE', None, (), None)
SchemaField('outlook_c', 'STRING', 'NULLABLE', None, (), None)
SchemaField('upload_date_d', 'DATE', 'NULLABLE', None, (), None)
SchemaField('action', 'STRING', 'NULLABLE', None, (), None)
SchemaField('issue_id_l', 'NUMERIC', 'NULLABLE', None, (), None)
SchemaField('maturity_id_l', 'NUMERIC', 'NULLABLE', None, (), None)
SchemaField('next_variable_rate_date_d', 'DATE', 'NULLABLE', None, (), None)
SchemaField('current_interest_rate_f', 'NUMERIC', 'NULLABLE', None, (), None)
SchemaFie

SchemaField('action', 'STRING', 'NULLABLE', None, (), None)
SchemaField('issue_id_l', 'NUMERIC', 'NULLABLE', None, (), None)
SchemaField('maturity_id_l', 'NUMERIC', 'NULLABLE', None, (), None)
SchemaField('redemption_type_i', 'STRING', 'NULLABLE', None, (), None)
SchemaField('redemption_date_d', 'DATE', 'NULLABLE', None, (), None)
SchemaField('redemption_price_f', 'NUMERIC', 'NULLABLE', None, (), None)
SchemaField('redemption_amt_f', 'NUMERIC', 'NULLABLE', None, (), None)
SchemaField('redemption_rate_at_cav_f', 'NUMERIC', 'NULLABLE', None, (), None)
SchemaField('refunding_cusip_c', 'STRING', 'NULLABLE', None, (), None)
SchemaField('escrow_type_i', 'STRING', 'NULLABLE', None, (), None)
SchemaField('calls_defeased_flag_i', 'STRING', 'NULLABLE', None, (), None)
SchemaField('sink_defeased_flag_i', 'STRING', 'NULLABLE', None, (), None)
SchemaField('refunding_issue_dtd_d', 'DATE', 'NULLABLE', None, (), None)
SchemaField('ref_issue_settlement_date_d', 'DATE', 'NULLABLE', None, (), None)
Schem