# New Incremental Data

This notebook assumes that the first load has already been completed and that the data is in the data warehouse.
For the second and subsequent (incremental) runs, run this notebook: it finds values in the incoming data that are not yet present in the dimension tables, appends them with new surrogate keys, and then appends the new fact rows to the data warehouse.

# New data

In [39]:
import pandas as pd
import os
from datetime import datetime


def add_scd2_columns(df):
    """Stamp a dimension frame with the SCD Type 2 bookkeeping columns.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame of dimension rows that are about to be inserted.

    Returns:
        The same DataFrame object, with ``start_date`` (current local time),
        ``end_date`` (the far-future date 2262-04-11) and ``active_flag``
        ('Y') set on every row.
    """
    scd2_values = {
        'start_date': datetime.now(),
        'end_date': pd.to_datetime('2262-04-11'),
        'active_flag': 'Y',
    }
    # Assign one column at a time so the columns are appended in this order.
    for column_name, column_value in scd2_values.items():
        df[column_name] = column_value
    return df

# Read the cleaned incremental extracts.
# The paths are built with os.path.join instead of a literal containing
# backslashes: "\C" and "\c" are not valid string escapes (Python warns about
# them and plans to reject them), and a hard-coded backslash only works on
# Windows. On Windows the joined path is the same string as before.
cleaned_data_dir = os.path.join("Processed_file", "Cleaned_data")

new_accounts_df = pd.read_excel(os.path.join(cleaned_data_dir, "cleaned_accounts.xlsx"), sheet_name='Sheet1')
new_deposits_df = pd.read_excel(os.path.join(cleaned_data_dir, "cleaned_deposits.xlsx"), sheet_name='Sheet1')
new_loans_df = pd.read_excel(os.path.join(cleaned_data_dir, "cleaned_loans.xlsx"), sheet_name='Sheet1')


# Give each generic 'amount' column a table-specific name so the three
# measures stay distinguishable in the fact tables.
new_accounts_df.rename(columns={'amount': 'account_amount'}, inplace=True)
new_deposits_df.rename(columns={'amount': 'deposit_amount'}, inplace=True)
new_loans_df.rename(columns={'amount': 'loan_amount'}, inplace=True)

# Existing Data

In [73]:
import pandas_gbq
from google.oauth2 import service_account
import pandas as pd
# Existing Data


# Replace these with your actual paths and project information
# NOTE(review): key_path is an absolute path to a service-account key on one
# user's machine; consider reading it from an environment variable instead.
# The same four assignments are repeated at the top of several later cells.
key_path = "C:/Users/epranei/Downloads/calm-cove-423918-t0-ce8d5f6922f1.json"
project_id = 'calm-cove-423918-t0'
dataset_id = 'Advisense'

# Authenticate with Google Cloud
# Builds the credentials object that every BigQuery read/write below passes along.
credentials = service_account.Credentials.from_service_account_file(key_path)

# Helper: read one whole warehouse table into a DataFrame
def load_table_from_bigquery(table_name):
    """Return every row and column of one table in the configured dataset.

    Uses the notebook-level ``project_id``, ``dataset_id`` and ``credentials``.

    Args:
        table_name: Name of the table inside ``dataset_id``.

    Returns:
        DataFrame produced by ``pandas_gbq.read_gbq`` for a ``SELECT *`` query.
    """
    connection_options = {'project_id': project_id, 'credentials': credentials}
    select_all_sql = f"SELECT * FROM `{project_id}.{dataset_id}.{table_name}`"
    return pandas_gbq.read_gbq(select_all_sql, **connection_options)

# Load the dimension tables
# Each call runs a SELECT * against the named table (see load_table_from_bigquery).
# Several of these frames are re-queried and overwritten by the
# dimension-update cells further down.
# NOTE(review): existing_date_dimension_df is not referenced again in the cells shown here.
existing_customers_df = load_table_from_bigquery('unique_customers')
existing_countries_df = load_table_from_bigquery('unique_countries')
existing_currencies_df = load_table_from_bigquery('unique_currencies')
existing_account_names_df = load_table_from_bigquery('unique_account_names')
existing_date_dimension_df = load_table_from_bigquery('date_dimension')
existing_loan_type_df = load_table_from_bigquery('loan_type')
existing_deposit_type_df = load_table_from_bigquery('deposit_type')

# Load the fact tables
# NOTE(review): none of the three existing_fact_* frames is referenced again in
# the cells shown here; if that holds for the whole notebook these full-table
# reads can be dropped.
existing_fact_deposits_df = load_table_from_bigquery('fact_deposits')
existing_fact_accounts_df = load_table_from_bigquery('fact_accounts')
existing_fact_loans_df = load_table_from_bigquery('fact_loans')

# Now you can use the dataframes as needed
# print(existing_customers_df.head())
# print(existing_fact_loans_df.head())

  record_batch = self.to_arrow(
  record_batch = self.to_arrow(
  record_batch = self.to_arrow(
  record_batch = self.to_arrow(
  record_batch = self.to_arrow(
  record_batch = self.to_arrow(
  record_batch = self.to_arrow(
  record_batch = self.to_arrow(
  record_batch = self.to_arrow(
  record_batch = self.to_arrow(


# Update Dimension Table

## Update new Customer Dimension

In [41]:
import pandas as pd
from google.oauth2 import service_account

# Replace these with your actual paths and project information
key_path = "C:/Users/epranei/Downloads/calm-cove-423918-t0-ce8d5f6922f1.json"
project_id = 'calm-cove-423918-t0'
dataset_id = 'Advisense'
table_name = 'unique_customers'

# Authenticate with Google Cloud
credentials = service_account.Credentials.from_service_account_file(key_path)


# Identify unique customers from new deposits and loans
unique_new_customers_deposits = new_deposits_df[['customer', 'customer_type']].drop_duplicates()
unique_new_customers_loans = new_loans_df[['customer', 'customer_type']].drop_duplicates()

# Combine the new unique customers and remove duplicates
all_unique_new_customers = pd.concat([unique_new_customers_deposits, unique_new_customers_loans]).drop_duplicates()

# Load existing customers from BigQuery.
# Re-querying here (as the other dimension cells do) means a re-run of this
# cell compares against what is actually in the table, instead of a frame
# loaded earlier that would miss rows this cell has already appended.
existing_customers_df = pandas_gbq.read_gbq(f"SELECT * FROM `{project_id}.{dataset_id}.{table_name}`", project_id=project_id, credentials=credentials)

# Find new customers that are not already in the existing customers table.
# A (customer, customer_type) pair counts as new when it has no exact match.
new_customers = pd.merge(all_unique_new_customers, existing_customers_df[['customer', 'customer_type']],
                         on=['customer', 'customer_type'], how='left', indicator=True)
new_customers = new_customers[new_customers['_merge'] == 'left_only'].drop(columns='_merge')

# Assign new surrogate keys to the new customers and SCD Type 2 columns
if not new_customers.empty:
    # Start from 0 when the dimension table is empty (max() would be NaN),
    # matching the guard used by the other dimension cells.
    max_existing_key = existing_customers_df['customer_key'].max() if not existing_customers_df.empty else 0
    new_customers['customer_key'] = range(max_existing_key + 1, max_existing_key + 1 + len(new_customers))
    new_customers = add_scd2_columns(new_customers)

    # Upload new customers to BigQuery.
    # pandas_gbq.to_gbq is called directly because DataFrame.to_gbq is deprecated in pandas.
    table_id = f'{project_id}.{dataset_id}.{table_name}'
    pandas_gbq.to_gbq(new_customers, table_id, project_id=project_id, if_exists='append', credentials=credentials)

    print("New customers have been added to BigQuery.")
else:
    print("No new customers to add.")


No new customers to add.


## Update new Country Dimension

In [42]:
import pandas as pd
import numpy as np
import pandas_gbq
from google.oauth2 import service_account

# Replace these with your actual paths and project information
key_path = "C:/Users/epranei/Downloads/calm-cove-423918-t0-ce8d5f6922f1.json"
project_id = 'calm-cove-423918-t0'
dataset_id = 'Advisense'
table_name = 'unique_countries'

# Authenticate with Google Cloud
credentials = service_account.Credentials.from_service_account_file(key_path)



# Identify unique countries from new deposits and loans
unique_countries_deposits = new_deposits_df['country'].unique()
unique_countries_loans = new_loans_df['country'].unique()

# Combine the unique countries and remove duplicates
# (np.unique also sorts, which fixes the order in which new keys are handed out)
all_unique_countries = np.unique(np.concatenate((unique_countries_deposits, unique_countries_loans)))

# Create a DataFrame for unique new countries
unique_countries_df = pd.DataFrame(all_unique_countries, columns=['country'])

# Load existing countries from BigQuery
existing_countries_df = pandas_gbq.read_gbq(f"SELECT * FROM `{project_id}.{dataset_id}.{table_name}`", project_id=project_id, credentials=credentials)

# Find new countries that are not already in the existing countries table
new_countries = pd.merge(unique_countries_df, existing_countries_df[['country']], on='country', how='left', indicator=True)
new_countries = new_countries[new_countries['_merge'] == 'left_only'].drop(columns='_merge')

# Assign new surrogate keys to the new countries
if not new_countries.empty:
    # Continue numbering after the highest existing key (0 if the table is empty)
    max_existing_key = existing_countries_df['country_key'].max() if not existing_countries_df.empty else 0
    new_countries['country_key'] = range(max_existing_key + 1, max_existing_key + 1 + len(new_countries))

    new_countries = add_scd2_columns(new_countries)

    # Upload new countries to BigQuery.
    # pandas_gbq.to_gbq is called directly because DataFrame.to_gbq is deprecated in pandas.
    pandas_gbq.to_gbq(new_countries, f'{dataset_id}.{table_name}', project_id=project_id, if_exists='append', credentials=credentials)

    print("New countries have been added to BigQuery.")
else:
    print("No new countries to add.")


No new countries to add.


  record_batch = self.to_arrow(


## Update new Currency Dimension

In [43]:
import pandas as pd
import numpy as np
import pandas_gbq
from google.oauth2 import service_account

# Replace these with your actual paths and project information
key_path = "C:/Users/epranei/Downloads/calm-cove-423918-t0-ce8d5f6922f1.json"
project_id = 'calm-cove-423918-t0'
dataset_id = 'Advisense'
table_name = 'unique_currencies'

# Authenticate with Google Cloud
credentials = service_account.Credentials.from_service_account_file(key_path)

# Identify unique currencies from new deposits and loans
unique_currencies_deposits = new_deposits_df['currency'].unique()
unique_currencies_loans = new_loans_df['currency'].unique()

# Combine the unique currencies and remove duplicates
# (np.unique also sorts, which fixes the order in which new keys are handed out)
all_unique_currencies = np.unique(np.concatenate((unique_currencies_deposits, unique_currencies_loans)))

# Create a DataFrame for unique new currencies
unique_currencies_df = pd.DataFrame(all_unique_currencies, columns=['currency'])

# Load existing currencies from BigQuery
existing_currencies_df = pandas_gbq.read_gbq(f"SELECT * FROM `{project_id}.{dataset_id}.{table_name}`", project_id=project_id, credentials=credentials)

# Find new currencies that are not already in the existing currencies table
new_currencies = pd.merge(unique_currencies_df, existing_currencies_df[['currency']], on='currency', how='left', indicator=True)
new_currencies = new_currencies[new_currencies['_merge'] == 'left_only'].drop(columns='_merge')

# Assign new surrogate keys to the new currencies
if not new_currencies.empty:
    # Continue numbering after the highest existing key (0 if the table is empty)
    max_existing_key = existing_currencies_df['currency_key'].max() if not existing_currencies_df.empty else 0
    new_currencies['currency_key'] = range(max_existing_key + 1, max_existing_key + 1 + len(new_currencies))

    new_currencies = add_scd2_columns(new_currencies)
    # Append new currencies to BigQuery.
    # pandas_gbq.to_gbq is called directly because DataFrame.to_gbq is deprecated in pandas.
    pandas_gbq.to_gbq(new_currencies, f'{dataset_id}.{table_name}', project_id=project_id, if_exists='append', credentials=credentials)

    print("New currencies have been added to BigQuery.")
else:
    print("No new currencies to add.")


No new currencies to add.


  record_batch = self.to_arrow(


## Update new Accounts_name Dimension

In [44]:
import pandas as pd
import numpy as np
import pandas_gbq
from google.oauth2 import service_account

# Replace these with your actual paths and project information
key_path = "C:/Users/epranei/Downloads/calm-cove-423918-t0-ce8d5f6922f1.json"
project_id = 'calm-cove-423918-t0'
dataset_id = 'Advisense'
table_name = 'unique_account_names'

# Authenticate with Google Cloud
credentials = service_account.Credentials.from_service_account_file(key_path)


# Identify unique account names from new accounts
unique_account_names = pd.DataFrame(new_accounts_df['account_name'].unique(), columns=['account_name'])

# Load existing account names from BigQuery
existing_account_names_df = pandas_gbq.read_gbq(f"SELECT * FROM `{project_id}.{dataset_id}.{table_name}`", project_id=project_id, credentials=credentials)

# Find new account names that are not already in the existing account names table
new_account_names = pd.merge(unique_account_names, existing_account_names_df[['account_name']],
                             on='account_name', how='left', indicator=True)
new_account_names = new_account_names[new_account_names['_merge'] == 'left_only'].drop(columns='_merge')

# Assign new surrogate keys to the new account names
if not new_account_names.empty:
    # Continue numbering after the highest existing key (0 if the table is empty)
    max_existing_key = existing_account_names_df['account_name_key'].max() if not existing_account_names_df.empty else 0
    new_account_names['account_name_key'] = range(max_existing_key + 1, max_existing_key + 1 + len(new_account_names))

    # Use the add_scd2_columns function
    new_account_names = add_scd2_columns(new_account_names)

    # Append new account names to BigQuery.
    # pandas_gbq.to_gbq is called directly because DataFrame.to_gbq is deprecated in pandas.
    pandas_gbq.to_gbq(new_account_names, f'{dataset_id}.{table_name}', project_id=project_id, if_exists='append', credentials=credentials)

    print("New account names have been added to BigQuery.")
else:
    print("No new account names to add.")


No new account names to add.


  record_batch = self.to_arrow(


## Update new Loan Type Dimension

In [45]:
import pandas as pd
import numpy as np
import pandas_gbq
from google.oauth2 import service_account

# Replace these with your actual paths and project information
key_path = "C:/Users/epranei/Downloads/calm-cove-423918-t0-ce8d5f6922f1.json"
project_id = 'calm-cove-423918-t0'
dataset_id = 'Advisense'
table_name = 'loan_type'

# Authenticate with Google Cloud
credentials = service_account.Credentials.from_service_account_file(key_path)

# Identify unique loan types from new loans
unique_new_loan_types = new_loans_df['loan_type'].unique()

# Create a DataFrame for unique new loan types
new_loan_types_df = pd.DataFrame(unique_new_loan_types, columns=['loan_type'])

# Load existing loan types from BigQuery
existing_loan_type_df = pandas_gbq.read_gbq(f"SELECT * FROM `{project_id}.{dataset_id}.{table_name}`", project_id=project_id, credentials=credentials)

# Find new loan types that are not already in the existing loan types table
new_loan_types = pd.merge(new_loan_types_df, existing_loan_type_df[['loan_type']],
                          on='loan_type', how='left', indicator=True)
new_loan_types = new_loan_types[new_loan_types['_merge'] == 'left_only'].drop(columns='_merge')

# Assign new surrogate keys to the new loan types
if not new_loan_types.empty:
    # Continue numbering after the highest existing key (0 if the table is empty)
    max_existing_key = existing_loan_type_df['loan_type_key'].max() if not existing_loan_type_df.empty else 0
    new_loan_types['loan_type_key'] = range(max_existing_key + 1, max_existing_key + 1 + len(new_loan_types))

    # Use the add_scd2_columns function
    new_loan_types = add_scd2_columns(new_loan_types)

    # Append new loan types to BigQuery.
    # pandas_gbq.to_gbq is called directly because DataFrame.to_gbq is deprecated in pandas.
    pandas_gbq.to_gbq(new_loan_types, f'{dataset_id}.{table_name}', project_id=project_id, if_exists='append', credentials=credentials)

    print("New loan types have been added to BigQuery.")
else:
    print("No new loan types to add.")


No new loan types to add.


  record_batch = self.to_arrow(


## Update new Deposit Type Dimension

In [46]:
import pandas as pd
import numpy as np
import pandas_gbq
from google.oauth2 import service_account

# Replace these with your actual paths and project information
key_path = "C:/Users/epranei/Downloads/calm-cove-423918-t0-ce8d5f6922f1.json"
project_id = 'calm-cove-423918-t0'
dataset_id = 'Advisense'
table_name = 'deposit_type'

# Authenticate with Google Cloud
credentials = service_account.Credentials.from_service_account_file(key_path)

# Identify unique deposit types from new deposits
unique_new_deposit_types = new_deposits_df['deposit_type'].unique()

# Create a DataFrame for unique new deposit types
new_deposit_types_df = pd.DataFrame(unique_new_deposit_types, columns=['deposit_type'])

# Load existing deposit types from BigQuery
existing_deposit_type_df = pandas_gbq.read_gbq(f"SELECT * FROM `{project_id}.{dataset_id}.{table_name}`", project_id=project_id, credentials=credentials)

# Find new deposit types that are not already in the existing deposit types table
new_deposit_types = pd.merge(new_deposit_types_df, existing_deposit_type_df[['deposit_type']],
                             on='deposit_type', how='left', indicator=True)
new_deposit_types = new_deposit_types[new_deposit_types['_merge'] == 'left_only'].drop(columns='_merge')

# Assign new surrogate keys to the new deposit types
if not new_deposit_types.empty:
    # Continue numbering after the highest existing key (0 if the table is empty)
    max_existing_key = existing_deposit_type_df['deposit_type_key'].max() if not existing_deposit_type_df.empty else 0
    new_deposit_types['deposit_type_key'] = range(max_existing_key + 1, max_existing_key + 1 + len(new_deposit_types))

    # Use the add_scd2_columns function
    new_deposit_types = add_scd2_columns(new_deposit_types)

    # Append new deposit types to BigQuery.
    # pandas_gbq.to_gbq is called directly because DataFrame.to_gbq is deprecated in pandas.
    pandas_gbq.to_gbq(new_deposit_types, f'{dataset_id}.{table_name}', project_id=project_id, if_exists='append', credentials=credentials)

    print("New deposit types have been added to BigQuery.")
else:
    print("No new deposit types to add.")


No new deposit types to add.


  record_batch = self.to_arrow(


# Mapping Dimensions to Fact

In [47]:
# Work on copies: the mapping cells below add and drop columns on the
# new_fact_* frames, and copying keeps the new_*_df frames read from the
# cleaned files unchanged.
new_fact_accounts_df = new_accounts_df.copy()
new_fact_deposits_df = new_deposits_df.copy()
new_fact_loans_df = new_loans_df.copy()

### Mapping date_dimension to Fact_tables
1. Map all the dates with the key 
2. Drop the original columns

In [48]:
# Function to map dates to date keys
def map_date_to_key(df, date_column):
    """Add an integer YYYYMMDD key column for one date column.

    The frame is modified in place and also returned: ``date_column`` is
    converted to datetime and a new ``<date_column>_key`` column is added.

    Args:
        df: DataFrame containing ``date_column``.
        date_column: Name of the column holding the dates.

    Returns:
        The same DataFrame object. When every date is present the key column
        is built with ``astype(int)`` exactly as before. When some dates are
        missing, the key column uses the nullable ``Int64`` dtype and holds
        ``<NA>`` for those rows; previously that case raised a ValueError.
    """
    df[date_column] = pd.to_datetime(df[date_column])
    dates = df[date_column]
    formatted = dates.dt.strftime('%Y%m%d')

    if dates.isna().any():
        # A missing date formats to NaN, which a plain int column cannot hold.
        keys = pd.to_numeric(formatted, errors='coerce').astype('Int64')
    else:
        keys = formatted.astype(int)

    df[date_column + '_key'] = keys
    return df


# fact_accounts has a single date column to convert
fact_accounts = map_date_to_key(new_fact_accounts_df, 'reference_date')

# fact_deposits: convert each of its three date columns in turn
for date_field in ('start_date', 'maturity_date', 'reference_date'):
    fact_deposits = map_date_to_key(new_fact_deposits_df, date_field)

# fact_loans: same three date columns
for date_field in ('start_date', 'maturity_date', 'reference_date'):
    fact_loans = map_date_to_key(new_fact_loans_df, date_field)



In [49]:
# Remove the original date columns now that the *_key columns exist.
# The drops are done in place on purpose: map_date_to_key returns the frame it
# was given, so fact_deposits / fact_loans / fact_accounts are these same
# objects and lose the columns as well.
new_fact_deposits_df.drop(columns = ['start_date','maturity_date','reference_date'], inplace=True)
new_fact_loans_df.drop(columns = ['start_date','maturity_date','reference_date'], inplace=True)
new_fact_accounts_df.drop(columns=['reference_date'], inplace = True)

### We have all the updated dimension tables in the data warehouse which now includes additional dimensions if included in the incremental data


In [50]:
# Re-read every dimension table so that rows appended by the dimension-update
# cells above are available for the key look-ups below.
# NOTE(review): updated_date_dimension_df is not referenced by any later cell shown here.
updated_customers_df = load_table_from_bigquery('unique_customers')
updated_countries_df = load_table_from_bigquery('unique_countries')
updated_currencies_df = load_table_from_bigquery('unique_currencies')
updated_account_names_df = load_table_from_bigquery('unique_account_names')
updated_date_dimension_df = load_table_from_bigquery('date_dimension')
updated_loan_type_df = load_table_from_bigquery('loan_type')
updated_deposit_type_df = load_table_from_bigquery('deposit_type')

  record_batch = self.to_arrow(


  record_batch = self.to_arrow(
  record_batch = self.to_arrow(
  record_batch = self.to_arrow(
  record_batch = self.to_arrow(
  record_batch = self.to_arrow(
  record_batch = self.to_arrow(


### Mapping updated_customer_dimension to fact_tables

In [51]:
def map_customer_to_key(df, unique_customers_df):
    """Attach ``customer_key`` from the customer dimension to a fact frame.

    The look-up uses the ``unique_customers_df`` argument (previously the
    argument was ignored and a notebook global was read instead).

    The customer dimension is kept unique per (customer, customer_type) pair,
    so the join uses both columns whenever both frames have ``customer_type``.
    Joining on ``customer`` alone would duplicate fact rows for a customer
    that appears under more than one type. If either frame lacks
    ``customer_type`` the join falls back to ``customer`` only.

    Args:
        df: Fact DataFrame with a ``customer`` column.
        unique_customers_df: Customer dimension with ``customer`` and
            ``customer_key`` columns.

    Returns:
        A new DataFrame: ``df`` left-joined with ``customer_key``. The natural
        customer columns are kept; unmatched rows get a missing key.
    """
    join_columns = ['customer']
    if 'customer_type' in df.columns and 'customer_type' in unique_customers_df.columns:
        join_columns.append('customer_type')

    # Bring over only the join columns and the surrogate key.
    key_lookup = unique_customers_df[join_columns + ['customer_key']]
    return df.merge(key_lookup, on=join_columns, how='left')

# Add customer_key to the deposits frame. The input is fact_deposits, the frame
# returned by map_date_to_key; the customer columns are dropped in the next cell.
new_fact_deposits_df = map_customer_to_key(fact_deposits,updated_customers_df )

# Add customer_key to the loans frame in the same way (input is fact_loans).
new_fact_loans_df = map_customer_to_key(fact_loans,updated_customers_df )





In [52]:
# The natural customer columns are no longer needed once customer_key is attached
customer_natural_columns = ['customer', 'customer_type']
new_fact_deposits_df = new_fact_deposits_df.drop(columns=customer_natural_columns)
new_fact_loans_df = new_fact_loans_df.drop(columns=customer_natural_columns)

### Mapping updated_account_name dimensions to fact tables

In [53]:
def map_account_name_to_key(df, account_name_df):
    """Left-join the account-name dimension onto a fact frame.

    The join uses the ``account_name_df`` argument (previously the argument
    was ignored and a notebook global was read instead).

    Args:
        df: Fact DataFrame with an ``account_name`` column.
        account_name_df: Account-name dimension with an ``account_name`` column.

    Returns:
        A new DataFrame with every column of ``account_name_df`` added,
        matched on ``account_name``. Rows without a match get missing values.
    """
    # The whole dimension frame is merged, so any extra columns it carries
    # are copied into the result as well.
    return df.merge(account_name_df, on='account_name', how='left')


# Attach the account-name dimension columns (including account_name_key) to the accounts frame
new_fact_accounts_df = map_account_name_to_key(new_fact_accounts_df, updated_account_names_df)

In [54]:
# account_name is replaced by its surrogate key, so remove the text column
new_fact_accounts_df = new_fact_accounts_df.drop(columns=['account_name'])

### Mapping Updated_Currency dimension to fact tables

In [55]:
def map_currency_to_key(df, unique_currencies_df):
    """Left-join the currency dimension onto a fact frame.

    The join uses the ``unique_currencies_df`` argument (previously the
    argument was ignored and a notebook global was read instead).

    Args:
        df: Fact DataFrame with a ``currency`` column.
        unique_currencies_df: Currency dimension with a ``currency`` column.

    Returns:
        A new DataFrame with every column of ``unique_currencies_df`` added,
        matched on ``currency``. Rows without a match get missing values.
    """
    # The whole dimension frame is merged, so any extra columns it carries
    # are copied into the result as well.
    return df.merge(unique_currencies_df, on='currency', how='left')


# Add the currency dimension columns (including currency_key) to the deposits
# frame; the original currency column is dropped in the next cell.
new_fact_deposits_df = map_currency_to_key(new_fact_deposits_df, updated_currencies_df)

# Same for the loans frame.
new_fact_loans_df = map_currency_to_key(new_fact_loans_df, updated_currencies_df)

In [56]:
# currency is replaced by its surrogate key, so remove the text column
new_fact_deposits_df = new_fact_deposits_df.drop(columns=['currency'])
new_fact_loans_df = new_fact_loans_df.drop(columns=['currency'])

### Mapping Updated Country dimension to Fact tables

In [57]:
def map_country_to_key(df, unique_countries_df):
    """Left-join the country dimension onto a fact frame.

    The join uses the ``unique_countries_df`` argument (previously the
    argument was ignored and a notebook global was read instead).

    Args:
        df: Fact DataFrame with a ``country`` column.
        unique_countries_df: Country dimension with a ``country`` column.

    Returns:
        A new DataFrame with every column of ``unique_countries_df`` added,
        matched on ``country``. Rows without a match get missing values.
    """
    # The whole dimension frame is merged, so any extra columns it carries
    # are copied into the result as well.
    return df.merge(unique_countries_df, on='country', how='left')



# Add the country dimension columns (including country_key) to the deposits
# frame; the original country column is dropped in the next cell.
new_fact_deposits_df = map_country_to_key(new_fact_deposits_df, updated_countries_df)

# Same for the loans frame.
new_fact_loans_df = map_country_to_key(new_fact_loans_df, updated_countries_df)

In [58]:
# country is replaced by its surrogate key, so remove the text column
new_fact_deposits_df = new_fact_deposits_df.drop(columns=['country'])
new_fact_loans_df = new_fact_loans_df.drop(columns=['country'])

### Mapping Updated Deposit_type dimensions to  Fact tables

In [59]:
def map_deposit_type_to_key(df, deposit_type_df):
    """Left-join the deposit-type dimension onto a fact frame.

    The join uses the ``deposit_type_df`` argument (previously the argument
    was ignored and a notebook global was read instead).

    Args:
        df: Fact DataFrame with a ``deposit_type`` column.
        deposit_type_df: Deposit-type dimension with a ``deposit_type`` column.

    Returns:
        A new DataFrame with every column of ``deposit_type_df`` added,
        matched on ``deposit_type``. Rows without a match get missing values.
    """
    # The whole dimension frame is merged, so any extra columns it carries
    # are copied into the result as well.
    return df.merge(deposit_type_df, on='deposit_type', how='left')


# Add the deposit-type dimension columns (including deposit_type_key) to the
# deposits frame; the original deposit_type column is dropped in the next cell.
new_fact_deposits_df = map_deposit_type_to_key(new_fact_deposits_df, updated_deposit_type_df)

In [60]:
# deposit_type is replaced by its surrogate key, so remove the text column
new_fact_deposits_df = new_fact_deposits_df.drop(columns=['deposit_type'])

### Mapping Updated Loan_type dimensions to Fact tables

In [61]:
def map_loan_type_to_key(df, loan_type_df):
    """Left-join the loan-type dimension onto a fact frame.

    The join uses the ``loan_type_df`` argument (previously the argument was
    ignored and a notebook global was read instead).

    Args:
        df: Fact DataFrame with a ``loan_type`` column.
        loan_type_df: Loan-type dimension with a ``loan_type`` column.

    Returns:
        A new DataFrame with every column of ``loan_type_df`` added, matched
        on ``loan_type``. Rows without a match get missing values.
    """
    # The whole dimension frame is merged, so any extra columns it carries
    # are copied into the result as well.
    return df.merge(loan_type_df, on='loan_type', how='left')



# Add the loan-type dimension columns (including loan_type_key) to the loans
# frame; the original loan_type column is dropped in the next cell.
new_fact_loans_df = map_loan_type_to_key(new_fact_loans_df, updated_loan_type_df)



In [62]:
# loan_type is replaced by its surrogate key, so remove the text column
new_fact_loans_df = new_fact_loans_df.drop(columns=['loan_type'])

# Creating Final new incremental data for Loading in data warehouse

### Getting the Persisted maximum surrogate keys to maintain primary key in fact tables

In [63]:
# Read the max_keys table; the cells below look up one row per fact table
# (by table_name) to find where the surrogate-key numbering should continue.
max_keys_df = load_table_from_bigquery('max_keys')

  record_batch = self.to_arrow(


## Final new Fact accounts data creation

In [64]:

# Highest account surrogate key persisted for fact_accounts
is_accounts_row = max_keys_df['table_name'] == 'fact_accounts'
max_accounts_key = max_keys_df.loc[is_accounts_row, 'max_account_surr_primarykey'].iloc[0]

# Number the incoming rows consecutively, starting right after that key
first_new_accounts_key = int(max_accounts_key) + 1
new_fact_accounts_df['account_surr_primarykey'] = range(first_new_accounts_key, first_new_accounts_key + len(new_fact_accounts_df))

# Keep only the columns selected for fact_accounts, in this order
fact_accounts_columns = ['account_number', 'account_amount','account_surr_primarykey','account_type', 'ingest_date_time', 'reference_date_key', 'account_name_key']
new_fact_accounts_df = new_fact_accounts_df[fact_accounts_columns]



## Final new Fact loans data creation

In [65]:
# Highest loan surrogate key persisted for fact_loans
is_loans_row = max_keys_df['table_name'] == 'fact_loans'
max_loans_key = max_keys_df.loc[is_loans_row, 'max_surr_primarykey'].iloc[0]

# Number the incoming rows consecutively, starting right after that key
first_new_loans_key = int(max_loans_key) + 1
new_fact_loans_df['loans_surr_primarykey'] = range(first_new_loans_key, first_new_loans_key + len(new_fact_loans_df))

# Keep only the columns selected for fact_loans, in this order
fact_loans_columns = ['loan_amount','exchange_rate','ingest_date_time','loans_surr_primarykey','start_date_key','maturity_date_key','reference_date_key','customer_key','currency_key','country_key','loan_type_key']
new_fact_loans_df = new_fact_loans_df[fact_loans_columns]

## Final new Facts Deposits Data creation

In [66]:
# Highest deposit surrogate key persisted for fact_deposits
is_deposits_row = max_keys_df['table_name'] == 'fact_deposits'
max_deposits_key = max_keys_df.loc[is_deposits_row, 'max_deposits_surr_primarykey'].iloc[0]

# Number the incoming rows consecutively, starting right after that key
first_new_deposits_key = int(max_deposits_key) + 1
new_fact_deposits_df['deposits_surr_primarykey'] = range(first_new_deposits_key, first_new_deposits_key + len(new_fact_deposits_df))

# Keep only the columns selected for fact_deposits, in this order
fact_deposits_columns = ['deposit_amount','exchange_rate','ingest_date_time','deposits_surr_primarykey','start_date_key','maturity_date_key','reference_date_key','customer_key','currency_key','country_key','deposit_type_key']
new_fact_deposits_df = new_fact_deposits_df[fact_deposits_columns]

# Append Fact tables to Data warehouse

In [67]:
import pandas as pd
import pandas_gbq
from google.oauth2 import service_account

# Replace these with your actual paths and project information
# NOTE(review): same values as the configuration at the top of the notebook;
# key_path is an absolute path to a service-account key on one user's machine.
key_path = "C:/Users/epranei/Downloads/calm-cove-423918-t0-ce8d5f6922f1.json"
project_id = 'calm-cove-423918-t0'
dataset_id = 'Advisense'

# Authenticate with Google Cloud
credentials = service_account.Credentials.from_service_account_file(key_path)

# Define a function to upload a DataFrame to BigQuery
def upload_to_bigquery(df, table_name, if_exists='append'):
    """Write a DataFrame to ``<dataset_id>.<table_name>`` in BigQuery.

    Uses the notebook-level ``dataset_id``, ``project_id`` and ``credentials``.
    ``pandas_gbq.to_gbq`` is called directly because ``DataFrame.to_gbq`` is
    deprecated in pandas.

    Args:
        df: DataFrame to upload.
        table_name: Target table name inside ``dataset_id``.
        if_exists: Passed through to ``pandas_gbq.to_gbq``; defaults to
            'append' so rows are added to the existing table.
    """
    table_id = f'{dataset_id}.{table_name}'
    pandas_gbq.to_gbq(df, table_id, project_id=project_id, if_exists=if_exists, credentials=credentials)

# Append new_fact_deposits_df to the BigQuery table
# NOTE(review): the three uploads are separate append calls; if a later one
# fails, the earlier tables have presumably already been appended, and
# re-running this cell would append those rows again.
upload_to_bigquery(new_fact_deposits_df, 'fact_deposits')

# Append new_fact_loans_df to the BigQuery table
upload_to_bigquery(new_fact_loans_df, 'fact_loans')

# Append new_fact_accounts_df to the BigQuery table
upload_to_bigquery(new_fact_accounts_df, 'fact_accounts')

print("Data uploaded to BigQuery successfully.")


  df.to_gbq(table_id, project_id=project_id, if_exists=if_exists, credentials=credentials)
  df.to_gbq(table_id, project_id=project_id, if_exists=if_exists, credentials=credentials)
  df.to_gbq(table_id, project_id=project_id, if_exists=if_exists, credentials=credentials)


Data uploaded to BigQuery successfully.


# Updating the max surrogate key and Persisting the same in the data warehouse

In [72]:
import pandas as pd
import os
import pandas_gbq
from google.oauth2 import service_account

# Create the Data_warehouse folder if it doesn't exist (optional for local operations)
os.makedirs('Data_warehouse', exist_ok=True)

# Replace these with your actual paths and project information
key_path = "C:/Users/epranei/Downloads/calm-cove-423918-t0-ce8d5f6922f1.json"
project_id = 'calm-cove-423918-t0'
dataset_id = 'Advisense'
table_name = 'max_keys'

# Authenticate with Google Cloud
credentials = service_account.Credentials.from_service_account_file(key_path)


def _latest_max_key(new_keys, previous_max):
    """Return the highest surrogate key to persist for one fact table.

    Args:
        new_keys: Series of surrogate keys assigned to this run's rows.
        previous_max: Max key read from the max_keys table before this run.

    Returns:
        ``new_keys.max()`` as an int, or ``previous_max`` when this run
        produced no rows for the table. Without the fallback an empty batch
        gives a NaN max, which the fillna(0) below would persist as 0 and the
        next run would start handing out keys that already exist.
    """
    if new_keys.empty:
        return int(previous_max)
    return int(new_keys.max())


# Calculate max surrogate keys from new data.
# The right-hand max_*_key values are the previous maxima looked up from
# max_keys when the fact frames were built.
max_deposits_key = _latest_max_key(new_fact_deposits_df['deposits_surr_primarykey'], max_deposits_key)
max_accounts_key = _latest_max_key(new_fact_accounts_df['account_surr_primarykey'], max_accounts_key)
max_loans_key = _latest_max_key(new_fact_loans_df['loans_surr_primarykey'], max_loans_key)

# Create DataFrame for max keys data.
# Each row fills only its own table's column; the other two columns come out as NaN.
max_keys_data = [
    {"table_name": "fact_deposits", "max_deposits_surr_primarykey": max_deposits_key},
    {"table_name": "fact_accounts", "max_account_surr_primarykey": max_accounts_key},
    {"table_name": "fact_loans", "max_surr_primarykey": max_loans_key}
]
max_keys_df = pd.DataFrame(max_keys_data)

# Fill NaN values and convert to int
max_keys_df['max_deposits_surr_primarykey'] = max_keys_df['max_deposits_surr_primarykey'].fillna(0).astype(int)
max_keys_df['max_account_surr_primarykey'] = max_keys_df['max_account_surr_primarykey'].fillna(0).astype(int)
max_keys_df['max_surr_primarykey'] = max_keys_df['max_surr_primarykey'].fillna(0).astype(int)

# Define a function to upload a DataFrame to BigQuery
def upload_to_bigquery(df, table_name, if_exists='replace'):
    """Write a DataFrame to ``<dataset_id>.<table_name>`` in BigQuery.

    Uses the notebook-level ``dataset_id``, ``project_id`` and ``credentials``.
    ``pandas_gbq.to_gbq`` is called directly because ``DataFrame.to_gbq`` is
    deprecated in pandas.

    NOTE(review): this redefines the helper of the same name from the
    fact-append cell with a different default ('replace' instead of 'append').

    Args:
        df: DataFrame to upload.
        table_name: Target table name inside ``dataset_id``.
        if_exists: Passed through to ``pandas_gbq.to_gbq``; defaults to
            'replace' so the target table is overwritten.
    """
    table_id = f'{dataset_id}.{table_name}'
    pandas_gbq.to_gbq(df, table_id, project_id=project_id, if_exists=if_exists, credentials=credentials)

# Upload max_keys_df to BigQuery
# No if_exists is passed, so the 'replace' default of the upload_to_bigquery
# defined in this cell applies and the max_keys table is overwritten.
upload_to_bigquery(max_keys_df, table_name)

print("Max keys data uploaded to BigQuery successfully.")


  df.to_gbq(table_id, project_id=project_id, if_exists=if_exists, credentials=credentials)


Max keys data uploaded to BigQuery successfully.


In [70]:
# Display the max-keys frame to check the persisted values
max_keys_df

Unnamed: 0,table_name,max_deposits_surr_primarykey,max_account_surr_primarykey,max_surr_primarykey
0,fact_deposits,180,0,0
1,fact_accounts,0,32,0
2,fact_loans,0,0,600
