In [None]:
import sys
sys.path.append('./../..')

In [None]:
from zeno_etl_libs.db.db import DB
from zeno_etl_libs.config import set_env_config, EnvNames
from zeno_etl_libs.helper import helper
from zeno_etl_libs.helper.aws.s3 import S3

In [None]:
import numpy as np
import pandas as pd
import boto3
import base64
from botocore.exceptions import ClientError
import time
import json
from datetime import datetime, timedelta

# Set the Environment 

In [None]:
"""
Environment: Activate the env as per the requirement
"""
# Exactly one assignment should be active. `env` is passed to set_env_config()
# below, which decides which configuration/secrets the notebook runs against.
# env = EnvNames.production
# env = EnvNames.staging
env = EnvNames.development

### Logic starts here

In [None]:
# Load the configuration (including secrets) for the environment selected above.
config = set_env_config(env=env)

In [None]:
# Database handle built from the environment's secrets; every db.execute() below goes through it.
db = DB(secrets=config.secrets)

In [None]:
# open_connection() returns the cursor that later cells read results from via fetch_dataframe().
cursor = db.open_connection()

In [None]:
cursor

In [None]:
# S3 helper; used further down (write_df_to_db) to load the merged DataFrame into the temp table.
s3 = S3(aws_access_key_id=config.secrets['AWS_ACCESS_KEY_ID'],
            aws_secret_access_key=config.secrets['AWS_SECRET_ACCESS_KEY_ID'])

## Take the new records from patients and insert them into the patients-metadata-2 table

In [None]:
# Seed "patients-metadata-2" with every patient that has at least one row in
# "bills-1-metadata" and is not yet present in the metadata table
# (left join + `pm.id is null`). New rows are flagged 'updating' so that the
# remaining cells of this notebook pick them up.
# NOTE(review): since `pm.id is null` holds for every surviving row,
# pm."etl-status" is null as well, so the extra etl-status predicate looks
# redundant. Patients that already exist in the metadata table are never
# re-flagged by this statement.
query = '''
    insert
        into
        "prod2-generico"."patients-metadata-2" (id,
        "created-at",
        "updated-at",
        "created-by",
        "etl-status"
        )
    select
        p.id,
        p."created-at",
        getdate(),
        'etl-job',
        'updating'
    from
        "prod2-generico"."patients" p
    inner join (
        select
            "patient-id"
        from
            "prod2-generico"."bills-1-metadata" bm
        group by
            "patient-id"
        -- limit 20
        ) bm1 on
        bm1."patient-id" = p.id
    left join "prod2-generico"."patients-metadata-2" pm on
        pm.id = p.id
    where
        pm.id is null
        -- and date(p."updated-at") between '2021-06-01' and '2021-11-30'
        and (pm."etl-status" != 'updated'
        or pm."etl-status" is null)
--    limit 5;
'''
# TODO: apply a filter on the last pushed max id so only newer patients are scanned

In [None]:
# Run the insert; no bind parameters are used.
db.execute(query, params=None)

## Update the etl-status using bills-1-metadata update date (TODO)

In [None]:
# Take the new bills and get the patients id and update the etl-status to updating

## Considering only changed patients

In [None]:
# Fetch the rows currently flagged 'updating'; these ids form the base frame
# that every derived attribute is merged onto later in the notebook.
query = '''
 select
    id,
    "created-at",
    "updated-at",
    "created-by",
    "etl-status"
from
    "prod2-generico"."patients-metadata-2" pm
where
    "etl-status" = 'updating'
'''

In [None]:
db.execute(query, params=None)

In [None]:
# The next cell checks this result for None before using it.
changed_patients: pd.DataFrame = cursor.fetch_dataframe()

In [None]:
# Log either the "nothing to do" message (result is None) or the number of
# patients waiting to be processed.
print(
    "No changes in patients."
    if changed_patients is None
    else f"Total changes: {len(changed_patients)}"
)

In [None]:
changed_patients.head(2)

## Fill data from bills-1-metadata table

In [None]:
# Aggregate every bill of each 'updating' patient (sub-select `s`) and copy the
# aggregates onto the matching "patients-metadata-2" row.
#
# The percentage columns use -1 as the "denominator missing or zero" sentinel.
# The guard is written as `coalesce(<denominator>, 0) = 0` because
# `<denominator> in (0, null)` never evaluates to true for a NULL denominator
# (a comparison with NULL yields NULL), which let NULL fall through to the
# division instead of producing -1.
#
# NOTE(review): "quantity-rx-pc" and "hd-bills-pc" are computed in `s` but not
# assigned in the SET list, and "spend-generic-pc" is rounded without the
# 4-decimal precision used by the other *-pc columns; confirm both are intended.
query = """
    update
        "prod2-generico"."patients-metadata-2" t
    set
        "first-bill-date" = s."first-bill-date",
        "last-bill-date" = s."last-bill-date",
        "first-bill-id" = s."first-bill-id",
        "last-bill-id" = s."last-bill-id",
        "average-bill-value" = s."average-bill-value",
        "total-quantity" = s."total-quantity",
        "quantity-generic" = s."quantity-generic",
        "quantity-chronic" = s."quantity-chronic",
        "quantity-ethical" = s."quantity-ethical",
        "quantity-rx" = s."quantity-rx",
        "quantity-repeatable" = s."quantity-repeatable",
        "quantity-goodaid" = s."quantity-goodaid",
        "quantity-others-type" = s."quantity-others-type",
        "number-of-bills" = s."number-of-bills",
        "hd-bills" = s."hd-bills",
        "is-repeatable" = s."is-repeatable",
        "is-generic" = s."is-generic",
        "is-chronic" = s."is-chronic",
        "is-goodaid" = s."is-goodaid",
        "is-ethical" = s."is-ethical",
        "is-rx" = s."is-rx",
        "is-others-type" = s."is-others-type",
        "hd-flag" = s."hd-flag",
        "ecom-flag" = s."ecom-flag",
        "pr-flag" = s."pr-flag",
        "total-spend" = s."total-spend",
        "spend-generic" = s."spend-generic",
        "promo-min-bill-date" = s."promo-min-bill-date",
        "hd-min-bill-date" = s."hd-min-bill-date",
        "ecom-min-bill-date" = s."ecom-min-bill-date",
        "pr-min-bill-date" = s."pr-min-bill-date",
        "generic-min-bill-date" = s."generic-min-bill-date",
        "goodaid-min-bill-date" = s."goodaid-min-bill-date",
        "ethical-min-bill-date" = s."ethical-min-bill-date",
        "chronic-min-bill-date" = s."chronic-min-bill-date",
        "repeatable-min-bill-date" = s."repeatable-min-bill-date",
        "others-type-min-bill-date" = s."others-type-min-bill-date",
        "digital-payment-min-bill-date" = s."digital-payment-min-bill-date",
        "rx-min-bill-date" = s."rx-min-bill-date",
        "digital-payment-flag" = s."digital-payment-flag",
        "total-mrp-value" = s."total-mrp-value",
        "recency-customer-days" = s."recency-customer-days",
        "system-age-days" = s."system-age-days",
        "quantity-generic-pc" = s."quantity-generic-pc",
        "quantity-chronic-pc" = s."quantity-chronic-pc",
        "quantity-ethical-pc" = s."quantity-ethical-pc",
        "quantity-repeatable-pc" = s."quantity-repeatable-pc",
        "quantity-goodaid-pc" = s."quantity-goodaid-pc",
        "quantity-others-type-pc" = s."quantity-others-type-pc",
        "spend-generic-pc" = s."spend-generic-pc"
    from
        (
        select
            pm.id ,
            min(bm."created-at") as "first-bill-date",
            max(bm."created-at") as "last-bill-date",
            min(bm.id) as "first-bill-id",
            max(bm.id) as "last-bill-id",
            round(sum(bm."total-spend")/ count(distinct bm.id), 4) as "average-bill-value",
            sum(bm."total-quantity") as "total-quantity",
            sum(bm."quantity-generic") as "quantity-generic",
            case when coalesce(sum(bm."total-quantity"), 0) = 0 then -1 else round(100.0 * sum(bm."quantity-generic")/ sum(bm."total-quantity"), 4) end as "quantity-generic-pc",
            sum(bm."quantity-chronic") as "quantity-chronic",
            case when coalesce(sum(bm."total-quantity"), 0) = 0 then -1 else round(100.0 * sum(bm."quantity-chronic")/ sum(bm."total-quantity"), 4) end as "quantity-chronic-pc",
            sum(bm."quantity-ethical") as "quantity-ethical",
            case when coalesce(sum(bm."total-quantity"), 0) = 0 then -1 else round(100.0 * sum(bm."quantity-ethical")/ sum(bm."total-quantity"), 4) end as "quantity-ethical-pc",
            sum(bm."quantity-repeatable") as "quantity-repeatable",
            case when coalesce(sum(bm."total-quantity"), 0) = 0 then -1 else round(100.0 * sum(bm."quantity-repeatable")/ sum(bm."total-quantity"), 4) end as "quantity-repeatable-pc",
            sum(bm."quantity-goodaid") as "quantity-goodaid",
            case when coalesce(sum(bm."total-quantity"), 0) = 0 then -1 else round(100.0 * sum(bm."quantity-goodaid")/ sum(bm."total-quantity"), 4) end as "quantity-goodaid-pc",
            sum(bm."quantity-others-type") as "quantity-others-type",
            case when coalesce(sum(bm."total-quantity"), 0) = 0 then -1 else round(100.0 * sum(bm."quantity-others-type")/ sum(bm."total-quantity"), 4) end as "quantity-others-type-pc",
            sum(bm."quantity-generic" + bm."quantity-ethical") as "quantity-rx",
            case when coalesce(sum(bm."total-quantity"), 0) = 0 then -1 else round(100.0 * sum(bm."quantity-generic" + bm."quantity-ethical")/ sum(bm."total-quantity"), 4) end as "quantity-rx-pc",
            count(distinct bm.id) as "number-of-bills",
            count(distinct (case when bm."hd-flag" is true then bm.id else null end)) as "hd-bills",
            case when count(distinct bm.id) = 0 then -1 else round(100.0 * count(distinct (case when bm."hd-flag" is true then bm.id else null end))/ count(distinct bm.id), 4) end  as "hd-bills-pc",
            bool_or(bm."is-repeatable") as "is-repeatable",
            bool_or(bm."is-generic") as "is-generic",
            bool_or(bm."is-chronic") as "is-chronic",
            bool_or(bm."is-goodaid") as "is-goodaid",
            bool_or(bm."is-ethical") as "is-ethical",
            bool_or(bm."is-rx") as "is-rx",
            bool_or(bm."is-others-type") as "is-others-type",
            bool_or(bm."hd-flag") as "hd-flag",
            bool_or(bm."ecom-flag") as "ecom-flag",
            bool_or(bm."pr-flag") as "pr-flag",
            bool_or(bm."digital-payment-flag") as "digital-payment-flag",
            sum(bm."total-spend") as "total-spend",
            sum(bm."spend-generic") as "spend-generic",
            case when coalesce(sum(bm."total-spend"), 0) = 0 then -1 else round(100.0 * sum(bm."spend-generic")/ sum(bm."total-spend")) end as "spend-generic-pc",
            min(case when bm."promo-code-id" is not null then bm."created-at" else null end) as "promo-min-bill-date",
            min(case when bm."hd-flag" is true then bm."created-at" else null end) as "hd-min-bill-date",
            min(case when bm."ecom-flag" is true then bm."created-at" else null end) as "ecom-min-bill-date",
            min(case when bm."pr-flag" is true then bm."created-at" else null end) as "pr-min-bill-date",
            min(case when bm."is-generic" is true then bm."created-at" else null end) as "generic-min-bill-date",
            min(case when bm."is-goodaid" is true then bm."created-at" else null end) as "goodaid-min-bill-date",
            min(case when bm."is-ethical" is true then bm."created-at" else null end) as "ethical-min-bill-date",
            min(case when bm."is-chronic" is true then bm."created-at" else null end) as "chronic-min-bill-date",
            min(case when bm."is-repeatable" is true then bm."created-at" else null end) as "repeatable-min-bill-date",
            min(case when bm."is-others-type" is true then bm."created-at" else null end) as "others-type-min-bill-date",
            min(case when bm."digital-payment-flag" is true then bm."created-at" else null end) as "digital-payment-min-bill-date",
            min(case when bm."is-rx" is true then bm."created-at" else null end) as "rx-min-bill-date",
            sum(bm."total-mrp-value") as "total-mrp-value",
            case
                when max(bm."created-at") = '0101-01-01' then null
                else datediff(day,
                max(bm."created-at"),
                current_date)
            end as "recency-customer-days",
            case
                when min(bm."created-at") = '0101-01-01' then null
                else datediff(day,
                min(bm."created-at"),
                current_date)
            end as "system-age-days"
        from
            "prod2-generico"."prod2-generico"."patients-metadata-2" pm
        inner join "prod2-generico"."bills-1-metadata" bm on
            pm.id = bm."patient-id"
        where
            pm."etl-status" = 'updating'
        group by
            pm.id
    ) s
    where
        t.id = s.id;
"""

In [None]:
db.execute(query, params=None)

## Adding customer feedback data (nps)

In [None]:
# Pull all feedback rows for the 'updating' patients. Feedback is linked to a
# patient through the phone number (f.phone = p.phone), and the store name is
# resolved through the stores table.
query = """
    select
        p.id,
        f.rating,
        f.suggestion,
        f."store-id",
        s."name" as "store-name",
        f."created-at"
    from
        "prod2-generico"."patients-metadata-2" pm
    inner join "prod2-generico".patients p on
         p.id = pm.id
    inner join "prod2-generico".feedback f on
        f.phone = p.phone
    inner join "prod2-generico".stores s on
        f."store-id" = s."id"
    where pm."etl-status" = 'updating'
"""

In [None]:
db.execute(query, params=None)

In [None]:
# One row per feedback entry; a patient can appear several times here.
nps: pd.DataFrame = cursor.fetch_dataframe()

In [None]:
nps

In [None]:
# Collapse the raw feedback rows to one row per patient: the most recent entry.
# The fetch result is treated as possibly None (the changed_patients check
# earlier in this notebook does the same), so guard before calling len().
if nps is not None and len(nps):
    nps['created-at'] = pd.to_datetime(nps['created-at'])
    nps['nps-rating-date'] = nps['created-at'].dt.strftime("%Y-%m-%d")
    nps['is-nps'] = True

    # Newest feedback first within each patient, keep only that first row, then
    # expose the columns under their "latest-nps-*" names.
    nps = (
        nps.sort_values(by=['id', 'created-at'], ascending=[True, False])
        .drop_duplicates(subset=['id'], keep='first')
        .rename(
            columns={
                'rating': 'latest-nps-rating',
                'suggestion': 'latest-nps-rating-comment',
                'nps-rating-date': 'latest-nps-rating-date',
                'store-id': 'latest-nps-rating-store-id',
                'store-name': 'latest-nps-rating-store-name'
            }
        )
    )
else:
    # No feedback at all: keep an empty frame with the expected columns so the
    # left merge onto the patient list still works.
    nps = pd.DataFrame(
        columns=[
            'id', 'created-at', 'nps-rating-date', 'is-nps', 'latest-nps-rating',
            'latest-nps-rating-comment', 'latest-nps-rating-date', 'latest-nps-rating-store-id',
            'latest-nps-rating-store-name'
        ]
    )

## Referral count 

In [None]:
# Sum "total-used" over the referral-type promo codes attached to each
# 'updating' patient.
# NOTE(review): the filter on b."code-type" in the WHERE clause discards rows
# where the left joins found no match, so patients without a referral code do
# not appear in the result at all.
query = """
    select
        a."patient-id" as id,
        SUM(b."total-used") as "referred-count"
    from
        "prod2-generico"."patients-metadata-2" pm
    left join 
        "prod2-generico"."patients-promo-codes" a on
        pm.id = a."patient-id"
    left join "prod2-generico"."promo-codes" b on
        a."promo-code-id" = b."id"
    where
        b."code-type" = 'referral'
        and pm."etl-status" = 'updating'
    group by
        a."patient-id"
"""

In [None]:
db.execute(query=query)

In [None]:
referral: pd.DataFrame = cursor.fetch_dataframe()

# A batch may contain no patient with a referral code. The fetch result is
# treated as possibly None (as in the changed_patients check earlier), so fall
# back to an empty frame with the expected columns; the later `.head()` and
# left merge on 'id' then keep working.
if referral is None:
    referral = pd.DataFrame(columns=['id', 'referred-count'])

In [None]:
referral.head(1)

## Primary Store, System Age Days and Recency Customer Days 

In [None]:
# Bill-level rows (store, spend, dates) for every 'updating' patient. This
# frame feeds both the purchase-interval statistics and the primary-store pick.
query = """
    select
        pm.id,
        bm."store-id",
        bm.id as "bill-id",
        bm."total-spend",
        bm."bill-date",
        bm."created-at"
    from
        "prod2-generico"."patients-metadata-2" pm
    inner join 
        "prod2-generico"."bills-1-metadata" bm on
        pm.id = bm."patient-id"
    where
        pm."etl-status" = 'updating'
"""

In [None]:
db.execute(query=query)

In [None]:
# One row per (patient, bill).
patient_bills: pd.DataFrame = cursor.fetch_dataframe()

In [None]:
# Purchase interval: order each patient's bills chronologically by bill date.
patient_bills_2 = patient_bills.sort_values(by=["id", "bill-date"])

In [None]:
# Fetch previous bill date, against every bill (NaN/NaT for a patient's first bill)
patient_bills_2['prev-bill-date'] = patient_bills_2.groupby("id")['bill-date'].shift(1)

In [None]:
# Gap in whole days between consecutive bills of the same patient.
# NOTE(review): `.dt.days` needs the subtraction to yield a timedelta Series;
# this assumes 'bill-date' arrives as a datetime-like column — confirm.
patient_bills_2['purchase-interval'] = (patient_bills_2['bill-date'] - patient_bills_2['prev-bill-date']).dt.days

In [None]:
# Mean and standard deviation of the gaps per patient (two-level column index).
patient_bills_avg_std = patient_bills_2.groupby(['id']).agg({'purchase-interval': ['mean', 'std']})

In [None]:
# Flatten the two-level column index and give the three columns their final names.
patient_bills_avg_std = patient_bills_avg_std.reset_index(col_level=1)
patient_bills_avg_std.columns = patient_bills_avg_std.columns.droplevel(0)
patient_bills_avg_std.columns = ['id', 'avg-purchase-interval', 'std-purchase-interval']

In [None]:
patient_bills_avg_std.head(2)

In [None]:
# Per patient and store: number of distinct bills (NOB) and total spend
patient_store_agg = patient_bills.groupby(
    ['id','store-id']).agg({'bill-id': 'nunique','total-spend': 'sum'}).reset_index()

In [None]:
patient_store_agg = patient_store_agg.rename(columns={'bill-id': 'store-bills', 'total-spend': 'store-spend'})

In [None]:
# Rank each patient's stores by bill count, then by spend (both descending).
# The ranks are computed on a sorted copy and assigned back by index, so the
# row order of patient_store_agg itself is unchanged.
patient_store_agg['rank'] = patient_store_agg.sort_values(
    ['store-bills', 'store-spend'], ascending=[False, False]).groupby(['id']).cumcount() + 1

In [None]:
# Shortlist 1 store per patient: the rank-1 store becomes the primary store
patient_primary_store = patient_store_agg[patient_store_agg['rank'] == 1]
patient_primary_store = patient_primary_store.rename(columns={'store-id': 'primary-store-id'})

In [None]:
patient_primary_store.head(1)

### Previous abv and total spend

In [None]:
# patient_bills_3 = patient_bills.sort_values(by=["id", "created-at"])

In [None]:
# patient_bills_3['temp-nob'] = 1

In [None]:
# patient_bills_3['total-bills'] = patient_bills_3.groupby(['id'], as_index=False)['temp-nob'].cumsum()

In [None]:
# patient_bills_3['previous-total-bills'] = data_bill.groupby(['id'], as_index=False)['temp_nob'].shift(1)

In [None]:
# patient_bills_3['cum-total-spend'] = data_bill.groupby(['id'], as_index=False)['total-spend'].shift(1)

In [None]:
# patient_bills_3.head(2)

In [None]:
# data_bill['previous_total_bills'] = data_bill.groupby(['patient_id'],
#                                                           as_index=False)['temp_nob'].shift(1)
#     merge_data['cum_sum_spend'] = merge_data.groupby(['patient_id'],
#                                                      as_index=False)['total_spend'].cumsum()
#     merge_data['current_abv'] = merge_data['cum_sum_spend'] / merge_data['temp_nob']
#     merge_data['previous_abv'] = merge_data.groupby(['patient_id'],
#                                                     as_index=False)['current_abv'].shift(1)

## Number of drug and primary disease 

In [None]:
# Item-level rows (bill -> bill item -> inventory -> drug) for every 'updating'
# patient. The join to drugs selects no column from it; it only restricts the
# result to inventory rows whose drug id exists in the drugs table.
query = """
    select
        b."patient-id" ,
        b.id as "bill-id",
        bi."inventory-id",
        i."drug-id"
    from
        "prod2-generico"."patients-metadata-2" pm
    inner join "prod2-generico"."bills-1" b on
        pm.id = b."patient-id"
    inner join "prod2-generico"."bill-items-1" bi on
        b.id = bi."bill-id"
    inner join "prod2-generico"."inventory-1" i on
        bi."inventory-id" = i.id
    inner join "prod2-generico".drugs d on
        i."drug-id" = d.id
    where
        pm."etl-status" = 'updating';
"""

In [None]:
db.execute(query=query)

In [None]:
# One row per billed item.
patient_drugs: pd.DataFrame = cursor.fetch_dataframe()

In [None]:
# Number of drugs: count of distinct drug ids per patient, keyed as 'id' for the final merge
patient_drug_agg = patient_drugs.groupby(['patient-id']).agg({'drug-id': "nunique"}).reset_index().rename(
    columns={'drug-id': 'num-drugs', 'patient-id': 'id'})

In [None]:
patient_drug_agg.head(2)

In [None]:
# Primary disease calculation: distinct (drug, subgroup) pairs obtained by
# matching molecule names against the composition table.
query = """
    select
        b."drug-id",
        a.subgroup
    from
        "prod2-generico".molecules a
    inner join "prod2-generico".composition b on
        a.name = b.molecule
    group by
        b."drug-id",
        a.subgroup;
"""

In [None]:
db.execute(query=query)

In [None]:
drug_subgroup: pd.DataFrame = cursor.fetch_dataframe()

In [None]:
# Number of billed items per (patient, drug)
patient_drugs_count = patient_drugs.groupby(
    ['patient-id', 'drug-id'])['inventory-id'].count().reset_index().rename(columns={'inventory-id': 'count'})

In [None]:
# Attach subgroups; a drug mapped to several subgroups yields one row per subgroup.
patient_drugs_subgroup_count = patient_drugs_count.merge(drug_subgroup, how='left', on=['drug-id'])

In [None]:
patient_drugs_subgroup_count.head(2)

In [None]:
# Sum the item counts per (patient, subgroup)
patient_subgroup = patient_drugs_subgroup_count.groupby(
    ['patient-id', 'subgroup'])['count'].sum().reset_index().rename(columns={'count': 'drug-count'})

In [None]:
# Rank subgroups within each patient by usage, most used first
patient_subgroup = patient_subgroup.sort_values(by=['patient-id', 'drug-count'], ascending=[True, False])
patient_subgroup['rank'] = patient_subgroup.groupby(['patient-id']).cumcount() + 1

In [None]:
# Keep the top 2 subgroups per patient
patient_subgroup_top_2 = patient_subgroup[patient_subgroup['rank'] <= 2]

In [None]:
# Make 2 columns, first for rank1, other for rank2
patient_subgroup_top_2_pivot = patient_subgroup_top_2.pivot(index='patient-id', columns='rank', values='subgroup').reset_index()
patient_subgroup_top_2_pivot.columns = ['patient-id', 'disease-rank1', 'disease-rank2']

In [None]:
# Assignment of primary disease
# If rank1 is not others, then rank1 as it is
# If rank1 is others, and rank2 is null, then rank1 as it is
# If rank1 is others, and rank2 is something, then rank2
# Choose the primary disease per patient:
#   - rank1 is not 'others'                -> rank1
#   - rank1 is 'others' and rank2 missing  -> rank1 (i.e. 'others')
#   - rank1 is 'others' and rank2 present  -> rank2
rank1_is_others = patient_subgroup_top_2_pivot['disease-rank1'] == 'others'
rank2_present = patient_subgroup_top_2_pivot['disease-rank2'].notnull()
patient_subgroup_top_2_pivot['primary-disease'] = np.where(
    rank1_is_others & rank2_present,
    patient_subgroup_top_2_pivot['disease-rank2'],
    patient_subgroup_top_2_pivot['disease-rank1'],
)

In [None]:
patient_subgroup_top_2_pivot.head(2)

In [None]:
# Keep only the patient key and the chosen disease; the key is renamed to 'id'
# to match the merge key used when all data points are combined.
patient_primary_disease = patient_subgroup_top_2_pivot[['patient-id', 'primary-disease']].rename(
    columns={'patient-id': 'id'})

## Value segment calculation

In [None]:
# Bill count and total spend per 'updating' patient, stamped with today's date
# as the value-segment calculation date. The date-window filter is commented
# out inside the SQL, so all bills of the patient are included.
query = """
    select
        pm.id,
        COUNT(distinct bm."id") as "total-bills",
        SUM(bm."total-spend") as "total-spend",
        current_date as "value-segment-calculation-date"
    from
        "prod2-generico"."prod2-generico"."patients-metadata-2" pm
    inner join "prod2-generico"."bills-1-metadata" bm on
        pm.id = bm."patient-id"
    where
        pm."etl-status" = 'updating'
--        and datediff(day, current_date, bm."created-at") between -2 and -1
    group by
        pm.id;
"""

In [None]:
db.execute(query=query)

In [None]:
# One row per patient.
patient_agg_bill: pd.DataFrame = cursor.fetch_dataframe()

In [None]:
# Average bill value = total spend / number of bills, rounded to 2 decimals.
patient_agg_bill['abv'] = np.round(
    patient_agg_bill['total-spend'].astype('float') / patient_agg_bill['total-bills'].astype('float'), 2)

In [None]:
# Highest spenders first.
patient_value_segment = patient_agg_bill.sort_values(['total-spend'], ascending=False)

In [None]:
# Dense rank on spend: equal spends share a rank and ranks have no gaps.
patient_value_segment['rank'] = patient_value_segment['total-spend'].rank(method='dense', ascending=False)

In [None]:
# Running total of spend in descending-spend order.
# NOTE(review): 'cumm-sales' is not read by any later cell of this notebook.
patient_value_segment['cumm-sales'] = patient_value_segment.sort_values(
    ['total-spend'], ascending=False)['total-spend'].cumsum()

In [None]:
# Number of patients in this batch; assign_value_segment() reads this global
# to derive its rank cut-offs.
len_data = len(patient_value_segment)

In [None]:
def assign_value_segment(row):
    """Return the value-segment label for one patient row.

    The row's 'rank' is compared against fractions of the module-level
    ``len_data``: up to 5% -> 'platinum', up to 10% -> 'gold',
    up to 20% -> 'silver', anything else -> 'others'.
    """
    rank = row['rank']

    # Guard clauses from the most to the least exclusive segment; each check
    # is reached only when the previous, tighter cut-off was not met.
    if rank <= 0.05 * len_data:
        return 'platinum'
    if rank <= 0.1 * len_data:
        return 'gold'
    if rank <= 0.2 * len_data:
        return 'silver'
    return 'others'

In [None]:
# Label every patient row with its segment (row-wise apply).
patient_value_segment['value-segment'] = patient_value_segment.apply(assign_value_segment, axis=1)

In [None]:
patient_value_segment

## Merging all data points

In [None]:
patient_data = changed_patients[['id']]

### Feedback (nps)

In [None]:
patient_data = patient_data.merge(nps, how='left', on=['id'])

### Referral

In [None]:
patient_data = patient_data.merge(referral, how='left', on=['id'])

In [None]:
patient_data

### Primary Store

In [None]:
patient_data = patient_data.merge(patient_primary_store[['id','primary-store-id']], how='left', on='id')

### Primary Disease

In [None]:
patient_data = patient_data.merge(patient_primary_disease, how='left', on='id')

### Drug count

In [None]:
patient_data = patient_data.merge(patient_drug_agg, how='left', on='id')

In [None]:
patient_data

### Average and Standard Deviation of Purchase Interval

In [None]:
patient_data = patient_data.merge(patient_bills_avg_std, how='left', on='id')

In [None]:
patient_data.head(2)

### Value Segment

In [None]:
patient_data = patient_data.merge(
    patient_value_segment[['id', 'value-segment-calculation-date', 'value-segment']], how='left', on='id')

In [None]:
patient_data

## Create temp table and update (nps and other) from that

In [None]:
# Name of the staging table that receives the merged per-patient attributes
# before they are copied into "patients-metadata-2".
patient_temp_table = "patients_data_temp"

In [None]:
# Start clean in case a table with this name already exists.
db.execute(query=f"DROP table IF EXISTS {patient_temp_table};")

In [None]:
# Columns mirror the attributes assembled in patient_data and the SET list of
# the final update statement.
query = f"""
    CREATE TEMP TABLE {patient_temp_table}
    (
        id INTEGER ENCODE az64
        ,"is-nps" bool
        ,"latest-nps-rating" INTEGER  ENCODE az64
        ,"latest-nps-rating-comment" VARCHAR(1500)   ENCODE lzo
        ,"latest-nps-rating-store-id" INTEGER ENCODE az64
        ,"latest-nps-rating-store-name" VARCHAR(765)   ENCODE lzo
        ,"latest-nps-rating-date" date  ENCODE az64
        ,"referred-count" int  ENCODE az64
        ,"primary-store-id" INTEGER ENCODE az64
        ,"num-drugs" INTEGER ENCODE az64
        ,"primary-disease" VARCHAR(100)   ENCODE lzo
        ,"avg-purchase-interval" numeric
        ,"std-purchase-interval" numeric
        ,"value-segment-calculation-date" date
        ,"value-segment" VARCHAR(50)
        ,PRIMARY KEY (id)
    );
"""

In [None]:
db.execute(query=query)

In [None]:
# Table metadata; its 'column_name' values are used below to select and order
# the DataFrame columns before writing.
patient_temp_table_info = helper.get_table_info(db=db, table_name=patient_temp_table, schema=None)

In [None]:
# patient_temp_table_info
patient_data.info()

### Fixing the data types

In [None]:
# The left merges leave NaN in these columns for patients without a match.
# Replace NaN with -1 and cast to int64.
for int_column in (
    'latest-nps-rating',
    'latest-nps-rating-store-id',
    'referred-count',
    'num-drugs',
):
    patient_data[int_column] = patient_data[int_column].fillna(-1).astype('int64')

In [None]:
# Load the merged frame into the temp table. Only the temp table's columns are
# sent, in the table's order; dict.fromkeys() drops repeated column names while
# preserving that order.
s3.write_df_to_db(
    df=patient_data[list(dict.fromkeys(patient_temp_table_info['column_name']))], 
    db=db, table_name=patient_temp_table, schema=None
)

## Updating the data in patient-metadata-2 table

In [None]:
# Copy the staged attributes onto "patients-metadata-2" and mark every row that
# has a match in the temp table as 'updated', so it leaves the 'updating' set.
query = f"""
    update
        "prod2-generico"."patients-metadata-2" t
    set
        "is-nps" = s."is-nps",
        "latest-nps-rating" = s."latest-nps-rating",
        "latest-nps-rating-comment" = s."latest-nps-rating-comment",
        "latest-nps-rating-store-id" = s."latest-nps-rating-store-id",
        "latest-nps-rating-store-name" = s."latest-nps-rating-store-name",
        "latest-nps-rating-date" = s."latest-nps-rating-date",
        "referred-count" = s."referred-count",
        "primary-store-id" = s."primary-store-id",
        "num-drugs" = s."num-drugs",
        "primary-disease" = s."primary-disease",
        "avg-purchase-interval" = s."avg-purchase-interval",
        "std-purchase-interval" = s."std-purchase-interval",
        "value-segment-calculation-date" = s."value-segment-calculation-date",
        "value-segment" = s."value-segment",
        "etl-status" = 'updated'
    from
         {patient_temp_table} s
    where
        t.id = s.id;
"""

In [None]:
db.execute(query=query)

# Closing the DB Connection

In [None]:
db.close_connection()