In [1]:
"""main wrapper for IPC safety stock reset"""

import os
import sys
import argparse

import pandas as pd
import datetime as dt
from dateutil.tz import gettz
from ast import literal_eval

In [2]:
# To add path so that we can improt zeno_etl_libs from local folder
sys.path.append('../../../..')

In [3]:
from zeno_etl_libs.helper.aws.s3 import S3
from zeno_etl_libs.db.db import DB, PostGre
from zeno_etl_libs.helper import helper
from zeno_etl_libs.logger import get_logger

In [4]:
from zeno_etl_libs.utils.ipc.forecast_reset import ipc_forecast_reset

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
from zeno_etl_libs.utils.warehouse.wh_intervention.store_portfolio_consolidation import stores_ss_consolidation

In [6]:
from zeno_etl_libs.utils.ipc.goodaid_substitution import update_ga_ss

In [7]:
from zeno_etl_libs.utils.ipc.npi_exclusion import omit_npi_drugs

In [8]:
from zeno_etl_libs.utils.ipc.post_processing import post_processing

In [9]:
from zeno_etl_libs.utils.ipc.doid_update_ss import doid_update

# main

In [10]:
def main(debug_mode, reset_stores, reset_date, type_list, reset_store_ops,
         goodaid_ss_flag, ga_inv_weight, rest_inv_weight, top_inv_weight,
         chronic_max_flag, wh_gen_consolidation, v5_active_flag, v6_active_flag,
         v6_type_list, v6_ptr_cut_off, v3_active_flag,
         omit_npi, corrections_selling_probability_cutoff,
         corrections_cumulative_probability_cutoff, drug_type_list_v4,
         rs_db_read, rs_db_write, read_schema, write_schema, logger):

    s3 = S3()
    logger.info(f"Debug Mode: {debug_mode}")
    status = 'Failed'
    if v3_active_flag == 'Y':
        corrections_flag = True
    else:
        corrections_flag = False

    # Define empty DF if required in case of fail
    order_value_all = pd.DataFrame()
    new_drug_entries = pd.DataFrame()
    missed_entries = pd.DataFrame()

    logger.info("Forecast pipeline starts...")
    try:
        for store_id in reset_stores:
            logger.info("IPC SS calculation started for store id: " + str(store_id))

            # RUNNING FORECAST PIPELINE AND SAFETY STOCK CALC
            drug_class, weekly_fcst, safety_stock_df, df_corrections, \
                df_corrections_111, drugs_max_to_lock_ipcv6, \
                drug_rejects_ipcv6 = ipc_forecast_reset(
                    store_id, type_list, reset_date, corrections_flag,
                    corrections_selling_probability_cutoff,
                    corrections_cumulative_probability_cutoff,
                    rs_db_read, read_schema,
                    drug_type_list_v4=drug_type_list_v4,
                    v5_active_flag=v5_active_flag,
                    v6_active_flag=v6_active_flag,
                    v6_type_list=v6_type_list,
                    v6_ptr_cut_off=v6_ptr_cut_off,
                    chronic_max_flag=chronic_max_flag,
                    logger=logger)

            # WAREHOUSE GENERIC SKU CONSOLIDATION
            if wh_gen_consolidation == 'Y':
                safety_stock_df, consolidation_log = stores_ss_consolidation(
                    safety_stock_df, rs_db_read, read_schema,
                    min_column='safety_stock', ss_column='reorder_point',
                    max_column='order_upto_point')

            # GOODAID SAFETY STOCK MODIFICATION
            if goodaid_ss_flag == 'Y':
                safety_stock_df, good_aid_ss_log = update_ga_ss(
                    safety_stock_df, store_id, rs_db_read, read_schema,
                    ga_inv_weight, rest_inv_weight,
                    top_inv_weight, substition_type=['generic'],
                    min_column='safety_stock', ss_column='reorder_point',
                    max_column='order_upto_point', logger=logger)

            # OMIT NPI DRUGS
            if omit_npi == 'Y':
                safety_stock_df = omit_npi_drugs(safety_stock_df, store_id,
                                                 reset_date, rs_db_read,
                                                 read_schema, logger)

            # POST PROCESSING AND ORDER VALUE CALCULATION
            drug_class, weekly_fcst, safety_stock_df, \
                order_value = post_processing(store_id, drug_class, weekly_fcst,
                                              safety_stock_df, rs_db_read,
                                              read_schema,  logger)
            order_value_all = order_value_all.append(order_value, ignore_index=True)

            # WRITING TO RS-DB
            if debug_mode == 'N':
                logger.info("Writing table to RS-DB")
                # writing table ipc-forecast
                weekly_fcst.rename(
                    columns={'date': 'week_begin_dt', 'fcst': 'point_forecast',
                             'std': 'forecast_deviation'}, inplace=True)
                weekly_fcst['forecast_date'] = dt.datetime.strptime(reset_date, '%Y-%m-%d').date()
                weekly_fcst['week_begin_dt'] = weekly_fcst['week_begin_dt']
                weekly_fcst['created-at'] = dt.datetime.now(
                    tz=gettz('Asia/Kolkata')).strftime('%Y-%m-%d %H:%M:%S')
                weekly_fcst['created-by'] = 'etl-automation'
                weekly_fcst['updated-at'] = dt.datetime.now(
                    tz=gettz('Asia/Kolkata')).strftime('%Y-%m-%d %H:%M:%S')
                weekly_fcst['updated-by'] = 'etl-automation'
                weekly_fcst.columns = [c.replace('_', '-') for c in weekly_fcst.columns]
                table_info = helper.get_table_info(db=rs_db_write,
                                                   table_name='ipc-forecast',
                                                   schema=write_schema)
                columns = list(table_info['column_name'])
                weekly_fcst = weekly_fcst[columns]  # required column order

                logger.info("Writing to table: ipc-forecast")
                s3.write_df_to_db(df=weekly_fcst,
                                  table_name='ipc-forecast',
                                  db=rs_db_write, schema=write_schema)

                # writing table ipc-safety-stock
                safety_stock_df['reset_date'] = dt.datetime.strptime(reset_date, '%Y-%m-%d').date()
                safety_stock_df['created-at'] = dt.datetime.now(
                    tz=gettz('Asia/Kolkata')).strftime('%Y-%m-%d %H:%M:%S')
                safety_stock_df['created-by'] = 'etl-automation'
                safety_stock_df['updated-at'] = dt.datetime.now(
                    tz=gettz('Asia/Kolkata')).strftime('%Y-%m-%d %H:%M:%S')
                safety_stock_df['updated-by'] = 'etl-automation'
                safety_stock_df.columns = [c.replace('_', '-') for c in safety_stock_df.columns]
                table_info = helper.get_table_info(db=rs_db_write,
                                                   table_name='ipc-safety-stock',
                                                   schema=write_schema)
                columns = list(table_info['column_name'])
                safety_stock_df = safety_stock_df[columns]  # required column order

                logger.info("Writing to table: ipc-safety-stock")
                s3.write_df_to_db(df=safety_stock_df,
                                  table_name='ipc-safety-stock',
                                  db=rs_db_write, schema=write_schema)

                # writing table ipc-abc-xyz-class
                drug_class['reset_date'] = dt.datetime.strptime(reset_date, '%Y-%m-%d').date()
                drug_class['created-at'] = dt.datetime.now(
                    tz=gettz('Asia/Kolkata')).strftime('%Y-%m-%d %H:%M:%S')
                drug_class['created-by'] = 'etl-automation'
                drug_class['updated-at'] = dt.datetime.now(
                    tz=gettz('Asia/Kolkata')).strftime('%Y-%m-%d %H:%M:%S')
                drug_class['updated-by'] = 'etl-automation'
                drug_class.columns = [c.replace('_', '-') for c in drug_class.columns]
                table_info = helper.get_table_info(db=rs_db_write,
                                                  table_name='ipc-abc-xyz-class',
                                                  schema=write_schema)
                columns = list(table_info['column_name'])
                drug_class = drug_class[columns]  # required column order

                logger.info("Writing to table: ipc-abc-xyz-class")
                s3.write_df_to_db(df=drug_class,
                                  table_name='ipc-abc-xyz-class',
                                  db=rs_db_write, schema=write_schema)

                # to write ipc v6 tables ...

                # UPLOADING MIN, SS, MAX in DOI-D
                logger.info("Updating new SS to DrugOrderInfo-Data")
                safety_stock_df.columns = [c.replace('-', '_') for c in safety_stock_df.columns]
                ss_data_upload = safety_stock_df.query('order_upto_point > 0')[
                    ['store_id', 'drug_id', 'safety_stock', 'reorder_point',
                     'order_upto_point']]
                ss_data_upload.columns = ['store_id', 'drug_id', 'corr_min',
                                          'corr_ss', 'corr_max']
                new_drug_entries_str, missed_entries_str = doid_update(
                    ss_data_upload, type_list, rs_db_write, write_schema, logger)
                new_drug_entries = new_drug_entries.append(new_drug_entries_str)
                missed_entries = missed_entries.append(missed_entries_str)

                logger.info("All writes to RS-DB completed!")

                # INTERNAL TABLE SCHEDULE UPDATE - OPS ORACLE
                # logger.info(f"Rescheduling SID:{store_id} in OPS ORACLE")
                # if reset_store_ops != None:
                #     content_type = 74
                #     object_id = reset_store_ops.loc[
                #         reset_store_ops[
                #             'store_id'] == store_id, 'object_id'].unique()
                #     for obj in object_id:
                #         request_body = {
                #             "object_id": int(obj), "content_type": content_type}
                #         api_response, _ = django_model_execution_log_create_api(
                #             request_body)
                #         reset_store_ops.loc[
                #             reset_store_ops['object_id'] == obj,
                #             'api_call_response'] = api_response

            else:
                logger.info("Writing to RS-DB skipped")

        status = 'Success'
        logger.info(f"IPC code execution status: {status}")

    except Exception as error:
        logger.exception(error)
        logger.info(f"IPC code execution status: {status}")

    return order_value_all, new_drug_entries, missed_entries

In [11]:
# Parameter passing

In [12]:
env = "stage"

In [13]:
os.environ['env'] = env

In [14]:
email_to = "vivek.revi@zeno.health"
debug_mode = "N"

# JOB EXCLUSIVE PARAMS
exclude_stores = [52, 60, 92, 243, 281]
goodaid_ss_flag = "Y"
ga_inv_weight = 0.5
rest_inv_weight = 0.0
top_inv_weight = 1
chronic_max_flag = "N"
wh_gen_consolidation = "Y"
v5_active_flag = "N"
v6_active_flag = "N"
v6_type_list = ['ethical', 'generic', 'others']
v6_ptr_cut_off = 400
reset_date = "YYYY-MM-DD"
reset_stores = [2]
v3_active_flag = "N"
corrections_selling_probability_cutoff = "{'ma_less_than_2': 0.40, 'ma_more_than_2' : 0.40}"
corrections_cumulative_probability_cutoff = "{'ma_less_than_2':0.50,'ma_more_than_2':0.63}"
drug_type_list_v4 = "{'generic':'{0:[0,0,0], 1:[0,0,1], 2:[0,1,2],3:[1,2,3]}','ethical':'{0:[0,0,0], 1:[0,0,1], 2:[0,1,2],3:[1,2,3]}','others':'{0:[0,0,0], 1:[0,1,2], 2:[0,1,2],3:[1,2,3]}'}"
omit_npi = 'N'

In [15]:
# EVALUATE REQUIRED JSON PARAMS
corrections_selling_probability_cutoff = literal_eval(
    corrections_selling_probability_cutoff)
corrections_cumulative_probability_cutoff = literal_eval(
    corrections_cumulative_probability_cutoff)
drug_type_list_v4 = literal_eval(drug_type_list_v4)

In [16]:
logger = get_logger()
rs_db_read = DB(read_only=True)
rs_db_write = DB(read_only=False)
read_schema = 'prod2-generico'
write_schema = 'prod2-generico'

In [17]:
# open RS connection
rs_db_read.open_connection()
rs_db_write.open_connection()

<redshift_connector.cursor.Cursor at 0x28d30c610>

In [18]:
if reset_date == 'YYYY-MM-DD':  # Take current date
    reset_date = dt.date.today().strftime("%Y-%m-%d")

In [19]:
if 0:
    pass
else:
    type_list = "('ethical', 'ayurvedic', 'generic', 'discontinued-products', " \
                "'banned', 'general', 'high-value-ethical', 'baby-product'," \
                " 'surgical', 'otc', 'glucose-test-kit', 'category-2', " \
                "'category-1', 'category-4', 'baby-food', '', 'category-3')"
    reset_store_ops = None

In [20]:
""" calling the main function """
order_value_all, new_drug_entries, \
    missed_entries = main(
        debug_mode, reset_stores, reset_date, type_list, reset_store_ops,
        goodaid_ss_flag, ga_inv_weight, rest_inv_weight, top_inv_weight,
        chronic_max_flag, wh_gen_consolidation, v5_active_flag,
        v6_active_flag, v6_type_list, v6_ptr_cut_off, v3_active_flag,
        omit_npi, corrections_selling_probability_cutoff,
        corrections_cumulative_probability_cutoff, drug_type_list_v4,
        rs_db_read, rs_db_write, read_schema, write_schema, logger)

2022-05-09 10:44:34,093 - root - INFO - Debug Mode: N
2022-05-09 10:44:34,095 - root - INFO - Forecast pipeline starts...
2022-05-09 10:44:34,095 - root - INFO - IPC SS calculation started for store id: 2
Date range 2019-04-01 2022-05-09
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1205 entries, 0 to 1204
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   sales_date          1205 non-null   object
 1   drug_id             1205 non-null   int64 
 2   net_sales_quantity  1205 non-null   int64 
dtypes: int64(2), object(1)
memory usage: 28.4+ KB
sales_history: None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27504 entries, 0 to 27503
Data columns (total 3 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   attributed_loss_date  27504 non-null  object 
 1   drug_id               27504 non-null  int64  
 2   loss_quantity         27504 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cal_sales_weekly['week_begin_dt'] = cal_sales_weekly.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cal_sales_weekly['month_begin_dt'] = cal_sales_weekly.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cal_sales_monthly['week_begin_dt'] = cal_sales_monthly.apply(
A value is trying to b

Distinct no of drugs 6
Distinct dates 1134
DF shape 6804
2017-05-13
6
6
161   2022-05-02
160   2022-04-25
159   2022-04-18
158   2022-04-11
Name: date, dtype: datetime64[ns]
6
6
72
6
  bucket_abc bucket_xyz  drug_id  net_sales  avg_sales_per_drug  sales_cov  \
0          A          Y        1  52.166667           52.166667   0.380749   
1          B          X        1  29.333333           29.333333   0.294101   
2          B          Y        1   6.000000            6.000000   0.846860   
3          B          Z        1   5.000000            5.000000   1.187817   
4          C          Y        1   2.750000            2.750000   0.761587   
5          C          Z        1   1.250000            1.250000   1.411640   

   net_sales_frac  drug_frac  
0           54.06      16.67  
1           30.40      16.67  
2            6.22      16.67  
3            5.18      16.67  
4            2.85      16.67  
5            1.30      16.67  


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Metal device set to: Apple M1
Metal device set to: Apple M1


2022-05-09 10:44:47.808607: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-05-09 10:44:47.808607: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-05-09 10:44:47.808746: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2022-05-09 10:44:47.808755: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL,

Run time  343.4710421562195
Run time  0.015439987182617188


2022-05-09 10:50:26.336802: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-05-09 10:50:26.404737: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
[Parallel(n_jobs=4)]: Done   2 out of   2 | elapsed:  5.7min remaining:    0.0s
[Parallel(n_jobs=4)]: Done   2 out of   2 | elapsed:  5.7min finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.



systemMemory: 8.00 GB
maxCacheSize: 2.67 GB

Initial log joint probability = -3.44444
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       255.301   5.22411e-05       52.2026       4.012      0.4012      120   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     134       255.302   2.78558e-08       48.6014           1           1      164   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB

Initial log joint probability = -10.375
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       137.791   2.53148e-05       57.0517           1           1      121   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     133       137.792   7.52277e-09       60.3339      0.5674      0.5674      163   
Optim

[Parallel(n_jobs=4)]: Done   2 out of   2 | elapsed:    3.7s remaining:    0.0s
[Parallel(n_jobs=4)]: Done   2 out of   2 | elapsed:    3.7s finished


Run time  3.7527899742126465
6
AX 0.5
AY 0.5
AZ 0.5
BX 0.5
BY 0.6
BZ 0.6
CX 0.5
CY 0.6
CZ 0.6
2022-01-31 2022-05-02 00:00:00




PostGre DB connection closed successfully!
4.37 1.8
2022-05-09 10:50:34,007 - root - INFO - Getting last 4 week data for base heuristic from2022-04-11to2022-05-09
getting data for store 2
2022-05-09 10:50:48,617 - root - INFO - Out of total line items 6
Decreased: Total 4
Decreased: Generic 2
Decreased: Ethical 2
Increased: Total 0
Increased: Generic 0
Increased: Ethical 0
6
  bucket  max_value
0     AY   141.9438
1     BX   931.9930
2     BY    24.0340
3     BZ   140.6320
4     CY   572.1100
5     CZ   917.8892
5.90625
2728.6020000000003
corrections block skipped :
Cases with 0 max: 0 for drug_type:generic 
Cases with 1 max: 1 for drug_type:generic 
Cases with 2 max: 0 for drug_type:generic 
Cases with 3 max: 0 for drug_type:generic 
Cases with 0 max: 0 for drug_type:ethical 
Cases with 1 max: 0 for drug_type:ethical 
Cases with 2 max: 1 for drug_type:ethical 
Cases with 3 max: 0 for drug_type:ethical 
Cases with 0 max: 0 for drug_type:others 
Cases with 1 max: 0 for drug_type:others 

  ga_composition_ss_agg = ga_composition_ss.groupby(
  good_aid_ss_log = good_aid_ss_log.append(prev_rest_sku_ss)
  good_aid_ss_log = good_aid_ss_log.append(prev_top_sku_ss)
  good_aid_ss_log = good_aid_ss_log.append(prev_ga_sku_ss)


2022-05-09 10:51:34,728 - root - INFO - Writing table to RS-DB


  order_value_all = order_value_all.append(order_value, ignore_index=True)


2022-05-09 10:51:35,161 - root - INFO - Writing to table: ipc-forecast
S3 object(uri: s3://aws-glue-temporary-921939243643-ap-south-1/temp_1652073695162.csv) delete response: {'ResponseMetadata': {'RequestId': '0HFD63S8856Y6E6T', 'HostId': 'g3izOEahDTV6bnF/R/jjEsttCx6zK36NtYR6oTkbEgX4zVuL4Y5QbmTlnPIHgDOtN+bh5jSBKJQ=', 'HTTPStatusCode': 204, 'HTTPHeaders': {'x-amz-id-2': 'g3izOEahDTV6bnF/R/jjEsttCx6zK36NtYR6oTkbEgX4zVuL4Y5QbmTlnPIHgDOtN+bh5jSBKJQ=', 'x-amz-request-id': '0HFD63S8856Y6E6T', 'date': 'Mon, 09 May 2022 05:21:49 GMT', 'server': 'AmazonS3'}, 'RetryAttempts': 0}}
2022-05-09 10:51:49,151 - root - INFO - Writing to table: ipc-safety-stock
S3 object(uri: s3://aws-glue-temporary-921939243643-ap-south-1/temp_1652073709152.csv) delete response: {'ResponseMetadata': {'RequestId': 'VR3ZPB6R4RN8D7TR', 'HostId': 'SPoOwlSGBDhllN6kQvHe2u4Hy7NJyBwO0vAuuRTfDBN6v4E1A9e+81yv8jwGcMWtCZCZvZqI8ZY=', 'HTTPStatusCode': 204, 'HTTPHeaders': {'x-amz-id-2': 'SPoOwlSGBDhllN6kQvHe2u4Hy7NJyBwO0vAuuRTfDBN6

  new_drug_entries = new_drug_entries.append(


2022-05-09 10:52:50,570 - root - INFO - Entries updated successfully: 0
2022-05-09 10:52:50,572 - root - INFO - Entries not updated successfully: 4970
2022-05-09 10:52:50,627 - root - INFO - All writes to RS-DB completed!
2022-05-09 10:52:50,628 - root - INFO - IPC code execution status: Success


  missed_entries = missed_entries.append(
  new_drug_entries = new_drug_entries.append(new_drug_entries_str)
  missed_entries = missed_entries.append(missed_entries_str)


In [21]:
# close RS connection
rs_db_read.close_connection()
rs_db_write.close_connection()

Redshift DB connection closed successfully.
Redshift DB connection closed successfully.


In [22]:
# SENT EMAIL ATTACHMENTS
logger.info("Sending email attachments..")
# to write ..............

2022-05-09 10:52:50,639 - root - INFO - Sending email attachments..


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  supervised_df.iloc[0:-n_test]['sample_flag'] = 'train'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  supervised_df.iloc[-n_test:]['sample_flag'] = 'validation'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  supervised_scaled_df.iloc[0:-n_test]['sample_flag'] = 'train'
A value is trying to be set o