In [1]:
# ML code
import pandas as pd
from datetime import timedelta
import numpy as np
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, roc_auc_score

df = pd.read_csv('/project_data/data_asset/Final_AR Data_shaped_058ec0b9.csv')
# print(df)
# print(df.columns)

df = df[['CUSTOMER', 'FISCPER', 'NETDUE_DATE', 'CLEAR_DATE', 'CUST_CRE_REP_GRP', 'AMOUNT', 'ITEM_STATUS',
         'PAYMENT_TERMS', 'county', 'confirmed_cases', 'deaths']]

print(df)

        CUSTOMER  FISCPER NETDUE_DATE  CLEAR_DATE CUST_CRE_REP_GRP   AMOUNT  \
0         101374  2018001  2018-04-30  2018-02-07               AA   324.85   
1         101374  2018001  2018-04-30  2018-02-07               AA   324.85   
2         101374  2018001  2018-04-30  2018-02-07               AA   324.85   
3         101374  2018001  2018-04-30  2018-02-07               AA   481.49   
4         101374  2018001  2018-04-30  2018-02-07               AA   324.85   
5         101374  2018001  2018-04-30  2018-02-07               AA   324.85   
6         101374  2018001  2018-04-30  2018-02-07               AA   324.85   
7         101374  2018001  2018-04-30  2018-02-07               AA   881.10   
8         101374  2018001  2018-04-30  2018-02-07               AA   324.85   
9         101374  2018001  2018-04-30  2018-02-07               AA   643.47   
10        101374  2018001  2018-04-30  2018-02-07               AA   617.66   
11        101374  2018001  2018-04-30  2018-02-07   

In [3]:
# Prelim pre-processing for target
df['NETDUE_DATE'] = df['NETDUE_DATE'].astype('datetime64[ns]')
df['CLEAR_DATE'] = df['CLEAR_DATE'].astype('datetime64[ns]')
df['DAY_PAID'] = df['NETDUE_DATE'] - df['CLEAR_DATE']
df['ON_TIME_PRED'] = df['DAY_PAID'] >= timedelta(days=0)
print(df)
df['ON_TIME_PRED'] = df['ON_TIME_PRED'].astype(int)
print(df)

df = df.drop('DAY_PAID', axis=1)
print(df.dtypes)

        CUSTOMER  FISCPER NETDUE_DATE CLEAR_DATE CUST_CRE_REP_GRP  AMOUNT  \
0         232752  2019001  2019-01-18 2019-01-18               BB   23.14   
1         232752  2019002  2019-03-01 2019-03-01               BB   23.14   
2         232752  2019002  2019-02-28 2019-02-28               BB   23.14   
3         232752  2019002  2019-02-22 2019-02-22               BB   23.14   
4         232752  2019002  2019-02-14 2019-02-14               BB   23.14   
5         232752  2019003  2019-03-14 2019-03-14               BB   23.14   
6         232752  2019004  2019-04-03 2019-04-03               BB   23.14   
7         232752  2019004  2019-04-04 2019-04-04               BB   23.14   
8         232752  2019004  2019-04-11 2019-04-11               BB   23.14   
9         232752  2019004  2019-04-15 2019-04-15               BB   23.14   
10        232752  2019004  2019-04-17 2019-04-17               BB   23.14   
11        232752  2019004  2019-04-17 2019-04-17               BB   23.14   

CUSTOMER                     int64
FISCPER                      int64
NETDUE_DATE         datetime64[ns]
CLEAR_DATE          datetime64[ns]
CUST_CRE_REP_GRP            object
AMOUNT                     float64
ITEM_STATUS                 object
PAYMENT_TERMS               object
county                      object
confirmed_cases              int64
deaths                       int64
ON_TIME_PRED                 int64
dtype: object


In [4]:
# Check class balance
print(df['ON_TIME_PRED'].value_counts())
print(df['ITEM_STATUS'].value_counts())

df = df.loc[df['ITEM_STATUS'] == 'Closed']
df_train = df[['CUSTOMER', 'CUST_CRE_REP_GRP', 'AMOUNT', 'PAYMENT_TERMS', 'confirmed_cases', 'ON_TIME_PRED']]

# Label encoding for categorical variables
encode_cust = LabelEncoder()
encode_cust_rep = LabelEncoder()
encode_pay = LabelEncoder()

df_train['CUSTOMER'] = encode_cust.fit_transform(df_train['CUSTOMER'])
df_train['PAYMENT_TERMS'] = encode_pay.fit_transform(df_train['PAYMENT_TERMS'].astype(str))
df_train['CUST_CRE_REP_GRP'] = encode_cust_rep.fit_transform(df_train['CUST_CRE_REP_GRP'])

1    471454
0    389873
Name: ON_TIME_PRED, dtype: int64
Closed    802346
Open       58981
Name: ITEM_STATUS, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [8]:
# Dummy model
dummy_classifier = DummyClassifier(strategy='stratified')
dummy_x = df_train[['CUSTOMER', 'CUST_CRE_REP_GRP', 'AMOUNT', 'PAYMENT_TERMS', 'confirmed_cases']]
dummy_y = df_train['ON_TIME_PRED']
dummy_classifier.fit(dummy_x, dummy_y)
print('Dummy model score =', dummy_classifier.score(dummy_x, dummy_y))

Dummy model score = 0.5149212434535724


In [9]:
# Random Forest
df_imp = df_train[['CUSTOMER', 'CUST_CRE_REP_GRP', 'AMOUNT', 'PAYMENT_TERMS', 'confirmed_cases', 'ON_TIME_PRED']]
X = df_imp[['CUSTOMER', 'CUST_CRE_REP_GRP', 'AMOUNT', 'PAYMENT_TERMS', 'confirmed_cases']]
y = df_imp['ON_TIME_PRED']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

rf_clf = RandomForestClassifier(max_depth=2, random_state=0)
rf_clf.fit(X_train, y_train)
y_train_predicted = rf_clf.predict(X_train)
y_test_predicted = rf_clf.predict(X_test)
f1_train = f1_score(y_train, y_train_predicted)
f1_test = f1_score(y_test, y_test_predicted)
print("Random Forest, Train F1: {} Test F1: {}".format(f1_train, f1_test))
print('Feature importances =', rf_clf.feature_importances_)
print(confusion_matrix(y_test, y_test_predicted))
print("ROC_AUC Score =", roc_auc_score(y_test, y_test_predicted))



Random Forest, Train F1: 0.9471251620637101 Test F1: 0.9476865150851226
Feature importances = [0.38150497 0.19882351 0.00203599 0.22835009 0.18928543]
[[102674   6400]
 [  9717 145984]]
ROC_AUC Score = 0.9394580807854498


In [8]:
# Predictions on Open
pred_df = pd.read_csv('/project_data/data_asset/bq-results-20200505-181914-yglz5y8jrefn.csv')
pred_df2 = pd.read_csv('/project_data/data_asset/MI_County_Covid193.csv_shaped_21d54855.csv')
pred_df = pred_df.rename(columns={"custcounty": "county"})
pred_df = pred_df.merge(pred_df2, how='left', on='county')

pred_df = pred_df[['CUSTOMER', 'CustomerNm','FISCPER', 'NETDUE_DATE', 'CLEAR_DATE', 'CUST_CRE_REP_GRP', 'AMOUNT', 'ITEM_STATUS',
         'PAYMENT_TERMS', 'county', 'confirmed_cases', 'deaths']]
pred_df = pred_df.loc[pred_df['ITEM_STATUS'] == 'Open']
pred_df_2 = pred_df[['CUSTOMER', 'CUST_CRE_REP_GRP', 'AMOUNT', 'PAYMENT_TERMS', 'confirmed_cases']]

pred_df_2['CUSTOMER'] = encode_cust.transform(pred_df_2['CUSTOMER'])
pred_df_2['PAYMENT_TERMS'] = encode_pay.transform(pred_df_2['PAYMENT_TERMS'].astype(str))
pred_df_2['CUST_CRE_REP_GRP'] = encode_cust_rep.transform(pred_df_2['CUST_CRE_REP_GRP'])

pred_X = pred_df_2[['CUSTOMER', 'CUST_CRE_REP_GRP', 'AMOUNT', 'PAYMENT_TERMS', 'confirmed_cases']]
pred_y = rf_clf.predict(pred_X)
pred_df['Predictions'] = pred_y
print(pred_df['Predictions'].value_counts())

pred_df.loc[(pred_df.Predictions == 0),'Predictions']='Late'
pred_df.loc[(pred_df.Predictions == 1),'Predictions']='On Time'
print(pred_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


0    50553
1     8428
Name: Predictions, dtype: int64
        CUSTOMER          CustomerNm  FISCPER NETDUE_DATE CLEAR_DATE  \
485174    101113          Intec Inc   2019011  2020-01-25        NaN   
485175    101113          Intec Inc   2019011  2020-01-25        NaN   
485176    101113          Intec Inc   2019011  2020-01-25        NaN   
485177    101113          Intec Inc   2019011  2020-01-25        NaN   
485178    101113          Intec Inc   2019011  2020-01-25        NaN   
485179    101113          Intec Inc   2019011  2020-01-25        NaN   
485180    101113          Intec Inc   2019011  2020-01-25        NaN   
485181    101113          Intec Inc   2019011  2020-01-25        NaN   
485182    101113          Intec Inc   2019011  2020-01-26        NaN   
485401    101128             Bournco  2019011  2019-11-22        NaN   
485402    101128             Bournco  2019011  2019-11-22        NaN   
485403    101128             Bournco  2019011  2019-11-22        NaN   
485404    

In [7]:
# # Post Open Transaction Predictions as csv
# from project_lib import Project
# project = Project.access()
# project.save_data("AR_PREDICTIONS.csv", pred_df.to_csv(index=False))

In [75]:
# WML Authentication
from watson_machine_learning_client import WatsonMachineLearningAPIClient
wml_credentials = {
                    "url": "https://default-cpd-default.miracle311-273183-3cfa7977c157a3e8d61067e13964bdf8-0001.us-south.containers.appdomain.cloud",
                    "username": "miracle1",
                    "password": "miracle1",
                    "instance_id": "wml_local",
                    "version" : "2.5.0"
 }
client = WatsonMachineLearningAPIClient(wml_credentials)

In [27]:
# # Set space
# client.set.default_space('e2c4bfa5-0762-4fe4-a942-edc59772f226')
# print(client.deployments.list())

------------------------------------  --------------  -----  ------------------------  -------------
GUID                                  NAME            STATE  CREATED                   ARTIFACT_TYPE
de1bca18-fb9e-4d68-acb2-caa4b8c68878  AR_COVID_BATCH  ready  2020-05-13T21:15:37.249Z  model
74ca9c2f-dda7-4e53-bcd5-8f05200cd910  AR-deploy       ready  2020-05-13T11:00:39.643Z  model
------------------------------------  --------------  -----  ------------------------  -------------
None


In [28]:
# meta_props={
#    client.repository.ModelMetaNames.NAME: "AR_COVID Closing Prediction2",
#    client.repository.ModelMetaNames.RUNTIME_UID: "scikit-learn_0.20-py3.6",
#    client.repository.ModelMetaNames.TYPE: "scikit-learn_0.20"     
#  }

In [29]:
# model_artifact = client.repository.store_model(rf_clf,
#                                              meta_props=meta_props,
#                                              training_data=X_train,
#                                              training_target=y_train)
# model_uid = client.repository.get_model_uid(model_artifact)
# print("Model UID = " + model_uid)

Model UID = 97ecaa41-9101-4d3b-a2ae-60fb5292e357


In [30]:
# model_details = client.repository.get_details(model_uid)

In [31]:
# import json
# print(json.dumps(model_details, indent=2))

{
  "metadata": {
    "guid": "97ecaa41-9101-4d3b-a2ae-60fb5292e357",
    "id": "97ecaa41-9101-4d3b-a2ae-60fb5292e357",
    "modified_at": "2020-05-14T19:48:11.002Z",
    "created_at": "2020-05-14T19:48:10.002Z",
    "owner": "1000331001",
    "href": "/v4/models/97ecaa41-9101-4d3b-a2ae-60fb5292e357?space_id=e2c4bfa5-0762-4fe4-a942-edc59772f226"
  },
  "entity": {
    "name": "AR_COVID Closing Prediction2",
    "training_data_references": [
      {
        "location": {
          "bucket": "not_applicable"
        },
        "type": "fs",
        "connection": {
          "access_key_id": "not_applicable",
          "secret_access_key": "not_applicable",
          "endpoint_url": "not_applicable"
        },
        "schema": {
          "id": "1",
          "type": "DataFrame",
          "fields": [
            {
              "name": "CUSTOMER",
              "type": "int64"
            },
            {
              "name": "CUST_CRE_REP_GRP",
              "type": "int64"
          

In [34]:
# metaProps = {
# client.deployments.ConfigurationMetaNames.NAME: "AR_COVID_PRED_DEPLOYMENT2",
# client.deployments.ConfigurationMetaNames.ONLINE: {}
# }

In [35]:
# created_deployment = client.deployments.create(model_uid, metaProps)



#######################################################################################

Synchronous deployment creation for uid: '97ecaa41-9101-4d3b-a2ae-60fb5292e357' started

#######################################################################################


initializing
ready


------------------------------------------------------------------------------------------------
Successfully finished deployment creation, deployment_uid='3fab453f-fcf9-4eb0-a84d-530b48b1e5b0'
------------------------------------------------------------------------------------------------




In [36]:
# client.spaces.list()

------------------------------------  ---------------  ------------------------
GUID                                  NAME             CREATED
9da1fe0b-a99a-4bce-b872-60362ce7cf49  AR_COVID_DEPLOY  2020-05-13T20:35:02.075Z
ed08818a-842c-4816-a8fa-b41700efbf45  tomtest          2020-05-12T17:47:28.385Z
5919283a-afb3-404a-a5a7-0af79de49497  backpain         2020-05-12T12:30:49.198Z
e2c4bfa5-0762-4fe4-a942-edc59772f226  ML-deploy        2020-05-11T18:29:59.568Z
------------------------------------  ---------------  ------------------------


In [2]:
from ibm_ai_openscale import APIClient4ICP
from ibm_ai_openscale.supporting_classes import PayloadRecord

aios_credentials = {
                      "instance_guid": "00000000-0000-0000-0000-000000000000",
                      "username": "miracle1",
                      "password": "miracle1",
                      # address should be replaced with ip, port pair to be used in scripts outside ICP
                      "url": "https://default-cpd-default.miracle311-273183-3cfa7977c157a3e8d61067e13964bdf8-0001.us-south.containers.appdomain.cloud"
                   }

client = APIClient4ICP(aios_credentials)
subscription = client.data_mart.subscriptions.get(subscription_uid="f52fac49-a7d6-485d-8cb5-139899857b41")

"""
request_data - input to scoring endpoint in supported by Watson OpenScale format
response_data - output from scored model in supported by Watson OpenScale format
response_time - scoring request response time [ms] (integer type)

Example:

request_data = {
    "fields": ["AGE", "SEX", "BP", "CHOLESTEROL", "NA", "K"],
    "values": [[28, "F", "LOW", "HIGH", 0.61, 0.026]]
  }

response_data = {
    "fields": ["AGE", "SEX", "BP", "CHOLESTEROL", "NA", "K", "probability", "prediction", "DRUG"],
    "values": [[28, "F", "LOW", "HIGH", 0.61, 0.026, [0.82, 0.07, 0.0, 0.05, 0.03], 0.0, "drugY"]]
  }
"""

request_data = {
    "fields": ["CUSTOMER", "CUST_CRE_REP_GRP", "AMOUNT", "PAYMENT_TERMS", "confirmed_cases"],
    "values": [[2, 0, 229.62, 5, 311]]
  }
response_data = {
    "fields": ["CUSTOMER", "CUST_CRE_REP_GRP", "AMOUNT", "PAYMENT_TERMS", "confirmed_cases", "probability", "prediction"],
    "values": [[2, 0, 229.62, 5, 311, [0.97, 0.03], 0.0]]
  }

records = [PayloadRecord(request=request_data, response=response_data, response_time=18), 
                PayloadRecord(request=request_data, response=response_data, response_time=12)]

subscription.payload_logging.store(records=records)



In [76]:
col = X_test.columns.to_numpy()
col = list(col)
print(col)

['CUSTOMER', 'CUST_CRE_REP_GRP', 'AMOUNT', 'PAYMENT_TERMS', 'confirmed_cases']


In [77]:
print(X_test.shape)
test = X_test.sample(frac=0.01, replace=True, random_state=1)
print(test.shape)

(264775, 5)
(2648, 5)


In [78]:
# # Send predictions to WML
deployment_uid = 'cf8d70ab-6021-48b4-9139-1be414bac5c1'

job_payload = {
   client.deployments.ScoringMetaNames.INPUT_DATA: [{"values": test}]
 }
print(job_payload)

{'input_data': [{'values':         CUSTOMER  CUST_CRE_REP_GRP   AMOUNT  PAYMENT_TERMS  confirmed_cases
751262        12                 0   178.00              8              456
388090        17                 1     7.12              3            17298
277723         1                 3    58.74              1               61
56465          3                 3    21.36              1             7518
692918        15                 1    10.68              2               28
332675        17                 1     8.90              3            17298
645915        15                 1    13.35              2               28
154007        12                 0   158.42              8              456
471822         0                 3    24.03              1               93
329013        17                 1    10.68              3            17298
531554        18                 1    11.57              1               16
5947          18                 1    13.35              1   

In [79]:
client.set.default_space('e2c4bfa5-0762-4fe4-a942-edc59772f226')
job_details = client.deployments.score(deployment_uid,
                                        job_payload)
print(job_details)

{'predictions': [{'fields': ['prediction', 'probability'], 'values': [[1, [0.05086008070097716, 0.9491399192990227]], [0, [0.9302309913188346, 0.06976900868116558]], [1, [0.17074764495429345, 0.8292523550457066]], [1, [0.17074764495429345, 0.8292523550457066]], [0, [0.7764890555857545, 0.2235109444142455]], [0, [0.9302309913188346, 0.06976900868116558]], [0, [0.7764890555857545, 0.2235109444142455]], [1, [0.05086008070097716, 0.9491399192990227]], [1, [0.17074764495429345, 0.8292523550457066]], [0, [0.9302309913188346, 0.06976900868116558]], [0, [0.5839644152678586, 0.4160355847321415]], [0, [0.5839644152678586, 0.4160355847321415]], [1, [0.17074764495429345, 0.8292523550457066]], [0, [0.9302309913188346, 0.06976900868116558]], [1, [0.17074764495429345, 0.8292523550457066]], [1, [0.05086008070097716, 0.9491399192990227]], [1, [0.17074764495429345, 0.8292523550457066]], [1, [0.05086008070097716, 0.9491399192990227]], [1, [0.05086008070097716, 0.9491399192990227]], [0, [0.776489055585754

In [80]:
from ibm_ai_openscale import APIClient4ICP
from ibm_ai_openscale.supporting_classes import PayloadRecord

aios_credentials = {
                      "instance_guid": "00000000-0000-0000-0000-000000000000",
                      "username": "miracle1",
                      "password": "miracle1",
                      # address should be replaced with ip, port pair to be used in scripts outside ICP
                      "url": "https://default-cpd-default.miracle311-273183-3cfa7977c157a3e8d61067e13964bdf8-0001.us-south.containers.appdomain.cloud"
                   }

client = APIClient4ICP(aios_credentials)
subscription = client.data_mart.subscriptions.get(subscription_uid="f2281c5a-faaf-467e-b7a6-4c371dbf148a")

# values - feedback data records - replace sample values with proper ones
# fields - list of fields names (optional) - replace sample values with proper ones

values = X_test.values.tolist()
fields =  ['CUSTOMER', 'CUST_CRE_REP_GRP', 'AMOUNT', 'PAYMENT_TERMS', 'confirmed_cases']

subscription.feedback_logging.store(feedback_data=values, fields=fields)

ApiRequestFailure: Failure during feedback records storing. (POST https://default-cpd-default.miracle311-273183-3cfa7977c157a3e8d61067e13964bdf8-0001.us-south.containers.appdomain.cloud/v1/data_marts/00000000-0000-0000-0000-000000000000/feedback_payloads)
Status code: 400, body: {"trace":"ZjAyYzQxYWMtMzljYy00M2IwLTlhYTgtY2E3NmI5MGEyNmQ0","errors":[{"code":"AIQFS0004E","message":"Parsing failure - associated message: EntityStreamSizeException: actual entity size (Some(8620786)) exceeded content length limit (8388608 bytes)! You can configure this by setting `akka.http.[server|client].parsing.max-content-length` or calling `HttpEntity.withSizeLimit` before materializing the dataBytes stream.","parameters":["EntityStreamSizeException: actual entity size (Some(8620786)) exceeded content length limit (8388608 bytes)! You can configure this by setting `akka.http.[server|client].parsing.max-content-length` or calling `HttpEntity.withSizeLimit` before materializing the dataBytes stream."]}]}

In [50]:
trainx = pd.DataFrame(X_train).reset_index(drop=True)

In [51]:
print(trainx)

        CUSTOMER  CUST_CRE_REP_GRP    AMOUNT  PAYMENT_TERMS  confirmed_cases
0              1                 3     16.02              1               61
1             12                 0    394.27              8              456
2             17                 1      5.34              3            17298
3             18                 1     25.81              1               16
4             17                 1      5.34              3            17298
5             17                 1     19.58              3            17298
6              4                 3     35.60              1               93
7             15                 1      8.01              2               28
8             12                 0     72.98              8              456
9             11                 0   -159.31              1                8
10            12                 0    361.34              8              456
11            12                 0    255.43              8              456

In [52]:
trainy = pd.DataFrame(y_train).reset_index(drop=True)
print(trainy)

        ON_TIME_PRED
0                  1
1                  1
2                  0
3                  1
4                  0
5                  0
6                  1
7                  0
8                  1
9                  1
10                 1
11                 1
12                 1
13                 1
14                 1
15                 0
16                 1
17                 1
18                 1
19                 0
20                 0
21                 1
22                 1
23                 1
24                 0
25                 0
26                 0
27                 1
28                 0
29                 0
...              ...
537541             0
537542             1
537543             1
537544             1
537545             0
537546             0
537547             1
537548             1
537549             0
537550             1
537551             0
537552             1
537553             0
537554             0
537555             1
537556       

In [53]:
train_set = pd.concat([trainx, trainy], axis=1)

In [55]:
print(train_set)

        CUSTOMER  CUST_CRE_REP_GRP    AMOUNT  PAYMENT_TERMS  confirmed_cases  \
0              1                 3     16.02              1               61   
1             12                 0    394.27              8              456   
2             17                 1      5.34              3            17298   
3             18                 1     25.81              1               16   
4             17                 1      5.34              3            17298   
5             17                 1     19.58              3            17298   
6              4                 3     35.60              1               93   
7             15                 1      8.01              2               28   
8             12                 0     72.98              8              456   
9             11                 0   -159.31              1                8   
10            12                 0    361.34              8              456   
11            12                 0    25

In [56]:
from project_lib import Project
project = Project.access()
project.save_data("AR_CLASS_TRAIN.csv", train_set.to_csv(index=False))

{'file_name': 'AR_CLASS_TRAIN.csv',
 'message': 'File saved to project storage.',
 'asset_id': 'af2823e8-161f-4192-8e61-0e56ca62fa6d'}