# Customer Lifetime Value (CLTV) Prediction

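### This notebook forecasts the next 12 months of CLTV for each customer, using a partitioned custom model logged to the Snowflake Model Registry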

In [None]:
# Import python packages
import pandas as pd
import time
from snowflake.ml.model import custom_model
from snowflake.ml.registry import registry

# Obtain the already-active Snowpark session; every later cell issues its
# SQL statements and DataFrame operations through this `session` object.
from snowflake.snowpark.context import get_active_session
session = get_active_session()


### Set the database, schema and warehouse

In [None]:
# Point the session at the database, schema and warehouse used by this
# notebook. The statements run in this order, one query each.
for context_statement in (
    "use database CUSTOMER_SYNTHETIC_DATA",
    "use schema CLTV",
    "use warehouse CUSTOMER_CLTV",
):
    session.sql(context_statement).collect()

### Get the input dataset

In [None]:
# Reference the `customer_cltv` table through the session and print a preview
# of its rows. Later cells read the CUST_ID, TS and CLTV columns from it.
train_dataset = session.table('customer_cltv') #This is a snowpark dataframe
train_dataset.show()

In [None]:
# For Local testing
# Run this only for local testing - selects a small subset of the data by
# picking two random customer ids.
#
# collect() returns Row objects, so unwrap each one to its plain CUST_ID value
# before handing the list to Column.in_ (which is documented as taking literal
# values). This also makes the printed list show the ids themselves.
random_cust_ids = [
    row[0]
    for row in train_dataset.select(train_dataset.col("CUST_ID")).distinct().sample(n=2).collect()
]
print(random_cust_ids)
train_dataset_dummy = train_dataset.filter(train_dataset.col('CUST_ID').in_(random_cust_ids))
train_dataset_dummy.count()

In [None]:
# For Local testing
# Pull the sampled subset into pandas so the model's predict() can be tried
# locally, and print the column dtypes for inspection.
# (The original also ran a count() whose result was discarded and evaluated a
# bare `.dtypes` expression; both had no effect and were removed.)
train_dataset_dummy_pd = train_dataset_dummy.to_pandas()
print(train_dataset_dummy_pd.dtypes)

In [None]:
# Open a model Registry handle in whatever database and schema the session is
# currently using.
REGISTRY_DATABASE_NAME = session.get_current_database()
REGISTRY_SCHEMA_NAME = session.get_current_schema()

reg = registry.Registry(
    session=session,
    database_name=REGISTRY_DATABASE_NAME,
    schema_name=REGISTRY_SCHEMA_NAME,
)

### Custom model code
###### The code below is run using Snowflake's partitioned model feature
###### Documentation: https://docs.snowflake.com/LIMITEDACCESS/snowpark-ml-partitioned-training-inference
###### Custom model def doc: https://docs.snowflake.com/developer-guide/snowpark-ml/model-registry/custom-models#label-snowpark-model-registry-custom-model-writing

###### Things to note
###### 1. Keep the input and output of the custom model's predict function as pandas DataFrames. Snowflake automatically handles distributing the work for you. The function can still be called with a Snowpark DataFrame (the conversion to pandas is handled automatically by Snowflake).

In [None]:
class ForecastingModel(custom_model.CustomModel):
    """Custom model that forecasts 12 months of CLTV from one history frame.

    ``predict`` fits an additive-trend exponential smoothing model on the
    ``CLTV`` column of the frame it receives and returns the 12-step forecast.
    It is exposed through ``partitioned_inference_api``; the notebook runs it
    with ``partition_column="CUST_ID"``.
    """

    # Use the same decorator as for methods with FUNCTION inference.
    @custom_model.partitioned_inference_api
    def predict(self, df:pd.DataFrame) -> pd.DataFrame:    #Keep input and output here as pandas
        """Fit on the given history and return a 12-month CLTV forecast.

        Args:
            df: History rows with the columns ``CUST_ID``, ``TS`` (anything
                ``pd.to_datetime`` can parse) and ``CLTV``. The frame is not
                modified.

        Returns:
            A DataFrame with 12 rows and two columns: ``TS_FORECAST``
            (consecutive month starts strictly after the latest ``TS``) and
            ``CLTV_FORECAST`` (the forecast values).
        """
        ################## Replace below with your algorithm code ########################################
        # Imports are kept function-local as in the original.
        # NOTE(review): presumably so the logged model resolves them inside
        # the Snowflake runtime - confirm before hoisting them.
        import warnings

        import pandas as pd
        from statsmodels.tsa.holtwinters import ExponentialSmoothing

        horizon = 12  # number of monthly steps to forecast

        # Parse TS, order the rows chronologically and index by TS.
        # assign() returns a new frame, so the caller's DataFrame is untouched.
        history = (
            df.assign(TS=pd.to_datetime(df['TS']))
            .sort_values(by=['CUST_ID', 'TS'])
            .set_index('TS')
        )

        # Silence library warnings only while fitting, instead of installing a
        # process-wide "ignore" filter that would outlive this call.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            fitted = ExponentialSmoothing(history['CLTV'], trend="add", seasonal=None).fit()
            forecast = fitted.forecast(horizon)

        # The forecast describes the periods AFTER the last observation, so the
        # labels must start at the next month start. Starting the range at the
        # last observed timestamp itself would repeat that month whenever the
        # history already falls on month starts.
        first_forecast_month = history.index.max() + pd.offsets.MonthBegin(1)
        forecast_dates = pd.date_range(start=first_forecast_month, periods=horizon, freq='MS')

        return pd.DataFrame({
            'TS_FORECAST': forecast_dates,
            'CLTV_FORECAST': forecast.values,
        })


In [None]:
# Local smoke test of the model on the sampled customers.
# predict() fits a single series, so call it once per customer - the same
# split that partition_column="CUST_ID" applies when the logged model is run.
# Passing the whole sample at once would fit one model across the mixed
# histories of several customers.
cltv_forecasting_model = ForecastingModel()
local_predictions = pd.concat(
    {
        cust_id: cltv_forecasting_model.predict(customer_rows)
        for cust_id, customer_rows in train_dataset_dummy_pd.groupby("CUST_ID")
    },
    names=["CUST_ID"],  # label the outer index level with the customer id
)
print(local_predictions)

In [None]:
# Model signature docs:
# https://docs.snowflake.com/en/developer-guide/snowpark-ml/model-registry/model-signature
# Registering models and versions:
# https://docs.snowflake.com/en/developer-guide/snowpark-ml/model-registry/overview#registering-models-and-versions
options = {"function_type": "TABLE_FUNCTION"}

# Log the custom model to the registry. The sampled Snowpark DataFrame is
# supplied as the sample input data.
mv = reg.log_model(
    cltv_forecasting_model,
    model_name="cltv_forecast",
    conda_dependencies=[
        'pandas',
        'statsmodels==0.13.5',
        'snowflake-snowpark-python',
    ],
    options=options,
    sample_input_data=train_dataset_dummy,
)

In [None]:
#If you don't have permission to alter size of WH either switch to another bigger warehouse or continue with current one.
# Scale the session's current warehouse up before the large inference run.
# The identifier is interpolated exactly as the session returns it.
# NOTE(review): the original sliced off the first and last character,
# presumably to drop the surrounding double quotes. Keeping the identifier
# as returned preserves case-sensitive or special-character warehouse names
# and does not depend on the exact return format - confirm.
session.sql(
    f"ALTER WAREHOUSE {session.get_current_warehouse()} SET WAREHOUSE_SIZE='6X-Large';"
).collect()

In [None]:
# Replace the TS column with its TIMESTAMP_NTZ conversion before running the
# model on the full dataset.
# NOTE(review): presumably needed so TS matches the type the logged model
# signature expects - confirm against the table's column type.
from snowflake.snowpark.functions import to_timestamp_ntz
train_dataset = train_dataset.with_column("TS", to_timestamp_ntz(train_dataset["TS"]))


# Train and predict the CLTV values and save the results to a table in Snowflake
#### Partitioned models in Snowflake

In [None]:
# Time the run with perf_counter(): it is a monotonic clock intended for
# measuring intervals, whereas time.time() follows the wall clock and can
# jump if the system time is adjusted during a long run.
start_time = time.perf_counter()

# Train and Predict CLTV values for 100 million customers
# The model's PREDICT function is invoked once per CUST_ID partition.
results = mv.run(
  train_dataset, #Can be a pandas df or snowpark df
  function_name="PREDICT",
  partition_column="CUST_ID"
)
# Writing the table is what executes the run; it is included in the timing.
results.write.save_as_table('Prediction_results', mode='overwrite')
end_time = time.perf_counter()

# Calculate elapsed time in minutes
elapsed_time_minutes = (end_time - start_time) / 60
print(f"Execution time: {elapsed_time_minutes:.2f} minutes")


In [None]:
#Resize to small warehouse
# Scale the session's current warehouse back down once the large run is done.
# The identifier is interpolated exactly as the session returns it.
# NOTE(review): the original sliced off the first and last character,
# presumably to drop the surrounding double quotes. Keeping the identifier
# as returned preserves case-sensitive or special-character warehouse names
# and does not depend on the exact return format - confirm.
session.sql(
    f"ALTER WAREHOUSE {session.get_current_warehouse()} SET WAREHOUSE_SIZE='SMALL';"
).collect()

### Preview the saved prediction results

In [None]:
# Preview the predictions from the table that was just written.
# `results` is a lazy Snowpark DataFrame, so results.show(5) would execute the
# partitioned inference query again - by now on the downsized warehouse -
# instead of reading the stored output.
session.table('Prediction_results').show(5)