# Churn Feature Pipeline

### Loading the data and feature engineering.

In [1]:
# Imports
!pip install -U hopsworks --quiet

In [2]:
import pandas as pd

# Silence all Python warnings for the rest of the notebook.
# NOTE(review): a blanket 'ignore' also hides deprecation/FutureWarning messages
# from libraries such as pandas — consider narrowing the filter.
import warnings
warnings.filterwarnings('ignore')

### Loading the Data

In [3]:
# Locations of the three source CSV files used in this notebook
demography_url = "https://repo.hops.works/dev/davit/churn/demography.csv"
customer_info_url = "https://repo.hops.works/dev/davit/churn/customer_info.csv"
subscriptions_url = "https://repo.hops.works/dev/davit/churn/subscriptions.csv"

# Demography data is loaded as-is (no datetime parsing is requested for it)
demography_df = pd.read_csv(demography_url)

# Customer info and subscriptions: parse the "datetime" column while reading
customer_info_df = pd.read_csv(customer_info_url, parse_dates=["datetime"])
subscriptions_df = pd.read_csv(subscriptions_url, parse_dates=["datetime"])

In [4]:
# Preview the first three rows of the demography data
demography_df.head(3)

Unnamed: 0,customerID,gender,SeniorCitizen,Dependents,Partner
0,7590-VHVEG,Female,0,No,Yes
1,5575-GNVDE,Male,0,No,No
2,3668-QPYBK,Male,0,No,No


In [5]:
# Preview the first three rows of the customer info data
customer_info_df.head(3)

Unnamed: 0,customerID,Contract,tenure,PaymentMethod,PaperlessBilling,MonthlyCharges,TotalCharges,Churn,datetime
0,7590-VHVEG,Month-to-month,1,Electronic check,Yes,29.85,29.85,No,2021-10-25 15:07:18.625390512
1,5575-GNVDE,One year,34,Mailed check,No,56.95,1889.5,No,2020-06-28 06:32:24.674808292
2,3668-QPYBK,Month-to-month,2,Mailed check,Yes,53.85,108.15,Yes,2021-12-05 20:10:58.449304176


In [6]:
# Preview the first three rows of the subscriptions data
subscriptions_df.head(3)

Unnamed: 0,customerID,DeviceProtection,OnlineBackup,OnlineSecurity,InternetService,MultipleLines,PhoneService,TechSupport,StreamingMovies,StreamingTV,datetime
0,7590-VHVEG,No,Yes,No,DSL,No phone service,No,No,No,No,2021-10-25 15:07:18.625390512
1,5575-GNVDE,Yes,No,Yes,DSL,No,Yes,No,No,No,2020-06-28 06:32:24.674808292
2,3668-QPYBK,No,Yes,Yes,DSL,No,Yes,No,No,No,2021-12-05 20:10:58.449304176


## Feature Engineering

In [7]:
# Convert "TotalCharges" to numeric: values that cannot be parsed become NaN
# (errors="coerce") and are then replaced with 0.
# The result is assigned back to the column rather than calling
# `fillna(..., inplace=True)` on the selected column: that chained in-place
# pattern is deprecated in pandas and does not update the DataFrame when
# Copy-on-Write is enabled.
customer_info_df["TotalCharges"] = pd.to_numeric(
    customer_info_df["TotalCharges"], errors="coerce"
).fillna(0)

# Encode the "Churn" label as 0 for "No" and 1 for "Yes"; any other value is
# left unchanged, so re-running this cell is harmless.
# `infer_objects()` converts the resulting object column to a numeric dtype
# when every value allows it, without relying on implicit downcasting in `replace`.
customer_info_df["Churn"] = (
    customer_info_df["Churn"].replace({"No": 0, "Yes": 1}).infer_objects()
)

## Creating Feature Groups

To create a feature group, you first need to connect to the Hopsworks feature store.

In [10]:
import hopsworks

# Log in to Hopsworks and keep a handle to the project.
# NOTE(review): per the recorded output, login may ask for an API key —
# confirm how credentials are supplied in your environment.
project = hopsworks.login()

# Handle to the project's feature store, used below to create the feature groups
fs = project.get_feature_store()

Connected. Call `.close()` to terminate connection gracefully.
Copy your Api Key (first register/login): https://c.app.hopsworks.ai/account/api/generated
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1133716
Connected. Call `.close()` to terminate connection gracefully.


To create a feature group you need to give it a name and specify a primary key. It is also good to provide a description of the contents of the feature group.

In [11]:
# Fetch the 'customer_info' feature group (version 1), creating it if needed.
# Rows are keyed by customer ID, and "datetime" is registered as the event time.
customer_info_fg = fs.get_or_create_feature_group(
    name="customer_info",
    version=1,
    primary_key=["customerID"],
    event_time="datetime",
    description="Customer info for churn prediction",
)

In [13]:
# Insert the prepared customer info DataFrame into the feature group.
# Per the recorded output, this uploads the rows and launches a materialization job.
customer_info_fg.insert(customer_info_df)

Uploading Dataframe: 0.00% |          | Rows 0/7043 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: customer_info_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/1133716/jobs/named/customer_info_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x2881b96a990>, None)

In [14]:
# Human-readable descriptions for each feature of the 'customer_info' feature group.
# Feature names are written in lowercase here.
feature_descriptions = [
    {"name": "customerid", "description": "Customer id"},
    # Fixed typo: this feature holds the contract type ("contact" -> "contract")
    {"name": "contract", "description": "Type of contract"},
    {"name": "tenure", "description": "How long they’ve been a customer"},
    {"name": "paymentmethod", "description": "Payment method"},
    {"name": "paperlessbilling", "description": "Whether customer has paperless billing or not"},
    {"name": "monthlycharges", "description": "Monthly charges"},
    {"name": "totalcharges", "description": "Total charges"},
    {"name": "churn", "description": "Whether customer has left within the last month or not"},
    {"name": "datetime", "description": "Date when the customer information was recorded"},
]

# Attach each description to the corresponding feature in the feature store
for feature in feature_descriptions:
    customer_info_fg.update_feature_description(feature["name"], feature["description"])

In [15]:
# Fetch the 'customer_demography_info' feature group (version 1), creating it if needed.
# Rows are keyed by customer ID; no event time is passed for this group.
demography_fg = fs.get_or_create_feature_group(
    name="customer_demography_info",
    version=1,
    primary_key=["customerID"],
    description="Customer demography info for churn prediction.",
)

# Write the demography DataFrame into the feature group
demography_fg.insert(demography_df)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1133716/fs/1124419/fg/1305936


Uploading Dataframe: 0.00% |          | Rows 0/7043 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: customer_demography_info_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/1133716/jobs/named/customer_demography_info_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x2881b082300>, None)

In [16]:
# Descriptions for the features of the 'customer_demography_info' feature group,
# built from (feature name, description) pairs.
feature_descriptions = [
    {"name": feature_name, "description": feature_text}
    for feature_name, feature_text in (
        ("customerid", "Customer id"),
        ("gender", "Customer gender"),
        ("seniorcitizen", "Whether customer is a senior citizen or not"),
        ("dependents", "Whether customer has dependents or not"),
        ("partner", "Whether customer has partners or not"),
    )
]

# Attach each description to the corresponding feature in the feature store
for entry in feature_descriptions:
    demography_fg.update_feature_description(entry["name"], entry["description"])

In [17]:
# Fetch the 'customer_subscription_info' feature group (version 1), creating it if needed.
# Rows are keyed by customer ID, and "datetime" is registered as the event time.
subscription_fg = fs.get_or_create_feature_group(
    name="customer_subscription_info",
    version=1,
    primary_key=["customerID"],
    event_time="datetime",
    description="Customer subscription info for churn prediction.",
)

# Write the subscriptions DataFrame into the feature group
subscription_fg.insert(subscriptions_df)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1133716/fs/1124419/fg/1310026


Uploading Dataframe: 0.00% |          | Rows 0/7043 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: customer_subscription_info_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/1133716/jobs/named/customer_subscription_info_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x2881b174560>, None)

In [18]:
# Descriptions for the features of the 'customer_subscription_info' feature group,
# built from (feature name, description) pairs.
feature_descriptions = [
    {"name": feature_name, "description": feature_text}
    for feature_name, feature_text in (
        ("customerid", "Customer id"),
        ("deviceprotection", "Whether customer has signed up for device protection service"),
        ("onlinebackup", "Whether customer has signed up for online backup service"),
        ("onlinesecurity", "Whether customer has signed up for online security service"),
        ("internetservice", "Whether customer has signed up for internet service"),
        ("multiplelines", "Whether customer has signed up for multiple lines service"),
        ("phoneservice", "Whether customer has signed up for phone service"),
        ("techsupport", "Whether customer has signed up for tech support service"),
        ("streamingmovies", "Whether customer has signed up for streaming movies service"),
        ("streamingtv", "Whether customer has signed up for streaming TV service"),
        ("datetime", "Date when the customer information was recorded"),
    )
]

# Attach each description to the corresponding feature in the feature store
for entry in feature_descriptions:
    subscription_fg.update_feature_description(entry["name"], entry["description"])