In [23]:
from google.cloud import storage
import joblib
import io
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.linear_model import LogisticRegression

In [17]:
# Create the Cloud Storage client that the download cells below use.
# NOTE(review): no project or credentials are passed, so this presumably relies on
# the runtime's default credentials — confirm for the environment you run in.
client = storage.Client()

In [18]:
# specify bucket
bucket_name = 'test-bucket-salome'

# specify model path (object name of the serialized model inside the bucket)
model_path = 'logistic_regression_model.pkl'

# get the bucket and blob, then save the model object as a local file;
# `bucket` is reused further down to fetch the CSV from the same bucket
bucket = client.get_bucket(bucket_name)
blob = bucket.blob(model_path)
blob.download_to_filename('local_model.pkl')

In [36]:
# load the model from the local file written by the download cell
# SECURITY: joblib.load unpickles the file, and unpickling can execute arbitrary
# code — only load artifacts that come from a bucket you trust.
# NOTE(review): presumably the installed scikit-learn must be compatible with the
# version that trained and saved this model — confirm before relying on the output.
model = joblib.load('local_model.pkl')

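Security note: a pickled model can execute arbitrary code when it is loaded, so only load model files from a trusted source. See https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations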


Import new data to use the loaded model for predictions

In [24]:
# specify file name
file_name = 'preprocessed_data.csv'

# get the blob from the bucket fetched earlier and read its content as text
blob = bucket.blob(file_name)
file_content = blob.download_as_text()
file_io = io.StringIO(file_content)  # file-like wrapper so pandas can parse the string

# parse the CSV text into a pandas df
# NOTE(review): the preview of this frame shows an 'Unnamed: 0' column, which looks
# like a saved index — confirm whether the model was trained with it as a feature
# or whether the file should be read with index_col=0.
df = pd.read_csv(file_io)

Pre-process the data in the same way as the training data

In [25]:
# peek at the first row to check the column names and raw value formats
df.head(1)

Unnamed: 0,CustomerID,Gender,Senior_Citizen,Partner,Dependents,Tenure_Months,Phone_Service,Multiple_Lines,Internet_Service,Online_Security,...,Device_Protection,Tech_Support,Streaming_TV,Streaming_Movies,Contract,Paperless_Billing,Payment_Method,Monthly_Charges,Total_Charges,Churn_Label
0,3668-QPYBK,Male,No,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes


In [26]:
# remove the CustomerID column from the frame
df = df.drop(labels='CustomerID', axis='columns')

In [27]:
# discard every row that has at least one missing value
# (the remaining rows keep their original index labels, so the index may have gaps)
df = df.dropna(axis='index', how='any')

In [28]:
# integer-encode every object-dtype column, refitting the same encoder for each one
# NOTE(review): the encoder is fitted on this frame rather than reused from training,
# so the integer codes only match the training codes if each column contains the same
# set of categories here — confirm, or persist the fitted encoders with the model.
label_encoder = LabelEncoder()
categorical_columns = df.select_dtypes(include=['object']).columns
for column in categorical_columns:
    encoded_values = label_encoder.fit_transform(df[column])
    df[column] = encoded_values

In [33]:
# separate the target column (y) from the remaining feature columns (X)
y = df['Churn_Label']
X = df.drop(labels='Churn_Label', axis='columns')

In [34]:
# normalize - scaler
# Standardize every feature column and rebuild the frame with the same column names.
# The original row labels are kept on purpose: the earlier dropna() may have removed
# rows, leaving gaps in the index, and a fresh 0..n-1 index would no longer line up
# by label with y / df.
# NOTE(review): the scaler is fitted on this data instead of reusing the scaler that
# was fitted on the training set, so the features may be scaled differently from what
# the model saw during training — confirm, or persist the fitted scaler with the model.
scaler = StandardScaler()
X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns, index=X.index)

In [38]:
# run the loaded model on the prepared feature frame
# (presumably one predicted class label per row of X, in row order — confirm)
predictions = model.predict(X)

In [39]:
# show the predicted labels (the printed output abbreviates long arrays with "...")
print(predictions)

[0 0 0 ... 0 0 0]
