In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import pickle

# Step 1: Load the dataframe
df = pd.read_csv('dev/data/teleCust1000t.csv')

# Step 2: Preprocess the data using standard scaling
X = df.drop('custcat', axis=1)
y = df['custcat']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 3: Generate a machine learning model using logistic regression
learning_rate = 0.01
model = LogisticRegression(C=100, solver='lbfgs', max_iter=1000)
model.fit(X_train_scaled, y_train)

y_train_pred = model.predict(X_train_scaled)
y_test_pred = model.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

print(f"Learning Rate: {learning_rate}")
print(f"Training Accuracy: {train_accuracy}")
print(f"Testing Accuracy: {test_accuracy}")
print(classification_report(y_test, y_test_pred))
print(f"Model Parameters: {model.get_params()}")
print(X_train.columns.tolist())
print(y_train.name)

# Step 5: Save the best performing model using pickle
model_data = {
    'model': model,
    'scaler': scaler,
    'input_schema': X_train.columns.tolist(),
    'output_schema': y_train.name
}

with open('dev/model/best_lr_model.pkl', 'wb') as file:
    pickle.dump(model_data, file)

# Step 6: Document clearly how to consume the model
"""
To consume the saved model, follow these steps:

1. Load the model using pickle:
    with open('best_lr_model.pkl', 'rb') as file:
        model_data = pickle.load(file)
        model = model_data['model']
        scaler = model_data['scaler']
        input_schema = model_data['input_schema']
        output_schema = model_data['output_schema']

2. Preprocess your input data using the same StandardScaler used during training:
    scaler = StandardScaler()
    scaler.fit(X_train)  # Use the training data to fit the scaler
    input_data_scaled = scaler.transform(input_data)

3. Make predictions using the loaded model:
    predictions = model.predict(input_data_scaled)
"""


Learning Rate: 0.01
Training Accuracy: 0.43625
Testing Accuracy: 0.435
              precision    recall  f1-score   support

           1       0.46      0.43      0.45        60
           2       0.35      0.33      0.34        39
           3       0.46      0.51      0.48        55
           4       0.43      0.43      0.43        46

    accuracy                           0.43       200
   macro avg       0.43      0.43      0.43       200
weighted avg       0.43      0.43      0.43       200

Model Parameters: {'C': 100, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 1000, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': None, 'solver': 'lbfgs', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}
['region', 'tenure', 'age', 'marital', 'address', 'income', 'ed', 'employ', 'retire', 'gender', 'reside']
custcat


"\nTo consume the saved model, follow these steps:\n\n1. Load the model using pickle:\n    with open('best_lr_model.pkl', 'rb') as file:\n        model_data = pickle.load(file)\n        model = model_data['model']\n        scaler = model_data['scaler']\n        input_schema = model_data['input_schema']\n        output_schema = model_data['output_schema']\n\n2. Preprocess your input data using the same StandardScaler used during training:\n    scaler = StandardScaler()\n    scaler.fit(X_train)  # Use the training data to fit the scaler\n    input_data_scaled = scaler.transform(input_data)\n\n3. Make predictions using the loaded model:\n    predictions = model.predict(input_data_scaled)\n"

In [2]:
# Verify model has predict method before saving
print(f"Model type before saving: {type(model)}")
print(f"Model has predict method: {hasattr(model, 'predict')}")
print(f"Model attributes: {dir(model)}")

# Save model
model_path = 'dev/model/best_lr_model.pkl'
os.makedirs(os.path.dirname(model_path), exist_ok=True)

with open(model_path, 'wb') as f:
    pickle.dump(model, f)

# Verify saved model
with open(model_path, 'rb') as f:
    loaded_model = pickle.load(f)
    print(f"\nLoaded model type: {type(loaded_model)}")
    print(f"Loaded model has predict: {hasattr(loaded_model, 'predict')}")

Model type before saving: <class 'sklearn.linear_model._logistic.LogisticRegression'>
Model has predict method: True
Model attributes: ['C', '__annotations__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__sklearn_clone__', '__str__', '__subclasshook__', '__weakref__', '_build_request_for_signature', '_check_feature_names', '_check_n_features', '_doc_link_module', '_doc_link_template', '_doc_link_url_param_generator', '_estimator_type', '_get_default_requests', '_get_doc_link', '_get_metadata_request', '_get_param_names', '_get_tags', '_more_tags', '_parameter_constraints', '_predict_proba_lr', '_repr_html_', '_repr_html_inner', '_repr_mimebundle_', '_validate_data', '_validate_params', 'class_weight', '