In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from joblib import dump

# Sample training data
data = pd.DataFrame({
    'created_at': pd.date_range(start='2024-01-01', periods=100, freq='H'),
    'status': ['shipped'] * 50 + ['pending'] * 50
})

data['status_encoded'] = pd.factorize(data['status'])[0]
data['year'] = data['created_at'].dt.year
data['month'] = data['created_at'].dt.month
data['day_of_week'] = data['created_at'].dt.dayofweek
data['hour'] = data['created_at'].dt.hour
data['day'] = data['created_at'].dt.day
data['timestamp'] = data['created_at'].astype(int) / 10**9

# Features and labels
X = data[['timestamp', 'status_encoded', 'year', 'month', 'day_of_week', 'hour', 'day']]
y = data['status_encoded']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Train logistic regression model
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# Save the model and scaler
dump(model, 'logistic_regression_model.joblib')
dump(scaler, 'scaler.joblib')


  'created_at': pd.date_range(start='2024-01-01', periods=100, freq='H'),


['scaler.joblib']

In [None]:
import joblib

# Save the model
joblib.dump(model, 'logistic_regression_model.joblib')

# To load it later
model = joblib.load('logistic_regression_model.joblib')


In [None]:
os.listdir()


In [15]:
import os

# Check if 'inference.py' exists in the current directory
if os.path.isfile('inference.py'):
    print("inference.py exists!")
else:
    print("inference.py does not exist.")

Class Distribution Before Balancing:
status_encoded
1    1288
2    1265
0    1256
3    1191
Name: count, dtype: int64
Class Distribution After Resampling (RandomOverSampler):
status_encoded
0    1288
1    1288
2    1288
3    1288
Name: count, dtype: int64

Model Accuracy: 1.0

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       258
           1       1.00      1.00      1.00       257
           2       1.00      1.00      1.00       258
           3       1.00      1.00      1.00       258

    accuracy                           1.00      1031
   macro avg       1.00      1.00      1.00      1031
weighted avg       1.00      1.00      1.00      1031


Confusion Matrix:
[[258   0   0   0]
 [  0 257   0   0]
 [  0   0 258   0]
 [  0   0   0 258]]


In [None]:
customers_df = pd.read_csv('customers.csv')
print(customers_df.head())



In [None]:
import boto3
import pandas as pd

# Load data from S3
s3_client = boto3.client('s3')
bucket_name = 'logistics-data-sealink'  
file_name = 'dashboard-data/orders.csv' 

# Download the file from S3
s3_client.download_file(bucket_name, file_name, 'orders.csv')

# Load CSV into pandas dataframe
df = pd.read_csv('orders.csv')


In [5]:
import boto3
import joblib
import os

# S3 setup
bucket_name = "logistics-data-sealink"
model_key = "logistic_regression_model.joblib"  # Just the key/path inside the bucket, not the full s3 URL
local_model_path = "/tmp/logistic_regression_model.joblib"  # Local path in the SageMaker environment

# Download model from S3
s3 = boto3.client('s3')
s3.download_file(bucket_name, model_key, local_model_path)

# Load the model using joblib
model = joblib.load(local_model_path)

print("Model loaded successfully!")


In [10]:
import joblib

# Load the extracted model
model = joblib.load('logistic_regression_model.joblib')
print(model)


1,000 customers and 5,000 orders have been generated and saved to 'customers_1000_records.json' and 'orders_5000_records.json'.


In [14]:
print(model.__dict__)  # Inspect the model's attributes


Index(['order_id', 'customer_id', 'order_date', 'delivery_date', 'status',
       'total_amount', 'shipping_address', 'billing_address', 'payment_method',
       'payment_status', 'created_at', 'updated_at'],
      dtype='object')
