In [3]:
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import numpy as np

# Example historical sales order data (dummy data): 24 month-end observations.
data = {
    'date': pd.date_range(start='2022-01-01', periods=24, freq='M'),
    'sales_order_amount': [100, 150, 120, 130, 110, 160, 170, 180, 140, 130, 150, 160,
                           170, 180, 150, 140, 160, 170, 180, 150, 160, 170, 180, 190]
}
sales_df = pd.DataFrame(data).set_index('date')

# An index built from a column via set_index() carries no frequency, so
# statsmodels would have to infer it (and warns while doing so). Re-attach the
# frequency explicitly; pd.infer_freq returns the alias that the installed
# pandas version expects, and asfreq keeps the same rows and index name here
# because the dates are already regularly spaced.
sales_df = sales_df.asfreq(pd.infer_freq(sales_df.index))

# Fit a seasonal ARIMA model via SARIMAX: non-seasonal order (p, d, q) = (1, 1, 1)
# and seasonal order (P, D, Q, s) = (1, 1, 1, 12). The orders are placeholders and
# should be chosen from an analysis of the real data.
# NOTE: with only 24 observations and a 12-period seasonal difference, statsmodels
# reports too few observations to estimate starting parameters, so treat the
# forecast below as illustrative only.
model = SARIMAX(sales_df['sales_order_amount'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
results = model.fit()

# Forecast future sales orders
forecast_steps = 6  # For the next 6 months
forecast = results.get_forecast(steps=forecast_steps)
forecast_df = forecast.summary_frame()

# Display the point forecast together with its confidence interval bounds
print(forecast_df[['mean', 'mean_ci_lower', 'mean_ci_upper']])

# Example credit data (dummy data): current outstanding amount and credit limit per row
credit_data = {
    'current_outstanding_amount': [1000, 2000, 1500, 5000, 1200, 3000, 4000, 2500, 3500, 2200,
                                   1000, 200, 300, 400, 500, 1000, 600, 500, 700, 800,
                                   65, 7600, 3400, 7800],
    'credit_limit': [4000, 3000, 6000, 2500, 4500, 5000, 3000, 4000, 3000, 100,
                     20, 50, 60, 70, 70, 80, 90, 100, 400, 500,
                     500, 20, 50, 40]
}
credit_df = pd.DataFrame(credit_data)

# Attach a forecasted sales order amount to every credit row so the next-period
# status of these customers can be predicted.
num_forecasts = len(forecast_df)
num_samples = len(credit_df)

forecast_means = forecast_df['mean'].values
if num_forecasts < num_samples:
    # Fewer forecasts than rows: 'edge' padding repeats the last forecasted
    # value until there is one value per row.
    predicted_amounts = np.pad(forecast_means, (0, num_samples - num_forecasts), 'edge')
else:
    # Enough forecasts: keep only the first num_samples of them.
    predicted_amounts = forecast_means[:num_samples]

credit_df = credit_df.assign(
    predicted_sales_order_amount=predicted_amounts,
    # Outstanding amount if the predicted order is added on top of the current one
    potential_new_outstanding_amount=lambda frame: (
        frame['current_outstanding_amount'] + frame['predicted_sales_order_amount']
    ),
    # Flag rows whose potential outstanding amount is above the credit limit
    exceeds_credit_limit=lambda frame: (
        frame['potential_new_outstanding_amount'] > frame['credit_limit']
    ),
)

# Feature matrix and boolean target for the credit-limit classifier
feature_columns = ['current_outstanding_amount', 'credit_limit', 'predicted_sales_order_amount']
X = credit_df[feature_columns]
y = credit_df['exceeds_credit_limit']

# Overall class balance
print(y.value_counts())

# Hold out 20% of the rows; stratify on y so both splits keep the class proportions
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Class balance of the training and test splits
print(y_train.value_counts())
print(y_test.value_counts())

# A classifier can only be fitted when the training target holds both classes
if y_train.nunique() >= 2:
    # NOTE: this rebinds `model`, which previously referred to the SARIMAX model
    model = LogisticRegression()
    model.fit(X_train, y_train)

    # Predict on the held-out rows and report per-class metrics
    y_pred = model.predict(X_test)
    print(classification_report(y_test, y_pred))

    # Show the held-out features next to the predicted label
    test_results = X_test.assign(predicted_exceeds_limit=y_pred)
    print(test_results)
else:
    print("Training data does not contain both classes.")


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'


sales_order_amount        mean  mean_ci_lower  mean_ci_upper
2024-01-31          196.648768     146.145615     247.151920
2024-02-29          231.278880     177.628568     284.929193
2024-03-31          211.465168     152.666065     270.264270
2024-04-30          209.193130     147.655184     270.731076
2024-05-31          203.552437     137.497695     269.607180
2024-06-30          239.528017     171.010808     308.045227
True     17
False     7
Name: exceeds_credit_limit, dtype: int64
True     13
False     6
Name: exceeds_credit_limit, dtype: int64
True     4
False    1
Name: exceeds_credit_limit, dtype: int64
              precision    recall  f1-score   support

       False       1.00      1.00      1.00         1
        True       1.00      1.00      1.00         4

    accuracy                           1.00         5
   macro avg       1.00      1.00      1.00         5
weighted avg       1.00      1.00      1.00         5

    current_outstanding_amount  credit_limit  predict

In [4]:
import pickle
# Persist the fitted model. The context manager closes the file handle, and
# thereby flushes the pickle to disk, even if dumping raises.
with open('model1.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [5]:
# Reload the persisted model; the context manager closes the file handle afterwards.
# NOTE: pickle.load can execute arbitrary code, so only load files from a trusted source.
with open('model1.pkl', 'rb') as model_file:
    pickled_model = pickle.load(model_file)

In [6]:
# Display the reloaded model's predictions for the held-out rows in X_test
pickled_model.predict(X_test)

array([ True,  True,  True, False,  True])