In [2]:
import os
from urllib.parse import quote_plus

import pandas as pd
from sklearn.linear_model import LinearRegression
from sqlalchemy import create_engine

# PostgreSQL connection settings.
# They are read from the environment so real credentials never have to be
# committed with the notebook; the fallbacks are the local-development values
# this cell previously hard-coded, so an unconfigured run behaves as before.
db_host = os.environ.get("DB_HOST", "localhost")
db_port = int(os.environ.get("DB_PORT", "5432"))
db_name = os.environ.get("DB_NAME", "postgres")
db_user = os.environ.get("DB_USER", "postgres")
db_password = os.environ.get("DB_PASSWORD", "postgres")

# Create an SQLAlchemy engine.
# The user name and password are URL-encoded because characters such as
# '@', ':' or '/' would otherwise be parsed as URL delimiters.
engine = create_engine(
    f"postgresql://{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}:{db_port}/{db_name}"
)

# Query data from the database using the SQLAlchemy engine
query = "SELECT * FROM merchant_logs"
df = pd.read_sql_query(query, engine)


# Extract features (independent variables) and target variable.
# Columns are selected by name, so any extra columns returned by SELECT *
# are ignored by the model.
X_train = df[['transaction_amount', 'refund_amount', 'pa_customer', 'ntb_approved',
              'vas_service_count', 'ntb_reject', 'dpd_count', 'npa_count']]
y_train = df['score']

print(y_train)

# Create and train a linear regression model on every row that was loaded
model = LinearRegression()
model.fit(X_train, y_train)

# Print the learned weights (one per feature column, in the order listed
# above) and the intercept
weights = model.coef_
intercept = model.intercept_

print(f"Weights: {weights}")
print(f"Intercept: {intercept}")


0     111.5
1      98.0
2     125.5
3      39.0
4      58.0
5      21.0
6      96.0
7      50.0
8      91.0
9      28.0
10    142.0
11     80.0
12     58.0
13     -7.0
14    125.0
15     63.0
16     56.0
17     44.0
18     15.0
19    125.0
20     88.0
21      7.0
22    102.0
23     11.0
24     71.0
25     55.0
26      0.0
27     84.0
28    109.0
29    136.0
30    102.0
31     42.0
32     44.0
33    128.0
34     99.0
35     99.0
36    114.0
37    101.0
38     10.0
39     65.0
40     32.0
41     69.0
42     14.0
43     85.0
44     43.0
45     32.0
46     62.0
47    100.0
48     97.0
49     83.0
50     84.0
51     75.0
52     44.0
Name: score, dtype: float64
Weights: [ 1.01036427e-04  2.04877093e-05 -7.23505911e-04  9.99366766e-01
  2.00323733e+00 -1.00312695e+00 -3.05246164e-02 -3.75379064e-02]
Intercept: 0.05424217847681234


In [3]:
# Build the scoring matrix from the eight feature columns the model was fitted on,
# in the same order.
# NOTE(review): these are the very rows used for training, so the predictions
# below are in-sample and do not measure generalisation.
X_test = df.loc[:, [
    'transaction_amount', 'refund_amount', 'pa_customer', 'ntb_approved',
    'vas_service_count', 'ntb_reject', 'dpd_count', 'npa_count',
]]
predicted_scores = model.predict(X_test)

In [4]:
# Bare last expression: the notebook renders the array of predictions as this cell's output.
predicted_scores

array([ 1.10680668e+02,  9.85278818e+01,  1.25773267e+02,  3.91761926e+01,
        5.81592311e+01,  2.08794472e+01,  9.58683457e+01,  5.03916448e+01,
        9.12647417e+01,  2.78944257e+01,  1.42206515e+02,  7.98612444e+01,
        5.79116655e+01, -6.83625722e+00,  1.24840146e+02,  6.30076615e+01,
        5.63468792e+01,  4.36989847e+01,  1.45479863e+01,  1.25014202e+02,
        8.73123505e+01,  6.98449870e+00,  1.01669612e+02,  1.13411683e+01,
        7.15448899e+01,  5.44926502e+01, -1.15467397e-01,  8.41656058e+01,
        1.08456415e+02,  1.36523800e+02,  1.02109426e+02,  4.17456332e+01,
        4.43664647e+01,  1.27658643e+02,  9.92011886e+01,  9.85701445e+01,
        1.14385514e+02,  1.00806841e+02,  9.79261067e+00,  6.53127547e+01,
        3.26677943e+01,  6.92184880e+01,  1.41410680e+01,  8.52135919e+01,
        4.30272660e+01,  3.16202995e+01,  6.19188639e+01,  9.98804730e+01,
        9.69649035e+01,  8.25768664e+01,  8.48396553e+01,  7.47003756e+01,
        4.36907381e+01])

In [5]:
# Attach the predictions to the frame as a new column.
df['predicted_scores'] = predicted_scores

# Write the whole frame back over the source table.
# NOTE(review): if_exists='replace' drops and recreates merchant_logs from the
# DataFrame, so anything not represented in the frame is not carried over.
# Confirm that is acceptable, or write to a separate table.
df.to_sql(name='merchant_logs', con=engine, if_exists='replace', index=False)

# Close the engine's pooled connections (dispose() releases resources; it does not commit).
engine.dispose()

Storing the model

In [6]:
import joblib

# Serialise the fitted estimator to disk. dump() returns the list of file names
# it wrote, which the notebook echoes as this cell's output.
joblib.dump(model, filename='linear_regression_model.joblib')


['linear_regression_model.joblib']

Loading the model

In [7]:
import joblib

# Restore the estimator from the saved file, rebinding `model` to the deserialised copy.
# NOTE: joblib files are pickle-based; only load files that come from a trusted source.
model = joblib.load(filename='linear_regression_model.joblib')


In [8]:
# Score data with the `model` currently in scope.
# NOTE(review): no X_production is defined in this notebook; X_test is passed as a
# stand-in. Replace it with the real production feature frame (same feature
# columns, same order) before relying on these predictions.
predicted_scores_production = model.predict(X_test)

In [9]:
# Show the predictions computed by the reloaded model. Printing the earlier
# `predicted_scores` would only repeat the pre-save results and would not
# exercise the save/load round trip.
print(predicted_scores_production)

[ 1.10680668e+02  9.85278818e+01  1.25773267e+02  3.91761926e+01
  5.81592311e+01  2.08794472e+01  9.58683457e+01  5.03916448e+01
  9.12647417e+01  2.78944257e+01  1.42206515e+02  7.98612444e+01
  5.79116655e+01 -6.83625722e+00  1.24840146e+02  6.30076615e+01
  5.63468792e+01  4.36989847e+01  1.45479863e+01  1.25014202e+02
  8.73123505e+01  6.98449870e+00  1.01669612e+02  1.13411683e+01
  7.15448899e+01  5.44926502e+01 -1.15467397e-01  8.41656058e+01
  1.08456415e+02  1.36523800e+02  1.02109426e+02  4.17456332e+01
  4.43664647e+01  1.27658643e+02  9.92011886e+01  9.85701445e+01
  1.14385514e+02  1.00806841e+02  9.79261067e+00  6.53127547e+01
  3.26677943e+01  6.92184880e+01  1.41410680e+01  8.52135919e+01
  4.30272660e+01  3.16202995e+01  6.19188639e+01  9.98804730e+01
  9.69649035e+01  8.25768664e+01  8.48396553e+01  7.47003756e+01
  4.36907381e+01]


Incremental training of the model based on feedback

In [None]:
# Update the existing model with new feedback data.
existing_model = joblib.load('linear_regression_model.joblib')

feature_columns = ['transaction_amount', 'refund_amount', 'pa_customer', 'ntb_approved',
                   'vas_service_count', 'ntb_reject', 'dpd_count', 'npa_count']
target_column = 'score'

# NOTE(review): `new_data_batches` is not defined anywhere in the visible notebook.
# It is assumed to be an iterable of DataFrames with the same feature and
# target columns as `df` — confirm before running.
if hasattr(existing_model, 'partial_fit'):
    # Estimators that support online learning are updated one batch at a time,
    # using the batch itself rather than the original training frame.
    for new_data_batch in new_data_batches:
        existing_model.partial_fit(new_data_batch[feature_columns],
                                   new_data_batch[target_column])
else:
    # LinearRegression has no partial_fit, so calling it raises AttributeError.
    # For such estimators the update is a refit on the original rows plus
    # every feedback batch.
    combined = pd.concat([df, *new_data_batches], ignore_index=True)
    existing_model.fit(combined[feature_columns], combined[target_column])

#saving the model
joblib.dump(existing_model, 'updated_linear_reg_model.joblib')