In [1]:
from flask import Flask, request, jsonify
import pickle
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [2]:
app = Flask(__name__)

# Load the pre-trained models and encoder
with open('base_model.pkl', 'rb') as f:
    base_model = pickle.load(f)

with open('min_price_model.pkl', 'rb') as f:
    lower_model = pickle.load(f)

with open('max_price_model.pkl', 'rb') as f:
    upper_model = pickle.load(f)


In [None]:
# read json response to create the data

def process_data_from_json(data):
    
    

In [3]:
# Define preprocessing
def preprocess_input(data):
    try:
        print("Raw Data", str(data))
        competitor_columns = competitor_columns = [col for col in data.columns if col.startswith('competitor')][:5]
        other_columns = [col for col in data.columns if not col.startswith('competitor')]
        keep_columns = other_columns + competitor_columns

        data[competitor_columns] = data[competitor_columns].apply(lambda x: x.fillna(x.mean()), axis=1)
        data = data[keep_columns]
        data['max_discount'] = np.round(data['actual_price'] - data[competitor_columns].min(axis=1), 2)
    
        data['min_price'] = data[competitor_columns].min(axis=1)
        # numeric_features = [col for col in data.columns if pd.api.types.is_numeric_dtype(data[col])]
        # preprocessor = ColumnTransformer(
        #     transformers=[
        #         ('num', StandardScaler(), numeric_features)
        #     ])
        # print("Transformed Data", data)
        # preprocessor.fit(data)
        # return preprocessor.transform(data)
        print('TRANSFORMED DATA', data)
        return data.drop(columns=['product_id', 'product_type'])
    except Exception as e:
        print("Error", str(e))
    

In [None]:
# Endpoint to predict promotional price
@app.route('/predict', methods=['POST'])
def predict():
    try:
        # Get JSON request data
        json_data = request.get_json()

        # Check if the input data is in the correct format
        if not isinstance(json_data, list):
            raise ValueError("Input data should be a list of dictionaries")
        
        for item in json_data:
            if not isinstance(item, dict):
                raise ValueError("Each item in the input data should be a dictionary")
                
        # Convert JSON to DataFrame
        input_data = pd.DataFrame(json_data)
       # input_data = input_data.drop(columns=['product_id', 'product_type', 'timestamp', 'promotional_price'])
        # Preprocess input data
        X_input = preprocess_input(input_data)
        print('INPUT DATA', X_input)

        # Predict the base price
        base_predictions = base_model.predict(X_input)
        
        # Predict the quantiles
        lower_bound = lower_model.predict(X_input)
        upper_bound = upper_model.predict(X_input)
        
        # Create a response JSON
        response = []
        for idx, row in input_data.iterrows():
            result = {
                'Product_ID': row['product_id'],
                'Predicted_Price': base_predictions[idx],
                'Predicted Min Price': lower_bound[idx],
                'Predicted Max Price': upper_bound[idx]
            }
            response.append(result)
        
        return jsonify(response)

    except Exception as e:
        print(str(e))
        return jsonify({'error: something went wrong': str(e)}), 500

if __name__ == '__main__':
    app.run(debug=True, port=6767, use_reloader=False)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:6767
Press CTRL+C to quit


Raw Data   product_id  actual_price  competitor1  competitor2  competitor3  \
0        tv1          1200         1000         1100         1050   

   competitor4  competitor5 product_type  
0          950          980  electronics  
TRANSFORMED DATA   product_id  actual_price product_type  competitor1  competitor2  \
0        tv1          1200  electronics         1000         1100   

   competitor3  competitor4  competitor5  max_discount  min_price  
0         1050          950          980           250        950  
INPUT DATA    actual_price  competitor1  competitor2  competitor3  competitor4  \
0          1200         1000         1100         1050          950   

   competitor5  max_discount  min_price  
0          980           250        950  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['max_discount'] = np.round(data['actual_price'] - data[competitor_columns].min(axis=1), 2)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['min_price'] = data[competitor_columns].min(axis=1)
127.0.0.1 - - [28/Jun/2024 17:05:14] "POST /predict HTTP/1.1" 200 -


In [None]:
%tb