In [None]:
import joblib
import numpy as np
import pandas as pd

In [153]:
# Generate synthetic data
np.random.seed(42)

# Number of data points
n_samples = 100000

# Generate random product_quality, product_quantity, product_price, desired_quality, and baseline_product_price
product_quality = np.random.uniform(50, 100, n_samples)
product_quantity_beta = np.random.randint(1, 250, n_samples)
product_quantity = np.array([min(i, 100) for i in product_quantity_beta])
product_price = np.random.uniform(500, 2500, n_samples)
desired_quality = np.full(n_samples, 60)
baseline_product_price = np.full(n_samples, 1500)

# Create a DataFrame
data = pd.DataFrame({
    "Product_quality": product_quality,
    "Product_quantity": product_quantity,
    "Product_price": product_price,
    "Desired_quality": desired_quality,
    "Baseline_product_price": baseline_product_price
})

# Define a function to classify prices
def classify_price(row):
    price_diff_percent = ((row["Product_price"] - row["Baseline_product_price"]) / row["Baseline_product_price"]) * 100
    if abs(price_diff_percent) <= 10:
        return "acceptable_price"
    elif price_diff_percent < -50:
        return "very_low_price"
    elif price_diff_percent > 50:
        return "very_high_price"
    elif price_diff_percent > 10:
        return "high_price"
    else:
        return "low_price"

# Define a function to classify quality
def classify_quality(row):
    quality_diff_percent = ((row["Product_quality"] - row["Desired_quality"]) / row["Desired_quality"]) * 100
    if abs(quality_diff_percent) <= 10:
        return "acceptable_quality"
    elif quality_diff_percent < -10:
        return "low_quality"
    elif quality_diff_percent > 50:
        return "very_high_quality"
    elif quality_diff_percent > 10:
        return "high_quality"
    else:
        return "very_low_quality"

# Define a function to classify quantity
def classify_quantity(row):
    if row["Product_quantity"] >= 80:
        return "acceptable_quantity"
    elif 50 <= row["Product_quantity"] < 80:
        return "low_quantity"
    else:
        return "very low_quantity"

# Apply the classification functions and create corresponding columns
data["Price_comparison"] = data.apply(classify_price, axis=1)
data["Quality_comparison"] = data.apply(classify_quality, axis=1)
data["Quantity_comparison"] = data.apply(classify_quantity, axis=1)


# Define weights for quality, price, and quantity
weight_quality = 0.4
weight_price = 0.3
weight_quantity = 0.3

# Normalize features
min_quality = data["Product_quality"].min()
max_quality = data["Product_quality"].max()
min_price = data["Product_price"].min()
max_price = data["Product_price"].max()
min_quantity = data["Product_quantity"].min()
max_quantity = data["Product_quantity"].max()

data["Normalized_quality"] = (data["Product_quality"] - min_quality) / (max_quality - min_quality)
data["Normalized_price"] = 1-((data["Product_price"] - min_price) / (max_price - min_price))
data["Normalized_quantity"] = (data["Product_quantity"] - min_quantity) / (max_quantity - min_quantity)

# Define a scoring function
def calculate_score(row):
    score = (weight_quality * row["Normalized_quality"]) + (weight_price * row["Normalized_price"]) + (weight_quantity * row["Normalized_quantity"])
    return score

# Apply the scoring function and create the "score" column
data["score"] = data.apply(calculate_score, axis=1)


# Define a function to calculate the fraud score
def calculate_fraud_score(row):
    price_quality_score = (weight_quality * row["Normalized_quality"]) + (weight_price * row["Normalized_price"]) + (weight_quantity * row["Normalized_quantity"])
    
    if price_quality_score > 0.6:
        return "high_fraud_score"  # High fraud score for very cheap prices with good quality
    elif price_quality_score < 0.2:
        return "high_fraud_score"  # High fraud score for very high prices with poor quality
    else:
        return "low_fraud_score"  # Low fraud score for other cases

# Apply the fraud scoring function and create the "fraud_score" column
data["fraud_score"] = data.apply(calculate_fraud_score, axis=1)

# Create a function to calculate AI_scoring
def calculate_ai_scoring(row):
    if row["score"] > 0.5 and row["fraud_score"] == "high_fraud_score":
        return row["score"] / 2
    else:
        return row["score"]

# Apply the function to create the AI_scoring column
data["AI_scoring"] = data.apply(calculate_ai_scoring, axis=1)

# Print the first few rows of the dataset with the score
print(data.head(20))
data.to_csv("score_evaluation_data.csv")

    Product_quality  Product_quantity  Product_price  Desired_quality  \
0         68.727006                98     649.273994               60   
1         97.535715               100    1944.739079               60   
2         86.599697                62    1791.283041               60   
3         79.932924                79    2161.615221               60   
4         57.800932               100    2456.552698               60   
5         57.799726                17    1447.592676               60   
6         52.904181                70    2010.702748               60   
7         93.308807                22    2177.872162               60   
8         80.055751                23    1748.292965               60   
9         85.403629               100     825.368293               60   
10        51.029225                46     760.599454               60   
11        98.495493                49    1930.217659               60   
12        91.622132               100    2454.07495

In [154]:
min_quality,max_quality,min_price,max_price,min_quantity,max_quantity

(50.0002768337869,
 99.99960211514829,
 500.0218274525246,
 2499.96838689967,
 1,
 100)

In [155]:
high_score_high_fraud = data[(data["score"] > 0.5) & (data["fraud_score"] == "high_fraud_score")]

# Print the filtered DataFrame
print(high_score_high_fraud)

       Product_quality  Product_quantity  Product_price  Desired_quality  \
0            68.727006                98     649.273994               60   
1            97.535715               100    1944.739079               60   
9            85.403629               100     825.368293               60   
11           98.495493                49    1930.217659               60   
12           91.622132               100    2454.074957               60   
...                ...               ...            ...              ...   
99993        70.672029               100    1552.453187               60   
99995        89.615242               100     632.327673               60   
99996        88.962645                47     577.705259               60   
99997        83.722670                65     531.773235               60   
99998        74.972362               100     636.389272               60   

       Baseline_product_price  Price_comparison Quality_comparison  \
0                

In [156]:
print(data.columns)

Index(['Product_quality', 'Product_quantity', 'Product_price',
       'Desired_quality', 'Baseline_product_price', 'Price_comparison',
       'Quality_comparison', 'Quantity_comparison', 'Normalized_quality',
       'Normalized_price', 'Normalized_quantity', 'score', 'fraud_score',
       'AI_scoring'],
      dtype='object')


In [157]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

# Separate data into features and targets
X = data[['Product_quality', 'Product_quantity', 'Product_price', 'Desired_quality', 'Baseline_product_price']]
y_price = LabelEncoder().fit_transform(data['Price_comparison'])  # Example encoding for classification
y_quality = LabelEncoder().fit_transform(data['Quality_comparison'])
y_quantity = LabelEncoder().fit_transform(data['Quantity_comparison'])
y_fraude_score = LabelEncoder().fit_transform(data['fraud_score'])
y_ai_scoring = data['AI_scoring']

# Split data into training and testing sets
X_train, X_test, y_price_train, y_price_test = train_test_split(X, y_price, test_size=0.2, random_state=42)
X_train, X_test, y_quality_train, y_quality_test = train_test_split(X, y_quality, test_size=0.2, random_state=42)
X_train, X_test, y_quantity_train, y_quantity_test = train_test_split(X, y_quantity, test_size=0.2, random_state=42)
X_train, X_test, y_fraude_score_train, y_fraude_score_test = train_test_split(X, y_fraude_score, test_size=0.2, random_state=42)
X_train, X_test, y_ai_scoring_train, y_ai_scoring_test = train_test_split(X, y_ai_scoring, test_size=0.2, random_state=42)

# Create models for each prediction task (you can choose different models)
model_price = RandomForestClassifier(n_estimators=100, random_state=42)
model_quality = RandomForestClassifier(n_estimators=100, random_state=42)
model_quantity = RandomForestClassifier(n_estimators=100, random_state=42)
model_fraude_score = RandomForestClassifier(n_estimators=100, random_state=42)
model_ai_scoring = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the models
model_price.fit(X_train, y_price_train)
model_quality.fit(X_train, y_quality_train)
model_quantity.fit(X_train, y_quantity_train)
model_fraude_score.fit(X_train, y_fraude_score_train)
model_ai_scoring.fit(X_train, y_ai_scoring_train)

# Make predictions
y_price_pred = model_price.predict(X_test)
y_quality_pred = model_quality.predict(X_test)
y_quantity_pred = model_quantity.predict(X_test)
y_fraude_score_pred = model_fraude_score.predict(X_test)
y_ai_scoring_pred = model_ai_scoring.predict(X_test)

# Evaluate models (use appropriate metrics for each task)
mse_price = mean_squared_error(y_price_test, y_price_pred)
r2_quality = r2_score(y_quality_test, y_quality_pred)
# Add other evaluation metrics for quantity and AI scoring

print(f"MSE for Price: {mse_price}")
print(f"R-squared for Quality: {r2_quality}")
# Add other evaluation metrics

MSE for Price: 5e-05
R-squared for Quality: 1.0


In [158]:
r2_fraude_score = r2_score(y_fraude_score_test, y_fraude_score_pred)
print(f"R-squared for Quality: {r2_fraude_score}")

R-squared for Quality: 0.9755968839661137


# Saving model

In [90]:
import joblib

In [159]:
# Save the trained models to files
joblib.dump(model_price, 'models/model_price.pkl')
joblib.dump(model_quality, 'models/model_quality.pkl')
joblib.dump(model_quantity, 'models/model_quantity.pkl')
joblib.dump(model_fraude_score, 'models/model_fraude_score.pkl')
joblib.dump(model_ai_scoring, 'models/model_ai_scoring.pkl')

['models/model_ai_scoring.pkl']

In [160]:
# Load the trained models from files
loaded_model_price = joblib.load('models/model_price.pkl')
loaded_model_quality = joblib.load('models/model_quality.pkl')
loaded_model_quantity = joblib.load('models/model_quantity.pkl')
loaded_model_fraude_score = joblib.load('models/model_fraude_score.pkl')
loaded_model_ai_scoring = joblib.load('models/model_ai_scoring.pkl')

In [174]:
data_1 = pd.DataFrame({
    "Product_quality": [0],
    "Product_quantity": [0],
    "Product_price": [500],
    "Desired_quality": [60],
    "Baseline_product_price": [1500]
})
price_predictions = loaded_model_price.predict(data_1)
quality_predictions = loaded_model_quality.predict(data_1)
quantity_predictions = loaded_model_quantity.predict(data_1)
fraude_score_predictions = loaded_model_fraude_score.predict(data_1)
ai_scoring_predictions = loaded_model_ai_scoring.predict(data_1)
print("Price Predictions:", price_predictions)
print("Quality Predictions:", quality_predictions)
print("Quantity Predictions:", quantity_predictions)
print("fraude_score Predictions:", fraude_score_predictions)
print("AI Scoring Predictions:", ai_scoring_predictions)

Price Predictions: [4]
Quality Predictions: [2]
Quantity Predictions: [2]
fraude_score Predictions: [1]
AI Scoring Predictions: [0.32058542]
