In [13]:
import pandas as pd
import numpy as np

# Load dataset
df = pd.read_csv("donation_requests.csv")

# Rename columns for consistency
df.rename(columns={
    "Request_ID": "request_id",
    "Institute_Name": "institute",
    "Item_Requested": "item",
    "Quantity": "quantity",
    "Urgency_Level": "urgency",
    "Past_Donation_History": "past_donations",
    "Consumption_Rate": "consumption",
    "Anomaly_Score": "anomaly_score",
    "Approval_Status": "approval"
}, inplace=True)

# Convert categorical columns properly
categorical_cols = ["institute", "item", "urgency", "approval"]
df[categorical_cols] = df[categorical_cols].astype(str)

# Convert numeric columns properly
numeric_cols = ["quantity", "past_donations", "consumption", "anomaly_score"]
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

# Fill missing numeric values with the median
df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].median())

# Fill missing categorical values with "Unknown"
df[categorical_cols] = df[categorical_cols].fillna("Unknown")

# Print first few rows to verify
print(df.head())

# Save cleaned dataset
df.to_csv("cleaned_donation_requests.csv", index=False)





   request_id          institute             item  quantity urgency  \
0           1      Helping Hands  School Supplies       272    High   
1           2  Elderly Care Home         Blankets       264     Low   
2           3      Helping Hands    Sanitary Pads        93  Medium   
3           4     Hope Orphanage   Drinking Water        19  Medium   
4           5  Elderly Care Home    Sanitary Pads       185  Medium   

   past_donations  consumption  anomaly_score approval  
0               9         0.99           0.08  Genuine  
1              44         1.73           0.45  Genuine  
2              11         1.75           0.10  Genuine  
3              45         1.81           0.21  Genuine  
4              45         1.59           0.23  Genuine  


In [15]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# ----- Encoding Categorical Features -----
label_encoders = {}

categorical_cols = ["urgency", "approval"]
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  # Store encoders for later use

print("\n--- Encoded Categorical Features ---")
print(df.head())

# ----- Feature Scaling (Optional) -----
scaler = StandardScaler()
numeric_features = ["quantity", "past_donations", "consumption", "anomaly_score"]

df_scaled = df.copy()
df_scaled[numeric_features] = scaler.fit_transform(df[numeric_features])

print("\n--- Scaled Numerical Features ---")
print(df_scaled.head())



--- Encoded Categorical Features ---
   request_id          institute             item  quantity  urgency  \
0           1      Helping Hands  School Supplies       272        0   
1           2  Elderly Care Home         Blankets       264        1   
2           3      Helping Hands    Sanitary Pads        93        2   
3           4     Hope Orphanage   Drinking Water        19        2   
4           5  Elderly Care Home    Sanitary Pads       185        2   

   past_donations  consumption  anomaly_score  approval  
0               9         0.99           0.08         1  
1              44         1.73           0.45         1  
2              11         1.75           0.10         1  
3              45         1.81           0.21         1  
4              45         1.59           0.23         1  

--- Scaled Numerical Features ---
   request_id          institute             item  quantity  urgency  \
0           1      Helping Hands  School Supplies  0.138670        0   
1 

In [17]:
from sklearn.model_selection import train_test_split

# Define target variable and features
target = "approval"  # Change to "quantity" if regression
X = df_scaled.drop(columns=[target, "request_id", "institute", "item"])  # Remove non-numeric cols
y = df_scaled[target]

# Split into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Training Data: {X_train.shape}, Testing Data: {X_test.shape}")


Training Data: (8000, 5), Testing Data: (2000, 5)


In [19]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       529
           1       1.00      1.00      1.00      1471

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [21]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

# Train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluate
print("MAE:", mean_absolute_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))


MAE: 5.000000000000005e-06
R² Score: 0.9999997429831179


In [23]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

# Define parameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Initialize model
rf = RandomForestClassifier(random_state=42)

# Perform Randomized Search
random_search = RandomizedSearchCV(
    rf, param_distributions=param_grid, n_iter=10, cv=5, scoring='accuracy', random_state=42, n_jobs=-1
)

# Train model
random_search.fit(X_train, y_train)

# Get best parameters
print("Best Parameters:", random_search.best_params_)

# Evaluate the best model
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test)

from sklearn.metrics import accuracy_score, classification_report
print("Tuned Model Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Best Parameters: {'n_estimators': 50, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': 20}
Tuned Model Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       529
           1       1.00      1.00      1.00      1471

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [25]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV

# Define parameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Initialize model
rf = RandomForestRegressor(random_state=42)

# Perform Randomized Search
random_search = RandomizedSearchCV(
    rf, param_distributions=param_grid, n_iter=10, cv=5, scoring='r2', random_state=42, n_jobs=-1
)

# Train model
random_search.fit(X_train, y_train)

# Get best parameters
print("Best Parameters:", random_search.best_params_)

# Evaluate the best model
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test)

from sklearn.metrics import mean_absolute_error, r2_score
print("Tuned Model MAE:", mean_absolute_error(y_test, y_pred))
print("Tuned Model R² Score:", r2_score(y_test, y_pred))


Best Parameters: {'n_estimators': 50, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': 20}
Tuned Model MAE: 0.0
Tuned Model R² Score: 1.0


In [29]:
pip install keras-tuner


Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [31]:
import keras_tuner as kt
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Define model building function
def build_model(hp):
    model = Sequential()
    model.add(Dense(units=hp.Int('units', min_value=32, max_value=512, step=32), activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Adjust output layer based on problem type

    # Compile model
    model.compile(
        optimizer=keras.optimizers.Adam(hp.Choice('learning_rate', [0.001, 0.0001, 0.00001])),
        loss='binary_crossentropy',  # Change based on your task (e.g., 'mse' for regression)
        metrics=['accuracy']
    )
    return model

# Initialize tuner
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,  # Number of different models to try
    executions_per_trial=1, 
    directory='tuning_results'
)

# Start tuning
tuner.search(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

# Get the best model
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best Hyperparameters: {best_hps.values}")



Trial 5 Complete [00h 00m 07s]
val_accuracy: 0.6265000104904175

Best val_accuracy So Far: 0.9415000081062317
Total elapsed time: 00h 00m 37s
Best Hyperparameters: {'units': 384, 'learning_rate': 0.0001}


In [35]:
import pickle

# Save the model
with open("donation_fraud_model.pkl", "wb") as f:
    pickle.dump(model, f)



In [37]:
# Load the model
with open("donation_fraud_model.pkl", "rb") as f:
    model = pickle.load(f)


In [39]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Predict the target variable for the test set
y_pred = model.predict(X_test)

# Calculate R² score (Coefficient of Determination)
r2 = r2_score(y_test, y_pred)
print(f"R² Score: {r2}")

# You can also use other regression metrics
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred, squared=False)

print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")


R² Score: 0.9999997429831179
Mean Absolute Error: 5.000000000000005e-06
Mean Squared Error: 5.000000000000009e-08
Root Mean Squared Error: 0.00022360679774997917




In [41]:
pip install flask joblib


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
from flask import Flask, request, jsonify
import joblib
import numpy as np

# Load the trained model
model = joblib.load('donation_fraud_model.pkl')

# Initialize Flask app
app = Flask(__name__)

@app.route('/')
def index():
    return app.send_static_file('index.html')

@app.route('/predict', methods=['POST'])
def predict():
    try:
        # Get data from POST request
        data = request.get_json()
        features = np.array(data['features']).reshape(1, -1)  # Reshape input data
        
        # Predict using the model
        prediction = model.predict(features)
        
        # Return prediction as JSON
        return jsonify({'prediction': prediction[0]})
    
    except Exception as e:
        return jsonify({'error': str(e)})

if __name__ == '__main__':
    app.run(debug=True, use_reloader=False)  # Prevent auto-reloading in Jupyter


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [27/Feb/2025 17:25:51] "GET / HTTP/1.1" 404 -
127.0.0.1 - - [27/Feb/2025 17:26:26] "GET / HTTP/1.1" 404 -
127.0.0.1 - - [27/Feb/2025 17:26:26] "GET /favicon.ico HTTP/1.1" 404 -
