In [None]:
# Anomaly Detection: identifying unusual patterns or deviations from normal behavior within data.

import numpy as np
from sklearn.ensemble import IsolationForest

# Generate some sample data: two tight clusters, one shifted by +2 and one by -2
# on both axes.
rng = np.random.RandomState(42)
X = 0.3 * rng.randn(100, 2)
X_train = np.r_[X + 2, X - 2]

# Draw *fresh* inlier observations for the test set. Building X_test from the
# same X as X_train would make it an exact copy of the training data, so every
# "test" result would merely re-measure the training set.
X_new = 0.3 * rng.randn(20, 2)
X_test = np.r_[X_new + 2, X_new - 2]

# Points drawn uniformly from the square [-4, 4] x [-4, 4]; most of them fall
# outside the two clusters.
X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))

# Fit the Isolation Forest model
clf = IsolationForest(max_samples=100, random_state=rng)
clf.fit(X_train)

# Predict anomalies (+1 = inlier, -1 = anomaly)
y_pred_train = clf.predict(X_train)
y_pred_test = clf.predict(X_test)
y_pred_outliers = clf.predict(X_outliers)

# Print the results. Only a short preview and per-set counts are shown so the
# cell output stays readable instead of dumping every score and prediction.
print("Anomaly scores for training data (first 5):", clf.decision_function(X_train)[:5])
for set_name, set_predictions in (
    ("training data", y_pred_train),
    ("test data", y_pred_test),
    ("outliers", y_pred_outliers),
):
    n_flagged = int(np.sum(set_predictions == -1))
    print(f"Predictions for {set_name}: {n_flagged} of {len(set_predictions)} flagged as anomalies")


Anomaly scores for training data: [ 0.04137682 -0.00682768  0.06270133 -0.06691297  0.06218438  0.05259401
 -0.05630401 -0.04869782  0.04198508 -0.03368517 -0.05830562  0.00076747
  0.05642573  0.01921307  0.04724951 -0.04977738  0.03002718 -0.0320237
 -0.06015935  0.00020333  0.03494157  0.06591444 -0.03488402  0.03333535
 -0.03384962  0.05438955  0.04203942  0.00063081  0.03325709  0.05352403
  0.05407624 -0.03208207 -0.01068931  0.05069957  0.04989699  0.00630612
 -0.00045256 -0.1127287   0.06328468 -0.06575771  0.07465552 -0.06579169
  0.0288597   0.02295268  0.06011255  0.06011693  0.03917077 -0.01985746
  0.0698299   0.06164545 -0.01765337  0.03492802  0.07630946 -0.0957069
  0.06132884 -0.05860204 -0.07722439  0.07763083  0.01371992 -0.01281897
  0.00119801 -0.09555787 -0.07912213  0.0203897   0.06175847 -0.02682507
  0.02763652 -0.03375448  0.03896657 -0.03070274  0.04054464 -0.03333141
  0.07052231 -0.05966561  0.0511496   0.07468381  0.04463635  0.05230447
 -0.09571446 -0.003

In [None]:
# Machine Learning Models: employing algorithms such as Logistic Regression or Decision Trees
# (this cell trains a Logistic Regression classifier).

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Placeholder target variable -- replace with the real labels when available.
# The first half of each array is labelled +1 and the second half -1.
y_train = np.repeat([1.0, -1.0], len(X_train) // 2)
y_test = np.repeat([1.0, -1.0], len(X_test) // 2)

# Build the Logistic Regression model and fit it on the training split
logreg = LogisticRegression(random_state=42).fit(X_train, y_train)

# Predicted labels for both splits
y_pred_train = logreg.predict(X_train)
y_pred_test = logreg.predict(X_test)

# Evaluate the model (accuracy used as the example metric)
train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)
print("Training accuracy:", train_accuracy)
print("Test accuracy:", test_accuracy)


Training accuracy: 1.0
Test accuracy: 1.0


In [None]:
# Feature Engineering: selecting and transforming relevant features to enhance fraud detection accuracy.

from sklearn.preprocessing import StandardScaler

# Feature selection (example: keep the first two columns as the model inputs)
first_two_columns = slice(None, 2)
X_train_selected = X_train[:, first_two_columns]
X_test_selected = X_test[:, first_two_columns]

# Fit the scaler on the training split only, then apply it to both splits
scaler = StandardScaler().fit(X_train_selected)
X_train_scaled = scaler.transform(X_train_selected)
X_test_scaled = scaler.transform(X_test_selected)

# Train a second Logistic Regression model on the scaled features
logreg_scaled = LogisticRegression(random_state=42).fit(X_train_scaled, y_train)

# Predicted labels from the scaled-feature model
y_pred_train_scaled = logreg_scaled.predict(X_train_scaled)
y_pred_test_scaled = logreg_scaled.predict(X_test_scaled)

# Report accuracy of the scaled-feature model on both splits
train_accuracy_scaled = accuracy_score(y_train, y_pred_train_scaled)
test_accuracy_scaled = accuracy_score(y_test, y_pred_test_scaled)
print("Training accuracy (scaled features):", train_accuracy_scaled)
print("Test accuracy (scaled features):", test_accuracy_scaled)


Training accuracy (scaled features): 1.0
Test accuracy (scaled features): 1.0


In [None]:
# Real-time Monitoring: implementing systems that can detect and respond to fraudulent activities in real-time.

import time

# Limits for the demo run below, so the cell finishes on its own and the
# notebook still completes under "Restart & Run All".
MAX_STREAM_POINTS = 20
STREAM_DELAY_SECONDS = 0.1

# Simulate real-time data stream
def stream_data(max_points=None, delay=0.0):
  """Score a simulated stream of random 2-D points with the fitted model.

  Each iteration draws one point uniformly from [-4, 4] x [-4, 4] using the
  notebook-level ``rng``, scores it with the notebook-level ``clf`` and prints
  the point, its anomaly score and the predicted label.

  Args:
    max_points: Number of points to process before returning. ``None`` (the
      default) streams until interrupted, which matches calling
      ``stream_data()`` with no arguments.
    delay: Seconds to sleep between points; passed straight to ``time.sleep``.

  Returns:
    None. Results are printed as they are produced.
  """
  n_seen = 0
  try:
    while max_points is None or n_seen < max_points:
      # Generate random data point
      data_point = rng.uniform(low=-4, high=4, size=(1, 2))

      # Predict anomaly
      anomaly_score = clf.decision_function(data_point)
      is_anomaly = clf.predict(data_point)

      # Print results
      print(f"Data point: {data_point}, Anomaly score: {anomaly_score}, Anomaly: {is_anomaly}")
      n_seen += 1

      # Simulate delay
      time.sleep(delay)
  except KeyboardInterrupt:
    # Stopping an unbounded stream by hand is the expected way out, so end
    # cleanly instead of leaving a traceback in the cell output.
    print(f"Monitoring stopped by user after {n_seen} data points.")

# Start real-time monitoring (bounded demo run)
stream_data(max_points=MAX_STREAM_POINTS, delay=STREAM_DELAY_SECONDS)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Data point: [[-3.45749418 -2.44799202]], Anomaly score: [-0.14321931], Anomaly: [-1]
Data point: [[-3.93214271 -2.26615009]], Anomaly score: [-0.11683764], Anomaly: [-1]
Data point: [[0.03359431 2.42008673]], Anomaly score: [-0.14788705], Anomaly: [-1]
Data point: [[-3.42126548 -2.84560446]], Anomaly score: [-0.17581453], Anomaly: [-1]
Data point: [[-0.93343819 -1.52645922]], Anomaly score: [-0.12078839], Anomaly: [-1]
Data point: [[-2.94928014  3.01509709]], Anomaly score: [-0.2012284], Anomaly: [-1]
Data point: [[3.4344746  0.25058465]], Anomaly score: [-0.16858851], Anomaly: [-1]
Data point: [[-1.35497435 -0.76876491]], Anomaly score: [-0.15739798], Anomaly: [-1]
Data point: [[ 1.47841316 -0.98635502]], Anomaly score: [-0.14251592], Anomaly: [-1]
Data point: [[-0.87961356 -1.91140584]], Anomaly score: [-0.09227016], Anomaly: [-1]
Data point: [[2.41028852 0.66922719]], Anomaly score: [-0.1355425], Anomaly: [-1]
Data poi

KeyboardInterrupt: 