In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# Read the raw transaction records from the CSV file (path is resolved
# relative to the kernel's working directory).
csv_path = 'land_transactions.csv'
df = pd.read_csv(csv_path)

In [None]:
# Clean the data: discard fully duplicated rows first, then discard every row
# that still has a missing value in at least one column.
df = df.drop_duplicates().dropna()

In [None]:
# Feature engineering: derive three columns from the cleaned frame.

# Absolute difference between the two address columns.
# NOTE(review): the subtraction only works if both address columns are
# numeric — confirm how addresses are encoded in the CSV.
address_gap = (df['buyer_address'] - df['seller_address']).abs()

# Per-party transaction counts: every row receives the number of non-null
# transaction_id values among all rows that share its buyer_name / seller_name.
# NOTE(review): the count spans the whole frame and includes the current row,
# so it is a total rather than a strictly "previous" count — confirm intent.
buyer_counts = df.groupby('buyer_name')['transaction_id'].transform('count')
seller_counts = df.groupby('seller_name')['transaction_id'].transform('count')

# Attach the derived series as new columns.
df['distance'] = address_gap
df['previous_transactions_buyer'] = buyer_counts
df['previous_transactions_seller'] = seller_counts

In [None]:
# Name of the label column the classifier is trained to predict.
target = 'fraudulent'

# Columns fed to the classifier as inputs, in the order they are passed.
features = [
    'property_location',
    'property_value',
    'distance',
    'previous_transactions_buyer',
    'previous_transactions_seller',
]

In [None]:
# Split the data by row position: the first 80% of rows (rounded down) become
# the training set and the remaining rows become the test set. Rows are taken
# in their current order; no shuffling is applied.
train_size = int(0.8 * len(df))
train, test = df.iloc[:train_size], df.iloc[train_size:]

In [None]:
# Train the model: a 100-tree random forest with a fixed seed, fitted on the
# training rows using the selected feature columns and the target column.
X_train = train[features]
y_train = train[target]

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

In [None]:
# Predict a label for every row of the held-out test set.
X_test = test[features]
test_predictions = rf.predict(X_test)

In [None]:
# Evaluate the model performance: score the predictions against the true test
# labels with each metric in turn (accuracy, precision, recall, F1).
y_true = test[target]
accuracy, precision, recall, f1 = (
    metric(y_true, test_predictions)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

In [None]:
# Report each metric on its own line as "<label> <value>".
for label, value in (
    ('Accuracy:', accuracy),
    ('Precision:', precision),
    ('Recall:', recall),
    ('F1 Score:', f1),
):
    print(label, value)