In [None]:
# Mount Google Drive at /content/drive so the CSV paths used further down
# (under /content/drive/MyDrive) resolve.
# NOTE(review): the google.colab module is presumably only importable inside a
# Colab runtime — confirm before running this cell elsewhere.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Locations of the training and testing CSV files on the mounted drive
path_to_train_dataset = '/content/drive/MyDrive/fraudTrain.csv'
path_to_test_dataset = '/content/drive/MyDrive/fraudTest.csv'

# Read both CSV files into DataFrames (training split first, then testing)
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (path_to_train_dataset, path_to_test_dataset)
)

# Show the head and the column index of each split, training first.
# Each entry holds: preview heading, column-names heading, DataFrame.
dataset_previews = (
    ("Training Dataset preview:", "\nTraining Dataset column names:", train_df),
    ("\nTesting Dataset preview:", "\nTesting Dataset column names:", test_df),
)
for preview_heading, columns_heading, frame in dataset_previews:
    print(preview_heading)
    print(frame.head())
    print(columns_heading)
    print(frame.columns)

# Label column; assumed to hold 1 for fraud and 0 for legitimate transactions
target_column = 'is_fraud'

# Fail fast if either split is missing the label column
if any(target_column not in frame.columns for frame in (train_df, test_df)):
    raise KeyError(f"'{target_column}' column not found in the datasets. Please check the column names.")

# Separate the label from the remaining (feature) columns in each split
y_train = train_df[target_column]
X_train = train_df.drop(columns=[target_column])
y_test = test_df[target_column]
X_test = test_df.drop(columns=[target_column])

# Drop the columns this baseline does not use as features: timestamps, names,
# addresses, categorical text and identifier-like fields. These would need
# proper encoding / feature engineering to be useful to the models.
non_numeric_columns = ['Unnamed: 0', 'trans_date_trans_time', 'cc_num', 'merchant', 'category', 'first', 'last', 'gender', 'street', 'city', 'state', 'zip', 'job', 'dob', 'trans_num']

# errors='ignore' keeps this step working when a listed column is absent from
# the CSV (for example, a file saved without the 'Unnamed: 0' index column)
# instead of aborting with a KeyError.
X_train = X_train.drop(columns=non_numeric_columns, errors='ignore')
X_test = X_test.drop(columns=non_numeric_columns, errors='ignore')

# Keep only numeric features so that an unlisted text column can never reach
# the scaler, then give the test split exactly the training columns in the
# same order (raises KeyError if the test split lacks a training feature).
X_train = X_train.select_dtypes(include='number')
X_test = X_test[X_train.columns]

# Standardize the features: fit the scaler on the training split only, then
# apply that same fitted transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Build each classifier with a fixed seed and train it on the training split.
# fit() returns the estimator itself, so construction and training are chained.
log_reg = LogisticRegression(random_state=42).fit(X_train, y_train)
decision_tree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
rand_forest = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict test-set labels with every trained model, in the same order as the
# names on the left-hand side
log_reg_preds, decision_tree_preds, rand_forest_preds = (
    fitted_model.predict(X_test)
    for fitted_model in (log_reg, decision_tree, rand_forest)
)

# Report accuracy, confusion matrix and per-class metrics for each model,
# with a blank line separating consecutive reports
model_reports = (
    ("Logistic Regression:", log_reg_preds),
    ("Decision Tree:", decision_tree_preds),
    ("Random Forest:", rand_forest_preds),
)
for report_index, (model_heading, predictions) in enumerate(model_reports):
    if report_index:
        print()
    print(model_heading)
    print("Accuracy:", accuracy_score(y_test, predictions))
    print("Confusion Matrix:\n", confusion_matrix(y_test, predictions))
    print("Classification Report:\n", classification_report(y_test, predictions))
