In [17]:
import pandas as pd
import numpy as np

In [54]:
# Read the transaction CSV into a DataFrame; later cells operate on `data`.
data = pd.read_csv(filepath_or_buffer='/content/card_transdata.csv')

### Data Preprocessing

In [25]:
# Report the dataset dimensions: number of rows, then number of columns.
print(f'Rows : {len(data.index)}')
print(f'Features: {len(data.columns)}')

Rows : 1000000
Features: 8


In [26]:
# Remove every row that contains a NaN value from the dataset.

# Per-column flag: True where the column holds at least one NaN.
# Printed explicitly because a notebook cell only displays its last
# expression; as bare mid-cell expressions these results were discarded.
print(data.isna().any())

# Rows in which any feature is NaN, shown before they are dropped.
print(data[data.isna().any(axis=1)])

# Drop the rows with NaN values and renumber the index from 0.
# `data` is rebound instead of mutated with inplace=True so the cleanup
# reads as a single chain; downstream cells still find the result in `data`.
data = data.dropna().reset_index(drop=True)

# Independent copy of the cleaned frame.
data2 = data.copy()

### Modeling with Random Forest

In [27]:
# Class distribution of the target variable.
# Printed explicitly: as a bare expression in the middle of the cell its
# result would be computed and then discarded without being shown.
print(data['fraud'].value_counts())

# Separate the target variable from the feature columns.
y = data['fraud']
x = data.drop(columns=['fraud'])

In [28]:
from sklearn.model_selection import train_test_split

# Split features and target into training and testing partitions.
# NOTE(review): test_size=0.8 holds out 80% of the rows for testing, so only
# 20% is used for training -- confirm this ratio is intentional.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.8,
    random_state=42,
)

In [41]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Hyper-parameter grid explored by the grid search.
grid_params = {
    'bootstrap': [True],
    'max_depth': [4, 6, 8],
    # Same candidate values as before (6..10 and 2..6), as plain Python ints
    # so that best_params_ prints them as bare numbers.
    'min_samples_split': list(range(6, 11)),
    'min_samples_leaf': list(range(2, 7)),
    # 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3; for
    # RandomForestClassifier it meant 'sqrt', so the search space is unchanged.
    'max_features': ['sqrt', 'log2'],
    'n_estimators': [7],
}

# Fit a 3-fold cross-validated grid search on the training data.
# random_state pins the forest's random sampling so reruns give the same model.
classifier = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=grid_params,
    cv=3,
    n_jobs=-1,
)
grid_search.fit(x_train, y_train)

print('Best Paramters for radnom forest classifier : ', grid_search.best_params_)

# Predict the test data with the newly trained model.
predict = grid_search.predict(x_test)

# Accuracy score of the random forest on the test partition.
# NOTE(review): if the 'fraud' classes are imbalanced, accuracy alone can look
# high while fraud cases are missed -- consider also checking precision/recall.
score = accuracy_score(y_test, predict)
print('Accuracy with random forest : ', score)

Best Paramters for radnom forest classifier :  {'bootstrap': True, 'max_depth': 8, 'max_features': 'log2', 'min_samples_leaf': 4, 'min_samples_split': 9, 'n_estimators': 7}
Accuracy with random forest :  0.99986125
