In [19]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

In [12]:
data = pd.read_csv('creditcard.csv')

In [13]:
# Display the first few rows of the dataframe
print(data.head())

   Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26       V27       V28 

In [14]:
# Check for missing values
print(data.isnull().sum())

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64


In [15]:
#Split the Data into Features and Target Variable:
X = data.drop('Class', axis=1)  # Features
y = data['Class']  # Target variable

In [16]:
#Split the Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [20]:
# Apply SMOTE for balancing
sm = SMOTE(random_state=42)
X_res, y_res = sm.fit_resample(X_train, y_train)

#Train a Machine Learning Model
# Initialize the model
rf = RandomForestClassifier(random_state=42)
# Train the model
rf.fit(X_res, y_res)

In [25]:
loaded_model = joblib.load('creditcard_model.joblib')
print(loaded_model)  # Print the loaded model to inspect its content


RandomForestClassifier()


In [26]:
# Load the serialized model from file
try:
    loaded_model = joblib.load('creditcard_model.joblib')
except FileNotFoundError:
    print("Error: Model file not found.")
except Exception as e:
    print("Error loading model:", e)
else:
    # Use the loaded model to make predictions
    try:
        print("Model loaded successfully:", loaded_model)
        # Debugging: Print the shape of X_test
        print("Shape of X_test:", X_test.shape)
        # Debugging: Print the first few rows of X_test
        print("First few rows of X_test:", X_test.head())
        predictions = loaded_model.predict(X_test)
        print("Predictions:", predictions)
    except Exception as e:
        print("Error making predictions:", e)


Model loaded successfully: RandomForestClassifier()
Shape of X_test: (56962, 30)
First few rows of X_test:             Time         V1        V2         V3        V4         V5  \
43428    41505.0 -16.526507  8.584972 -18.649853  9.505594 -13.793819   
49906    44261.0   0.339812 -2.743745  -0.134070 -1.385729  -1.451413   
29474    35484.0   1.399590 -0.590701   0.168619 -1.029950  -0.539806   
276481  167123.0  -0.432071  1.647895  -1.669361 -0.349504   0.785785   
278846  168473.0   2.014160 -0.137394  -1.015839  0.327269  -0.182179   

              V6         V7        V8        V9  ...       V20       V21  \
43428  -2.832404 -16.701694  7.517344 -8.507059  ... -1.514923  1.190739   
49906   1.015887  -0.524379  0.224060  0.899746  ...  0.506044 -0.213436   
29474   0.040444  -0.712567  0.002299 -0.971747  ...  0.212877  0.102398   
276481 -0.630647   0.276990  0.586025 -0.484715  ... -0.244633  0.358932   
278846 -0.956571   0.043241 -0.160746  0.363241  ... -0.255293 -0.238644  

In [27]:
import pickle

# Serialize the model to a file
with open('creditcard_model.pkl', 'wb') as file:
    pickle.dump(model, file)

In [29]:
# Load the serialized model from file
try:
    with open('creditcard_model.pkl', 'rb') as file:
        loaded_model = pickle.load(file)
except FileNotFoundError:
    print("Error: Model file not found.")
except Exception as e:
    print("Error loading model:", e)
else:
    # Use the loaded model to make predictions
    try:
        print("Model loaded successfully:", loaded_model)
        # Debugging: Print the shape of X_test
        print("Shape of X_test:", X_test.shape)
        # Debugging: Print the first few rows of X_test
        print("First few rows of X_test:", X_test.head())
        predictions = loaded_model.predict(X_test)
        print("Predictions:", predictions)
    except Exception as e:
        print("Error making predictions:", e)

Model loaded successfully: RandomForestClassifier()
Shape of X_test: (56962, 30)
First few rows of X_test:             Time         V1        V2         V3        V4         V5  \
43428    41505.0 -16.526507  8.584972 -18.649853  9.505594 -13.793819   
49906    44261.0   0.339812 -2.743745  -0.134070 -1.385729  -1.451413   
29474    35484.0   1.399590 -0.590701   0.168619 -1.029950  -0.539806   
276481  167123.0  -0.432071  1.647895  -1.669361 -0.349504   0.785785   
278846  168473.0   2.014160 -0.137394  -1.015839  0.327269  -0.182179   

              V6         V7        V8        V9  ...       V20       V21  \
43428  -2.832404 -16.701694  7.517344 -8.507059  ... -1.514923  1.190739   
49906   1.015887  -0.524379  0.224060  0.899746  ...  0.506044 -0.213436   
29474   0.040444  -0.712567  0.002299 -0.971747  ...  0.212877  0.102398   
276481 -0.630647   0.276990  0.586025 -0.484715  ... -0.244633  0.358932   
278846 -0.956571   0.043241 -0.160746  0.363241  ... -0.255293 -0.238644  