In [1]:
# Q1: Check for Missing Values in the Dataset
import pandas as pd

# Read the transactions CSV into `df`, the working frame the later cells use.
df = pd.read_csv("bank_transactions_data.csv")

# Build a boolean null mask once, then total it per column
# (isna is pandas' alias for isnull, so the counts are the same).
null_mask = df.isna()
missing_values = null_mask.sum()
print("Missing values in each column:\n", missing_values)


Missing values in each column:
 TransactionID              0
AccountID                  0
TransactionAmount          0
TransactionDate            0
TransactionType            0
Location                   0
DeviceID                   0
IP Address                 0
MerchantID                 0
Channel                    0
CustomerAge                0
CustomerOccupation         0
TransactionDuration        0
LoginAttempts              0
AccountBalance             0
PreviousTransactionDate    0
dtype: int64


In [2]:
# Q2: Display All Rows That Have Any Missing Value
# Flag every row holding at least one null cell, then keep only flagged rows.
row_has_null = df.isna().any(axis="columns")
rows_with_missing = df.loc[row_has_null]
print(rows_with_missing)


Empty DataFrame
Columns: [TransactionID, AccountID, TransactionAmount, TransactionDate, TransactionType, Location, DeviceID, IP Address, MerchantID, Channel, CustomerAge, CustomerOccupation, TransactionDuration, LoginAttempts, AccountBalance, PreviousTransactionDate]
Index: []


In [3]:
# Q3: Drop All Rows Containing Any Missing Values
# Row-wise drop: a row is removed as soon as any one of its cells is null.
# The result is bound to a new name; `df` itself is not modified here.
df_dropped = df.dropna(axis="index", how="any")
print("Shape after dropping rows with NaN:", df_dropped.shape)


Shape after dropping rows with NaN: (2512, 16)


In [4]:
# Q4: Fill Missing Values in a Specific Column with Mean/Mode/Median
# The median is the fill value chosen here; nulls in the column are replaced
# with it and the filled column is written back onto `df`.
amount_col = 'TransactionAmount'
median_value = df[amount_col].median()
df[amount_col] = df[amount_col].fillna(median_value)



In [5]:
# Q5: Replace Specific Placeholder Strings Like 'N/A', 'Missing' etc.
# Only exact, case-sensitive matches of these tokens are swapped for pd.NA;
# the replacement modifies `df` in place.
placeholder_tokens = ['N/A', 'n/a', 'missing', 'Missing']
df.replace(to_replace=placeholder_tokens, value=pd.NA, inplace=True)


In [6]:
# Q6: Check for Duplicate Rows and Remove Them
# Mark every repeat of an earlier row (the first occurrence stays unmarked).
is_repeat = df.duplicated(keep="first")
duplicates = is_repeat.sum()
print("Total duplicate rows:", duplicates)

# Keep the unmarked rows; `df` itself is left unchanged.
df_cleaned = df.loc[~is_repeat]
print("Shape after removing duplicates:", df_cleaned.shape)


Total duplicate rows: 0
Shape after removing duplicates: (2512, 16)


In [7]:
# Q7 (Mini EDA Touch): Check for Outliers Using IQR in 'TransactionAmount'
# "Detect possible outliers in 'TransactionAmount' using the IQR method."
amounts = df['TransactionAmount']

# First and third quartiles from one quantile call; their gap is the IQR.
Q1, Q3 = amounts.quantile([0.25, 0.75])
IQR = Q3 - Q1

# Fences sit 1.5 * IQR beyond each quartile.
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# A row is an outlier when its amount lies strictly outside the fences.
beyond_fences = amounts.lt(lower_bound) | amounts.gt(upper_bound)
outliers = df[beyond_fences]
print("Number of outliers:", len(outliers))


Number of outliers: 113
