## Importing the libraries

In [175]:
import numpy as np
import pandas as pd

## Loading the data

In [176]:
# Read the CSV into `data`; the relative path is resolved against the
# notebook's working directory.
data = pd.read_csv(r'../data/trx-10k.csv')

In [177]:
# Preview the first five rows to check the column layout after loading.
data.head()

Unnamed: 0,status,time,card_type,city,amount,id
0,success,2025-09-07 10:48:00,Visa,Tehran,1427657.0,98
1,success,2025-09-20 13:10:00,MastCard,Tehran,1578078.0,52
2,fail,2025-09-29 03:27:00,Visa,Tehran,250000.0,20
3,success,2025-09-15 13:13:00,Discover,Isfahan,0.0,40
4,success,2025-09-11 11:11:00,Visa,Tehran,1782689.0,59


## Extracting some information

In [178]:
# Count the missing values in each column.
data.isna().sum()

status         0
time           0
card_type    311
city         116
amount         0
id             0
dtype: int64

In [179]:
# Summarise column dtypes, non-null counts and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   status     10000 non-null  object 
 1   time       10000 non-null  object 
 2   card_type  9689 non-null   object 
 3   city       9884 non-null   object 
 4   amount     10000 non-null  float64
 5   id         10000 non-null  int64  
dtypes: float64(1), int64(1), object(4)
memory usage: 468.9+ KB


In [180]:
# (rows, columns) of the loaded frame.
data.shape

(10000, 6)

In [181]:
# Total number of cells (rows x columns).
data.size

60000

In [182]:
# include='all' summarises the text columns (count/unique/top/freq)
# alongside the numeric ones (mean/std/quantiles).
data.describe(include='all')

Unnamed: 0,status,time,card_type,city,amount,id
count,10000,10000,9689,9884,10000.0,10000.0
unique,6,1724,10,14,,
top,fail,2025-09-29 11:11:00,Visa,Tehran,,
freq,3964,25,2358,2063,,
mean,,,,,295473500.0,50.0612
std,,,,,1692035000.0,28.870875
min,,,,,-999999.0,1.0
25%,,,,,100.5,25.0
50%,,,,,343679.0,50.0
75%,,,,,914465.0,74.0


## Cleaning of data

In [183]:
# Look at the raw rows once more before cleaning starts.
data.head()

Unnamed: 0,status,time,card_type,city,amount,id
0,success,2025-09-07 10:48:00,Visa,Tehran,1427657.0,98
1,success,2025-09-20 13:10:00,MastCard,Tehran,1578078.0,52
2,fail,2025-09-29 03:27:00,Visa,Tehran,250000.0,20
3,success,2025-09-15 13:13:00,Discover,Isfahan,0.0,40
4,success,2025-09-11 11:11:00,Visa,Tehran,1782689.0,59


## Renaming all the columns

In [184]:
# Switch the snake_case headers to the Title_Case names used from here on.
# `rename` returns a new frame; rebinding `data` to it avoids mutating in
# place the frame that earlier cells already displayed.
data = data.rename(columns={
    'status': 'Status',
    'time': 'Time',
    'card_type': 'Card_Type',
    'city': 'City',
    'amount': 'Amount',
    'id': 'ID',
})

In [185]:
# Confirm the new column names took effect.
data.head()

Unnamed: 0,Status,Time,Card_Type,City,Amount,ID
0,success,2025-09-07 10:48:00,Visa,Tehran,1427657.0,98
1,success,2025-09-20 13:10:00,MastCard,Tehran,1578078.0,52
2,fail,2025-09-29 03:27:00,Visa,Tehran,250000.0,20
3,success,2025-09-15 13:13:00,Discover,Isfahan,0.0,40
4,success,2025-09-11 11:11:00,Visa,Tehran,1782689.0,59


## Fixing inconsistent category values

In [186]:
# Target labels: Success and Fail.
# Match on a trimmed, lower-cased key so every case/whitespace variant of a
# known spelling collapses to the same label (the original lookup listed
# 'success', 'succeed', 'Success', 'fail', 'failed', 'FAIL' one by one).
# Values whose key is not in the table keep their original text via fillna.
data['Status'] = (
    data['Status']
    .str.strip()
    .str.lower()
    .map({
        'success': 'Success',
        'succeed': 'Success',
        'fail': 'Fail',
        'failed': 'Fail',
    })
    .fillna(data['Status'])
)

In [187]:
# Card types noted while exploring: visa, nan, mastercard, discover, amex.
# Each group of misspellings is collapsed onto one canonical label; values
# not listed here (including NaN) pass through unchanged.
data['Card_Type'] = (
    data['Card_Type']
    .replace(['MastCard', 'Master-Card', 'MasterCard'], 'Master Card')
    .replace(['visa', 'Vsa', 'VISA'], 'Visa')
)

In [188]:
# Cities noted while exploring: tehran, isfahan, tabriz, shiraz, karaj,
# ahvaz, qom, mashhad, nan.
# Strip surrounding whitespace first so padded names such as 'Tehran ' are
# fixed for every city, then fold the known misspellings onto their
# canonical label. NaN and unlisted values pass through unchanged.
data['City'] = (
    data['City']
    .str.strip()
    .replace(['TEHRAN', 'THR', 'tehr@n', 'ThRan'], 'Tehran')
    .replace('karaj', 'Karaj')
)

## Making a new column
Some amounts are negative. That does not necessarily mean the data is broken or bad; it may mean the payment was refunded.

In [189]:
# Label each row by the sign of its amount. The three tests are mutually
# exclusive; a row matching none of them (e.g. a NaN amount) gets "Unknown".
data["Amount_Type"] = np.select(
    condlist=[data["Amount"] > 0, data["Amount"] < 0, data["Amount"] == 0],
    choicelist=["Payment", "Refund", "Zero_Amount"],
    default="Unknown",
)

## Filling null values

In [190]:
# Replace missing card types and cities with the explicit label 'Unknown'
# so those rows stay visible in later group-bys instead of being dropped.
data[['Card_Type', 'City']] = data[['Card_Type', 'City']].fillna('Unknown')

## Extracting hour

In [191]:
# Parse the timestamp strings into datetimes so the .dt accessor can be used.
# No explicit format is passed, so pandas infers it from the values.
data['Time'] = pd.to_datetime(data['Time'])

In [192]:
# Hour of day taken from the parsed timestamp.
data['Hour'] = data['Time'].dt.hour

In [193]:
# Show the first ten rows of the cleaned frame, including the new columns.
data.head(10)

Unnamed: 0,Status,Time,Card_Type,City,Amount,ID,Amount_Type,Hour
0,Success,2025-09-07 10:48:00,Visa,Tehran,1427657.0,98,Payment,10
1,Success,2025-09-20 13:10:00,Master Card,Tehran,1578078.0,52,Payment,13
2,Fail,2025-09-29 03:27:00,Visa,Tehran,250000.0,20,Payment,3
3,Success,2025-09-15 13:13:00,Discover,Isfahan,0.0,40,Zero_Amount,13
4,Success,2025-09-11 11:11:00,Visa,Tehran,1782689.0,59,Payment,11
5,Fail,2025-09-08 21:21:00,Discover,Tehran,1050805.0,77,Payment,21
6,Success,2025-09-29 22:27:00,Visa,Tabriz,1387769.0,14,Payment,22
7,Fail,2025-09-21 03:00:00,Master Card,Tehran,1502601.0,44,Payment,3
8,Success,2025-09-07 12:12:00,Discover,Isfahan,-999999.0,87,Refund,12
9,Fail,2025-09-26 07:32:00,Visa,Tehran,0.0,82,Zero_Amount,7


## Saving the data

In [194]:
# Write the cleaned frame to the current working directory; index=False
# leaves the row index out of the file.
data.to_csv(r'iran_transactions.csv', index=False)