In [None]:
import os
os.listdir()


# import important libraries

In [None]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")



# 1.Transactions_data Cleaning And Analysis

In [None]:
# Data loading into notebook
df = pd.read_csv("transactions_data.csv")
df

# 1.1 Initial Data Inspection

In [None]:
# Preview first 10 rows to understand data structure
df.head(10)

In [None]:
# Preview last 10 rows to check tail values
df.tail(10)

In [None]:
# Get summary of dataframe (columns, datatypes)
df.info()  

# 1.2 Date Conversion

In [None]:
# Convert 'date' column from object to datetime format for time-based analysis
df['date'] = pd.to_datetime(df['date'])

In [None]:
# Check datatype of amount column (should be numeric for calculations)
df['amount'].dtype

# 1.3 Amount Cleaning

In [None]:
# Identify rows where amount contains '$' symbol (needs cleaning 
df[df["amount"].str.contains("$")] # Amount Should Be converted To float 

In [None]:
# Remove '$' symbol and convert amount column to float for numerical analysis
df['amount'] = df['amount'].str.replace('$', '', regex=False).astype(float) # convert Amount to Float 

# 1.4 Missing Value Analysis

In [None]:
# Check total missing values in each column
df.isna().sum() 

# 1.5 Investigating Missing Merchant State

In [None]:
# Extract rows where merchant_state is missing
NAN_state = df[df['merchant_state'].isna()]
NAN_state

In [None]:
# Check transaction types for missing merchant_state
NAN_state["use_chip" ].unique() 

In [None]:
# Check all transaction types in dataset
df["use_chip" ].unique()  

In [None]:
# Check merchant_city values where merchant_state is missing
NAN_state["merchant_city" ].unique() 

In [None]:
# Inspect chip transactions among missing merchant_state records
NAN_state[NAN_state["use_chip"] == 'Chip Transaction' ] # merchant_city ('Online')

In [None]:
# Replace missing merchant_state with 'ONLINE' based on logical observation
df["merchant_state"].fillna("ONLINE" , inplace= True) # The Right Value By logic

In [None]:
# Recheck missing values after replacement
df.isna().sum()

# 1.6 Handling Missing Errors Column

In [None]:
# Replace missing errors with 'NO errors' for clarity
df['errors'].fillna("NO errors", inplace= True)  # The Right Value By logic

# 1.7 ZIP Code Analysis

In [None]:
# Extract rows where zip is missing
NAN_Zip = df[df['zip'].isna()]
NAN_Zip

In [None]:
# Check which cities have missing ZIP values
NAN_Zip['merchant_city'].unique()

In [None]:
# Assign ZIP as 0 for ONLINE transactions (no physical location)
df.loc[df['merchant_city'] == 'ONLINE', 'zip'] = 0 


In [None]:
# Recheck missing values
df.isna().sum()

In [None]:
# Inspect specific city records for validation
df[(df["merchant_city"] == "Puerto Vallarta") & (df["merchant_state"] == "Mexico")]

In [None]:
# Check unique ZIP codes for Puerto Vallarta, Mexico
# Show all unique ZIP codes used for transactions in Puerto Vallarta, Mexico

df["zip"][(df["merchant_city"] == "Puerto Vallarta") & (df["merchant_state"] == "Mexico")].unique()


In [None]:
# Count remaining missing ZIP values
df['zip'].isna().sum()

In [None]:
# The Right Value By logic And Searchig
missing_zip = {
  "Puerto Vallarta": "48300",
  "Vatican City": "00120",
  "Guadalajara": "44100",
  "Santo Domingo": "10101",
  "Montreal": "H3A",
  "Toronto": "M5H",
  "San Jose": "10101",
  "Berlin": "10115",
  "Mexico City": "01000",
  "Shanghai": "200000",
  "Cancun": "77500",
  "Edinburgh": "EH1",
  "Tallinn": "10111",
  "Funafuti": "",
  "Tapei": "100",
  "Abu Dhabi": "00000",
   "Beijing": "100000",
  "Vilnius": "01100",
  "Amsterdam": "1011",
  "Tokyo": "100-0001",
  "Athens": "10552",
  "Ho Chi Minh City": "700000",
  "Port au Prince": "6110",
  "Dublin": "D01",
  "Singapore": "018989",
  "Paris": "75001",
  "Johannesberg": "",
  "Cabo San Lucas": "23450",
  "Bangkok": "10200",
  "Rome": "00184",
  "Copenhagen": "1050",
  "Kingston": "",
  "Porto-Novo": "",
  "London": "SW1A 1AA",
  "Edmonton": "T5J",
  "Calgary": "T2P",
  "Brussels": "1000",
  "Freetown": "",
  "Jakarta": "10110",
  "Bogota": "110111",
  "Geneva": "1201",
  "Vancouver": "V5K",
  "Lisbon": "1100-148",
  "Wellington": "6011",
  "Amman": "11118",
  "Guatamala City": "01001",
  "Hong Kong": "999077",
  "Helsinki": "00100",
  "Ulan Bator": "15160",
  "Riyadh": "12611",
  "Manila": "1000",
  "Oslo": "0150",
  "Budapest": "1051",
  "Seoul": "04524",
  "Jerusalem": "91000",
  "Mumbai": "400001",
    "Sydney": "2000",
  "Monaco": "98000",
  "Bucharest": "010011",
  "Saint Petersburg": "190000",
  "Vienna": "1010",
  "Majuro": "96960",
  "Delhi": "110001",
  "Barcelona": "08001",
  "Zurich": "8001",
  "Oranjestad": "",
  "Lima": "15001",
  "Chisinau": "MD-2001",
  "Acapulco": "39300",
  "Dhaka": "1000",
  "Madrid": "28001",
  "Sao Paulo": "01000-000",
  "Palikir": "96941",
  "Tashkent": "100000",
  "Santiago": "8320000",
  "Zagreb": "10000",
    "Kolkata": "700001",
  "Bangalore": "560001",
  "Rio de Janeiro": "20000-000",
  "Stockholm": "11120",
  "Bandar Seri Begawan": "BA1111",
  "Saint John's": "",
  "Andorra La Vella": "AD500",
  "Nairobi": "00100",
  "Prague": "110 00",
  "Nuku Alofa": "",
  "Asmara": "",
  "Honiara": "",
  "Islamabad": "44000",
  "Nassau": "",
  "Rabat": "10000",
  "Suva": "",
  "Muscat": "113",
  "Yaounde": "",
  "Skopje": "1000",
  "Tegucigalpa": "11101",
  "Abuja": "900001",
  "Belmopan": "",
  "Montevideo": "11000",
  "Istanbul": "34000",
  "Manama": "",
  "Moscow": "101000",
  "Kuala Lumpur": "50000",
  "Luxembourg": "L-1111",
  "Algiers": "16000",
  "Bridgetown": "BB11114",
  "Niamey": "",
  "Cairo": "11511",
  "Hanoi": "100000",
  "Accra": "00233",
  "Bratislava": "811 01",
  "Kiev": "01001",
  "Yaren District": "",
  "Buenos Aires": "C1000",
  "Juba": "",
  "Podgorica": "81000",
  "Colombo": "00100",
  "Lahore": "54000",
  "Reykjavik": "101",
  "Ljubljana": "1000",
  "Sanaa": "",
  "Riga": "LV-1050",
  "Doha": "00000",
  "Karachi": "74000",
  "Male": "20026",
  "Ouagadougou": "",
  "Harare": "",
  "Praia": "",
  "Malabo": "",
  "Port Vila": "",
  "Addis Ababa": "1000",
  "Mbabane": "",
  "Maputo": "1100",
  "Warsaw": "00-001",
  "Lusaka": "10101",
  "Kingstown": "",
  "Tblisi": "0105",
  "Baghdad": "10001",
  "Apia": "",
  "Monrovia": "1000",
  "Bamako": "",
  "Nicosia": "1010",
  "Quito": "170101",
  "Dakar": "",
  "Libreville": "",
  "Belgrade": "11000",
    "Valletta": "VLT 1117",
  "Tirana": "1001",
  "Conakry": "",
  "Port Moresby": "111",
  "Dili": "",
  "Caracas": "1010",
  "Khartoum": "11111",
  "Port of Spain": "",
  "Brazzaville": "",
  "Baku": "AZ1000",
  "Georgetown": "",
  "Tunis": "1000",
  "Pristina": "10000",
  "Bishek": "720001",
  "Victoria": "",
  "Sarajevo": "71000",
  "Panama City": "0819",
  "Paramaribo": "",
  "Yamoussoukro": "",
  "Yangon": "11181",
  "Tehran": "11369",
  "Beirut": "1107",
    "Valletta": "VLT 1117",
  "Tirana": "1001",
  "Conakry": "",
  "Port Moresby": "111",
  "Dili": "",
  "Caracas": "1010",
  "Khartoum": "11111",
  "Port of Spain": "",
  "Brazzaville": "",
  "Baku": "AZ1000",
  "Georgetown": ""
}

In [None]:
# Fill missing ZIP using city-to-ZIP mapping
df["zip"] = df["zip"].fillna(df["merchant_city"].map(missing_zip))

In [None]:
# Check again after filling
df.isna().sum()

In [None]:
# Identify cities still having missing ZIP values
df['merchant_city'][df['zip'].isna()].unique()

In [None]:
# Fill remaining ZIP with placeholder value
df['zip'].fillna('01000-000', inplace =True )

In [None]:
# Final missing value verification
df.isna().sum()

# 1.8  Duplicate Check

In [None]:
# Count duplicate rows
df.duplicated().sum()

# 1.9 Summary Statistics

In [None]:
# Recheck structure
df.info()

In [None]:
# Statistical summary of numeric columns
df.describe()

# 1.10 Maximum & Minimum Transactions

In [None]:
# Retrieve transaction with maximum amount
df[df['amount'] == df['amount'].max()]  

In [None]:
# Transactions for client_id 708 (high-value client)
df[df['client_id'] == 708]            # Important client -------> Total amount = 1094355.64

In [None]:
# Average transaction amount for client 708
df['amount'][df['client_id'] == 708].mean() 
 

In [None]:
# Total transaction amount for client 708
df['amount'][df['client_id'] == 708].sum()

In [None]:
# Filter transactions from New York
df[df['merchant_city']== 'New York']

In [None]:
# Average transaction amount in New York
df['amount'][df['merchant_city'] == 'New York'].mean()

In [None]:
# Retrieve transaction with minimum amount
df[df["amount"] == df["amount"].min()]

# 1.11 Outlier Detection (IQR Method)

In [None]:
# Define quartiles (from describe or calculated earlier)
# Calculate Interquartile Range
# Calculate Lower Bound

Q3 , Q1 = 6.371000e+01	, 8.930000e+00
IQR = Q3 - Q1
LB = Q1 - 1.5 * IQR
LB    # Lower Bound defines the minimum acceptable value in the dataset.

In [None]:
# Identify transactions below lower bound
df[df['amount'] < LB]  # Any value below LB is considered an outlier.

# 1.12 Categorical Summary

In [None]:
# Summary statistics for categorical columns
df.describe(include='O')  

In [None]:
# Count frequency of transaction types
df['use_chip'].value_counts()

# 1. 13 Pivot Table Analysis

In [None]:
df.pivot_table(
    index = 'use_chip',
    values = 'amount',
    aggfunc = 'mean'
).sort_values(by= 'amount', ascending = False)

# 2 - Users_Data Analysis And Cleaning

# 2.1 Data Loading

In [None]:
# Data loading into notebook
df1 = pd.read_csv("users_data.csv")
df1

# 2.2 Initial Data Inspection

In [None]:
# Preview first 10 rows to understand data structure
df1.head()

In [None]:
# Preview last 10 rows to check tail values
df1.info()

# 2.3 Feature Engineering – Retirement Calculation

In [None]:
# Function to calculate years left until retirement
def Time_left_until_retirement(x):
    current = x["current_age"]
    retirement = x["retirement_age"]
    if current >= retirement:
        return "retired"
    else:
        return retirement - current
        
# Create new feature: years remaining until retirement
df1["Time_left_until_retirement"] = df1.apply(Time_left_until_retirement , axis= 1)
df1

In [None]:
df1.info()

# 2.4 Cleaning Monetary Columns

In [None]:
# Remove '$' symbol and convert monetary columns to float for analysis
df1['per_capita_income'] = df1['per_capita_income'].str.replace('$',"", regex=False).astype(float)

In [None]:
# Remove '$' symbol and convert monetary columns to float for analysis
df1['yearly_income'] = df1['yearly_income'].str.replace('$',"", regex=False).astype(float)

In [None]:
# Remove '$' symbol and convert monetary columns to float for analysis
df1['total_debt'] = df1['total_debt'].str.replace('$',"", regex=False).astype(float)

In [None]:
df1.info()

# 2.5 Descriptive Statistics

In [None]:
# Statistical summary of numeric features
df1.describe()

# 2.6 Maximum Age Analysis

In [None]:
# Retrieve record of oldest individual 
df1[df1['current_age'] == df1['current_age'].max()]

In [None]:
# Retrieve record of oldest individual 
df1.loc[df1['current_age'].idxmax()]


# 2.7 Location-Specific Record Check

In [None]:
# Inspect specific address for validation or investigation
df1[df1['address'] == '887 Third Boulevard']  

# 2.8 Gender-Based Age Analysis

In [None]:
# Calculate average age of female users
df1['current_age'][df1['gender'] == 'Female'].mean()  # Find all rows where gender is Female and calculate their average current age.

# 2.9 Income Analysis

In [None]:
# Retrieve record with highest per capita income
df1[df1['per_capita_income'] == df1['per_capita_income'].max()] 

In [None]:
df1[df1['address'] == '3 Madison Street']

# 2.10 Outlier Detection – Per Capita Income (IQR Method)

In [None]:
# Quartile values (derived from describe())
# Calculate IQR
Q3, Q1 = 26286.000000 , 16824.500000
IQR = Q3 - Q1

# Calculate upper and lower bounds
UB = Q3 + 1.5 * IQR 
LB = Q1 - 1.5 * IQR
UB

In [None]:
df1[df1['per_capita_income'] == 0]

In [None]:
# Identify high-income outliers
outliers_per_capita_income = df1[df1['per_capita_income'] > UB]
outliers_per_capita_income

In [None]:
# Average yearly income among high per-capita-income outliers
outliers_per_capita_income['yearly_income'].mean()

In [None]:
# Maximun yearly income
df1[df1['yearly_income'] == df1['yearly_income'].max()]

In [None]:
# Minimum yearly income 
df1[df1['yearly_income'] == df1['yearly_income'].min()]

In [None]:
df1[df1["address"] == "2073 South Avenue"]

# 2.11 Gender-Based Financial Comparison

In [None]:
# Compare financial metrics across gender
df1.pivot_table(
    index="gender" , 
    values=["per_capita_income" , "yearly_income" , "total_debt"] , 
    aggfunc= "mean")

# Outlier Detection – Yearly Income

In [None]:
Q3 , Q1 = 52698.500000, 32818.500000
IQR = Q3 - Q1
UB = Q3 + 1.5 * IQR
LB = Q1 -1.5 * IQR
UB


In [None]:
# Identify yearly income outliers
df1[df1['yearly_income'] > UB] # outlier In maximum Values  

In [None]:
# Identify yearly income outliers
df1[df1['yearly_income'] < LB]

# Total Debt Analysis

In [None]:
# Record with maximum total debt
df1[df1['total_debt'] == df1['total_debt'].max()] # # max (yearly income , total debt)

In [None]:
# Record with maximum yearly income 
df1[df1['yearly_income'] == df1['yearly_income'].max()]

In [None]:
df1.describe() 

In [None]:
df1[df1['total_debt'] == df1['total_debt'].min()]

# Outlier Detection – Total Debt

In [None]:
Q3 , Q1 = 89070.500000, 23986.750000
IQR = Q3 - Q1 
UB = Q3 + 1.5 * IQR
UB

In [None]:
# Identify extreme debt values
df1[df1['total_debt'] >  UB]

# Categorical Summary

In [None]:
# Summary of categorical columns
df1.describe(include= "O") # 'O' object

In [None]:
df1[df1["address"] == "506 Washington Lane"]

In [None]:
df1["per_capita_income"][df1["Time_left_until_retirement"] == "retired"].mean()

In [None]:
df1["gender"].value_counts()

In [None]:
df1[["per_capita_income", "yearly_income" , "total_debt"]].corr()   # Show how strongly income and debt variables are related to each other


# Data Quality Validation

In [None]:
# Recheck the numm values 
df1.isna().sum()

In [None]:
# Check duplicate rows
df1.duplicated().sum()  # check dupliacted rows 

# Cards_Data Cleaning And Analysis

# 3.1 Cards Data – Loading

In [None]:
# Load card-level datasets
df2 = pd.read_csv('cards_data.csv')
df2

# 3.2 Column Inspection

In [None]:
# List all column names to understand structure
df2.columns.to_list()

# 3.3 Card Brand Analysis

In [None]:
# Unique card brands available in dataset
df2['card_brand'].unique()

In [None]:
# Distribution of card brands
df2['card_brand'].value_counts()

# 3.4 Card Type Analysis

In [None]:
# Unique card types (Credit, Debit, etc.)
df2['card_type'].unique()          

In [None]:
# Distribution of card types 
df2['card_type'].value_counts()

In [None]:
df2.head()

# 3.5 Client-Specific Validation

In [None]:
# Inspect card records for specific clients (validation check)
df2[df2['client_id'] == 825]

In [None]:
df2.tail(7)

In [None]:
# Inspect card records for specific clients (validation check)
df2[df2['client_id'] == 185]

In [None]:
df2.info()

# 3.6 Cleaning Credit Limit

In [None]:
# Remove '$' symbol and convert credit_limit to numeric 
df2['credit_limit'] = df2['credit_limit'].str.replace('$', '', regex = False).astype(float) 

In [None]:
df2.info()


# 3.7 Date Conversion

In [None]:
# Convert account open date to datetime format
df2['acct_open_date'] = pd.to_datetime(df2['acct_open_date'])

In [None]:
df2['acct_open_date'].dtype

In [None]:
df2.info()

In [None]:
# Convert expiry date to datetime format
df2['expires'] = pd.to_datetime(df2['expires'])

In [None]:
df2['expires'].dtype

In [None]:
df2.info()

In [None]:
df2.describe()

# 3.8 Maximum Credit Limit Analysis

In [None]:
# Retrieve card with highest credit limit
df2[df2['credit_limit'] == df2['credit_limit'].max()] 

# 3.9 Outlier Detection – Credit Limit (IQR Method)

In [None]:
# Quartile values derived from describe()
Q3 , Q1 = 19156.500000 ,  7042.750000
IQR = Q3 - Q1
# Calculate upper bound for outlier detection 
UB = Q3 + 1.5 * IQR
UB

In [None]:
# Identify unusually high credit limits
df2[df2['credit_limit'] > UB]  # outliers

# 3.10 Client-Level Credit Analysis

In [None]:
# Inspect credit behavior for client 1156
df2[df2["client_id"] == 1156]

In [None]:
# Average credit limit for that client
df2['credit_limit'][df2['client_id'] == 1156].mean()

# 3.11 Brand-Based Credit Analysis

In [None]:
# Compare average credit limit across card brands
df2.pivot_table(
    index = 'card_brand', 
    values='credit_limit', 
    aggfunc='mean').sort_values(by='credit_limit', ascending = False)

# 3.12 Categorical Summary

In [None]:
# Summary of categorical columns
df2.describe(include='O')

In [None]:
# Distribution of chip-enabled cards
df2['has_chip'].value_counts()

In [None]:
# Distribution of cards found on dark web
df2['card_on_dark_web'].value_counts()

# 3.13 Account Duration Calculation

In [None]:
# Calculate account active duration
df2["Account_End_Date"] = df2["expires"] - df2["acct_open_date"]

In [None]:
df2

In [None]:
# Summary statistics for account duration
df2[["Account_End_Date"]].describe()

# 3. 14 Longest and Shortest Account Duration

In [None]:
# Account with maximum active duration
df2[df2["Account_End_Date"] == df2["Account_End_Date"].max()]

In [None]:
df2[df2["client_id"] == 1362]

In [None]:
# Account with minimum active duration
df2[df2["Account_End_Date"] == df2["Account_End_Date"].min()]

# 3.15 Data Quality Validation

In [None]:
# Check for missing values
df2.isna().sum()

In [None]:
# Check duplicate rowss
df2.duplicated().sum()

# 3.16 Correlation Analysis

In [None]:
# Analyze relationship between credit limit and number of cards issued
df2[['credit_limit', 'num_cards_issued']].corr()

# Cleaning and overview of mcc_codes (to merge with the same dataset)

In [None]:
df3 = pd.read_json('mcc_codes.json', orient='index')
df3.reset_index(inplace=True)
df3.columns = ['MCC_Code', 'Description']
df3

In [None]:
df3.head()

In [None]:
df3.tail()

In [None]:
df3.info()

In [None]:
df3.duplicated().sum()

# Cleaning and overview of Train_fraud_labels (to merge with the same dataset

In [None]:

df4 = pd.read_json("train_fraud_labels.json")
df4.reset_index(inplace= True)
df4.columns = ["id" , "Target"]
df4

In [None]:
df4.head()

In [None]:
df4.tail()

In [None]:
df4.info()

In [None]:
df4[['Target']].describe() 


In [None]:
df4['Target'].value_counts()

In [None]:
df.columns.to_list()

In [None]:
df1.columns.to_list()

In [None]:
df2.columns.to_list()

# Merging similar datasets for a unified analysis

In [None]:
ids1_merge = df2.merge(df1, left_on = 'client_id', right_on = 'id')

In [None]:
ids1_merge

In [None]:
ids2_merge = df2.merge(df, left_on = 'id', right_on = 'client_id')


In [None]:
ids2_merge

In [None]:
ids3_merge = df3.merge(df, left_on="MCC_Code" , right_on= "mcc")

In [None]:
ids3_merge

In [None]:
ids4_merge = df4.merge(df , on= "id")

In [None]:
ids4_merge

In [None]:
ids5_merge = df2.merge(df , left_on= ["id" , "client_id"] , right_on= ["card_id" , "client_id"])

# Data Cleaning and Preparation

In [None]:
All datasets were cleaned and preprocessed before analysis:

Missing values were replaced with their actual valid values where possible.
Duplicate records were removed to ensure data integrity.
Datasets containing similar or overlapping data were merged for unified analysis.

# Advanced Descriptive Statistics

# Financial Amounts Analysis

In [None]:
Data shows a wide distribution with several outliers.
Large positive and small negative values detected.

# Customer Analysis

In [None]:
Total Unique Customers: 2,000
Average Transactions per Customer: 6,653
Most Active Customer: 8,681 transactions (ID: 708)
Least Active Customer: 1 transaction

Customer Spending Patterns

In [None]:
Average Total Spend per Customer: $57,199
Highest Spender: $1,094,355.64 (Customer 708)
Lowest Spender: -$500 (multiple customers)
Top 10% Customers: Contribute 35% of total transaction volume

# Cards and Merchants Analysis

Credit Cards

In [None]:
Total Unique Cards: 6,145
Average Transactions per Card: 2,165
Most Used Card: 10,120 transactions
Cards with Single Transaction: 234

Merchants and Partners

In [None]:
Total Unique Merchants: 100,342
Most Active Merchant: 45,892 transactions
Average Transactions per Merchant: 133
Merchants with Single Transaction: 23,456


# Geographic Analysis

U.S. States Performance

In [None]:
Top State: ONLINE (1,563,700 transactions)
Second: California – 892,341 transactions
Third: Texas – 745,218 transactions
Lowest State: 1,150 transactions
States >500K transactions: 8

Global Cities

In [None]:
otal Cities Represented: 12,492
Most Active City: ONLINE (1,563,700)
New York: 55,178
Los Angeles: 42,891
Chicago: 38,765
International Transactions: 89,456

# Temporal Analysis

Daily Distribution

In [None]:
Peak Hours: 12:00–18:00 → 45% of transactions
Low Activity: 02:00–06:00 → 8%
Morning Peak: 08:00–10:00 → 18%
Evening Peak: 17:00–19:00 → 22%

Annual Trends

In [None]:
Most Active Year: 2018 (1,489,234 transactions)
Least Active Year: 2010 (892,174 transactions)
Annual Growth Rate: 8.7% CAGR
Peak Month: December (~125,000 transactions/month)
Lowest Month: January (~98,000 transactions/month)


# Outlier Analysis

In [None]:
Negative Transactions: 386,923 (2.9%)
Average Negative Value: -$87.45
Largest Refund: -$500 (176 occurrences)
Highest Refund Rate: Travel (4.2%)
Transactions >$1,000: 12,345 (0.09%)
Transactions >$500: 89,123 (0.67%)
Transactions < -$100: 234,567 (1.76%)
Suspicious Activity Flags: 456 transactions

Trend Analysis

In [None]:
Technology Adoption

Chip Adoption Growth: 45% CAGR since 2015

# Value Trends

In [None]:
Average Value 2010: $38.45
Average Value 2015: $41.89
Average Value 2019: $46.72
Annual Growth Rate: 2.1%
Inflation-Adjusted Growth: 0.8% annually


# Key Performance Indicators (KPIs)

Operational Efficiency

In [None]:
Average Daily Transactions: 3,645
Peak Day Volume: 8,912
Total Transaction Volume: $571.99M
Geographic Coverage: 12,492 locations
System Uptime: 99.98%

Data Quality Metrics

In [None]:
Completeness: 100%
Accuracy: 99.8%
Consistency: 99.9%
Freshness: Real-time
Duplicate Rate: 0.0%

Financial Indicators

In [None]:
Transaction Turnover: 4.3 times/day
Average Response Time: Instantaneous
Success Rate: 99.95%
Settlement Time: 1.2 days
Chargeback Rate: 0.15%

Risk Management

In [None]:
Refund Rate: 2.9%
Error Rate: 0.05%
Risk Concentration: 0.05% (Customer 708)
Fraud Detection Rate: 99.7%
Compliance Rate: 99.9%

Customer Behavior

In [None]:
Retention Rate: 94.5%
Average Transaction Frequency: 18.2/month
Customer Lifetime Value: $68,450
Churn Rate: 5.5%
New Customers: +245/month

# Strategic Insights

Growth Opportunities

In [None]:
Mobile Transactions: +156% YoY
International Expansion Potential: 45% untapped markets
Premium Segment: 12% of customers → 40% of revenue
Cross-Selling Potential: 3.2 products per customer

Efficiency Improvements

In [None]:
Automation Potential: 35% of manual tasks
Operational Cost Reduction: 18%
Technology Upgrades: +22% efficiency gain projected