# ERROR HANDLING IN DATA ANALYTICS

1️⃣ Convert Stock Prices to Floats, Replace Invalid Prices with 0

stocks = [{"Stock": "Tata", "Price": "1200"}, {"Stock": "Reliance", "Price": "Invalid"}, {"Stock": "Infosys", "Price": None}]

In [30]:
stocks = [{"Stock": "Tata", "Price": "1200"}, {"Stock": "Reliance", "Price": "Invalid"}, {"Stock": "Infosys", "Price": None}]

import pandas as pd
df=pd.DataFrame(stocks)
print(df)

def cleaned(value):
    try:
        stock=str(value["Stock"])if value["Stock"] else "unknown"
        price=float(value["Price"])if not pd.isna(value["Price"]) else 0
        return {"Stock":stock, "Price":price}
    except ValueError:
        print("you have a valuerror")
        return {"Stock":stock, "Price":0}
    except TypeError:
        print("you have a typerror")
        return {"Stock":stock, "Price":0}

df["latest"]=df.apply(cleaned,axis=1)
print(df["latest"])

      Stock    Price
0      Tata     1200
1  Reliance  Invalid
2   Infosys     None
you have a valuerror
0    {'Stock': 'Tata', 'Price': 1200.0}
1     {'Stock': 'Reliance', 'Price': 0}
2      {'Stock': 'Infosys', 'Price': 0}
Name: latest, dtype: object


2️⃣ Fill Missing Values in Product Sales Data

data = {"Product": ["Laptop", None, "Mobile"], "Price": [75000, None, 25000]}

In [32]:
data = {"Product": ["Laptop", None, "Mobile"], "Price": [75000, None, 25000]}

import pandas as pd 
df=pd.DataFrame(data)
print(df)

def cleaned(value):
    try:
        product = str(value["Product"]) if value["Product"] else "unknown"
        price = float(value["Price"]) if not pd.isna(value["Price"]) else 0
        return {"Product":product,"Price":price}
    except ValueError:
        return {"Product":product,"Price":0}
    except TypeError:
        return {"Product":product,"Price":0}

df["latest"]=df.apply(cleaned,axis=1)

print(df)

  Product    Price
0  Laptop  75000.0
1    None      NaN
2  Mobile  25000.0
  Product    Price                                   latest
0  Laptop  75000.0  {'Product': 'Laptop', 'Price': 75000.0}
1    None      NaN       {'Product': 'unknown', 'Price': 0}
2  Mobile  25000.0  {'Product': 'Mobile', 'Price': 25000.0}


3️⃣ Validate Order Details

orders = [{"OrderID": 101, "DeliveryDate": "2023-01-10"}, {"OrderID": None, "DeliveryDate": "Invalid Date"}]

In [49]:
orders = [{"OrderID": 101, "DeliveryDate": "2023-01-10"}, {"OrderID": None, "DeliveryDate": "Invalid Date"}]

import pandas as pd 
df=pd.DataFrame(orders)
print(df)

def cleaned(value):
    try:
        id=int(value["OrderID"]) if not pd.isna(value["OrderID"]) else 0
        date=pd.to_datetime("DeliveryDate",errors="coerce")
        return {"OrderID":id,"DeliveryDate":date}
    except ValueError:
        return {"OrderID":0,"DeliveryDate":None}
    except TypeError:
        return {"OrderID":0,"DeliveryDate":None}

df["cleaned_orders"]=df.apply(cleaned,axis=1)
print(df)

   OrderID  DeliveryDate
0    101.0    2023-01-10
1      NaN  Invalid Date
   OrderID  DeliveryDate                         cleaned_orders
0    101.0    2023-01-10  {'OrderID': 101, 'DeliveryDate': NaT}
1      NaN  Invalid Date    {'OrderID': 0, 'DeliveryDate': NaT}


4️⃣ Read Multiple CSV Files and Log Missing Files

files = ["jan_sales.csv", "feb_sales.csv", "mar_sales.csv"]

In [17]:
files = ["jan_sales.csv", "feb_sales.csv", "mar_sales.csv"]

import pandas as pd

def loader(filepath):
    try:
        df=pd.read_csv(filepath)
        return df
        
    except Exception as e:
        print(e)

file_loading=[loader(file)for file in files]
print(file_loading)

[Errno 2] No such file or directory: 'jan_sales.csv'
[Errno 2] No such file or directory: 'mar_sales.csv'
[None,       Product  Sales  Quantity
0      Laptop  50000        50
1      Mobile  30000       100
2      Tablet  20000        70
3  Headphones  15000       200
4  Smartwatch  10000        90, None]


5️⃣ Process Transactions, Convert Amounts to Float

transactions = [{"ID": "TXN1001", "Amount": "5000"}, {"ID": "TXN1002", "Amount": "Invalid"}, {"ID": None, "Amount": 3000}]

In [33]:
transactions = [{"ID": "TXN1001", "Amount": "5000"}, {"ID": "TXN1002", "Amount": "Invalid"}, {"ID": None, "Amount": 3000}]

import pandas as pd
df=pd.DataFrame(transactions)
print(df)

def cleaned(value):
    try:
        id=str(value["ID"]) if value["ID"] else "Unknown"
        amount=int(value["Amount"]) if not pd.isna(value["Amount"]) else 0
        return {"ID":id,"Amount":amount}
    except ValueError:
        return {"ID":id,"Amount":0}
    except TypeError:
        return {"ID":id,"Amount":0} 

df["cleaned_txn"]=df.apply(cleaned,axis=1)
print(df)

        ID   Amount
0  TXN1001     5000
1  TXN1002  Invalid
2     None     3000
        ID   Amount                        cleaned_txn
0  TXN1001     5000  {'ID': 'TXN1001', 'Amount': 5000}
1  TXN1002  Invalid     {'ID': 'TXN1002', 'Amount': 0}
2     None     3000  {'ID': 'Unknown', 'Amount': 3000}


6️⃣ Convert Bill Amounts to Integers

data = [{"Customer": "Rahul", "Bill Amount": "1200"}, {"Customer": "Aisha", "Bill Amount": "Five Hundred"}, {"Customer": "Karan", "Bill Amount": "2000"}]

In [34]:
data = [{"Customer": "Rahul", "Bill Amount": "1200"}, {"Customer": "Aisha", "Bill Amount": "Five Hundred"}, {"Customer": "Karan", "Bill Amount": "2000"}]

import pandas as pd

df=pd.DataFrame(data)
print(df)

def cleaned(value):
    try:
        customer=str(value["Customer"]) if value["Customer"] else "Unknown"
        amount=int(value["Bill Amount"]) if not pd.isna(value["Bill Amount"]) else 0
        return {"Customer":customer,"Bill Amount":amount}
    except ValueError:
        return {"Customer":customer,"Bill Amount":0}
    except TypeError:
        return {"Customer":customer,"Bill Amount":0}

df["cleaned_data"]=df.apply(cleaned,axis=1)
print(df)

  Customer   Bill Amount
0    Rahul          1200
1    Aisha  Five Hundred
2    Karan          2000
  Customer   Bill Amount                                cleaned_data
0    Rahul          1200  {'Customer': 'Rahul', 'Bill Amount': 1200}
1    Aisha  Five Hundred     {'Customer': 'Aisha', 'Bill Amount': 0}
2    Karan          2000  {'Customer': 'Karan', 'Bill Amount': 2000}


7️⃣ Handle Missing Warehouse File Gracefully

file_path = "warehouse_data.csv"

In [20]:
import pandas as pd

filepath="warehouse_data.csv"

try:
    df=pd.read_csv(filepath)
    print(df)
except Exception as e:
    print(e)


[Errno 2] No such file or directory: 'warehouse_data.csv'


8️⃣ Replace Missing Ages with Average Age in DataFrame

data = {"Patient": ["Rahul", "Amit", "Anita"], "Age": [30, None, 40]}

In [38]:
data = {"Patient": ["Rahul", "Amit", "Anita"], "Age": [30, None, 40]}

import pandas as pd

df=pd.DataFrame(data)
print(df)

cleaned=[]

for value in df["Age"]:

    try:
        cleaned.append(int(value))
        
    except Exception as e:
        
        cleaned.append(0)

df["updated age"]=cleaned
print(df)


  Patient   Age
0   Rahul  30.0
1    Amit   NaN
2   Anita  40.0
  Patient   Age  updated age
0   Rahul  30.0           30
1    Amit   NaN            0
2   Anita  40.0           40


9️⃣ Read File and Convert Each Line to Integer, Logging Errors

file_content = ["100", "200", "Invalid", "400"]


In [28]:
file_content = ["100", "200", "Invalid", "400"]

import logging

logging.basicConfig(filename="error_log.txt", level=logging.ERROR)

def cleaned(value):
    try:
        return int(value)
    except Exception as e:
        logging.error({e})
        return 0

list=[cleaned(value) for value in file_content]
print(list)

[100, 200, 0, 400]


🔟 Validate Pincode Entries in Addresses

addresses = [{"City": "Mumbai", "Pincode": 400001}, {"City": "Delhi", "Pincode": "One One Zero Zero One"}]

In [46]:
import pandas

addresses = [{"City": "Mumbai", "Pincode": 400001}, {"City": "Delhi", "Pincode": "One One Zero Zero One"}]

for value in addresses:

    try:
        value["City"]=str(value["City"])
        value["Pincode"]=int(value["Pincode"])
    except (ValueError,TypeError):
        value["Pincode"]=0

print(addresses)

[{'City': 'Mumbai', 'Pincode': 400001}, {'City': 'Delhi', 'Pincode': 0}]
