In [None]:
# 🛠️ Cleaning Data of Wrong Format in Pandas

# Sometimes a column contains values in the wrong format,  
# like dates as strings, numbers as text, or corrupted values.

# Pandas allows you to convert columns to the correct format using functions like `pd.to_datetime()` or `.astype()`.

In [1]:
import pandas as pd

data = {
    "Date": ["2024-01-01", "2024-02-15", "not_a_date", "2024/03/10"],
    "Price": ["100", "200", "invalid", "300"]
}

df = pd.DataFrame(data)
df

Unnamed: 0,Date,Price
0,2024-01-01,100
1,2024-02-15,200
2,not_a_date,invalid
3,2024/03/10,300


In [2]:
df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
df

Unnamed: 0,Date,Price
0,2024-01-01,100
1,2024-02-15,200
2,NaT,invalid
3,NaT,300


In [None]:
## 📅 Convert Strings to Date Format

# Use `pd.to_datetime()` to convert strings to actual datetime objects.
# Invalid values will be converted to `NaT`.

In [None]:
df = df.dropna()
df

## 🧹 Remove Invalid (NaN) Values After Conversion

# Now that we know which rows are invalid, we can remove them.

Unnamed: 0,Date,Price
0,2024-01-01,100
1,2024-02-15,200


In [None]:
# ✅ Use `pd.to_datetime()` and `pd.to_numeric()` to fix wrong formats  
# ✅ Use `errors='coerce'` to handle bad values safely  
# ✅ Drop or fill `NaN` after conversion  

