# Demo
Fixing errors in numeric data using pandas

In [1]:
import pandas as pd

In [2]:
# Build a small sample dataset whose numeric columns are stored as text and
# therefore need cleaning before they can be used in calculations.
data = dict(
    Price=['$100.00', '$200.50', '$300.75', '$400.00'],  # currency symbol prefix
    Revenue=['1,000', '2,500', '3,750', '5,000'],        # thousands separators
    Quantity=['10', '15', '20', 'twenty-five'],          # last entry is spelled out, not numeric text
)

In [3]:
# Each dict key becomes a column; each list supplies that column's rows.
df = pd.DataFrame.from_dict(data, orient="columns")

In [5]:
# Visually examine the first few rows of the data
df.head(n=5)

Unnamed: 0,Price,Revenue,Quantity
0,$100.00,1000,10
1,$200.50,2500,15
2,$300.75,3750,20
3,$400.00,5000,twenty-five


In [6]:
# Examine the data type of each column
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Price     4 non-null      object
 1   Revenue   4 non-null      object
 2   Quantity  4 non-null      object
dtypes: object(3)
memory usage: 224.0+ bytes


In [8]:
# Display the original dataframe before any cleaning is applied
print("Original Dataframe:")
df

Original Dataframe:


Unnamed: 0,Price,Revenue,Quantity
0,$100.00,1000,10
1,$200.50,2500,15
2,$300.75,3750,20
3,$400.00,5000,twenty-five


In [11]:
# Strip the currency symbol so the remaining text can be converted to float.
# The pattern is a raw string: '$' is a regex metacharacter and must be escaped,
# but in a normal string literal '\$' is an invalid escape sequence that Python
# flags with a DeprecationWarning/SyntaxWarning.
df['Price'] = df['Price'].replace({r'\$': ''}, regex=True).astype(float)

In [13]:
# Remove the thousands separators, then convert the column to float.
df['Revenue'] = (
    df['Revenue']
    .replace(to_replace=',', value='', regex=True)
    .astype(float)
)

In [14]:
# Parse the quantities as numbers. With errors='coerce', entries that cannot
# be parsed (such as 'twenty-five') become NaN instead of raising an error.
df['Quantity'] = pd.to_numeric(
    arg=df['Quantity'],
    errors='coerce',
)

In [15]:
# Display the dataframe after the Price, Revenue and Quantity columns were converted
print("Cleaned Dataframe:")
df

Cleaned Dataframe:


Unnamed: 0,Price,Revenue,Quantity
0,100.0,1000.0,10.0
1,200.5,2500.0,15.0
2,300.75,3750.0,20.0
3,400.0,5000.0,


In [16]:
# Re-check the column dtypes and non-null counts after cleaning
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Price     4 non-null      float64
 1   Revenue   4 non-null      float64
 2   Quantity  3 non-null      float64
dtypes: float64(3)
memory usage: 224.0 bytes
