In [10]:
import pandas as pd

# Build a small demo frame. Age, Income and Signup_Date are deliberately
# stored as strings (Income even contains a '?' placeholder) so that the
# following cells have something to convert.
data = dict(
    Age=['25', '30', '35', '40', '45'],
    Income=['$1000.5', '$1500.0', '$2000.75', '?', '$3000.25'],
    Signup_Date=['2023-01-15', '2023-02-20', '2023-03-25', '2023-04-30', '2023-05-05'],
    Color=['Red', 'Blue', 'Green', 'Red', 'Blue'],
    Score=[85, 90, 78, 92, 88],
)

df = pd.DataFrame(data=data)
df


Unnamed: 0,Age,Income,Signup_Date,Color,Score
0,25,$1000.5,2023-01-15,Red,85
1,30,$1500.0,2023-02-20,Blue,90
2,35,$2000.75,2023-03-25,Green,78
3,40,?,2023-04-30,Red,92
4,45,$3000.25,2023-05-05,Blue,88


In [11]:
# Inspect the starting dtypes: every column except Score is stored as object.
df.dtypes

Age            object
Income         object
Signup_Date    object
Color          object
Score           int64
dtype: object

In [12]:
# Turn the string ages into real numbers; a value that cannot be parsed
# becomes NaN rather than raising.
df['Age'] = pd.to_numeric(df.Age, errors='coerce')

In [13]:
# Verify the conversion: Age should now report a numeric dtype.
df.dtypes

Age             int64
Income         object
Signup_Date    object
Color          object
Score           int64
dtype: object

In [14]:
# Strip the currency symbol so the values can later be parsed as numbers.
# regex=False makes '$' a literal character: as a regular expression '$'
# means "end of string", and the default value of `regex` differs between
# pandas versions, so being explicit keeps the result version-independent.
df['Income'] = df['Income'].str.replace('$', '', regex=False)

In [15]:
# Check the stripped values: the '?' placeholder is still present and the
# column is still object dtype at this point.
df.Income

0     1000.5
1     1500.0
2    2000.75
3          ?
4    3000.25
Name: Income, dtype: object

In [18]:
# Convert the cleaned income strings to floats. errors='coerce' maps anything
# non-numeric (here the '?' placeholder) to NaN instead of raising.
df['Income'] = pd.to_numeric(df.Income, errors='coerce')

In [19]:
# Verify the conversion: Income should now be float64.
df.dtypes

Age              int64
Income         float64
Signup_Date     object
Color           object
Score            int64
dtype: object

In [20]:
# Show the frame; the row whose Income was '?' now holds a missing value.
df

Unnamed: 0,Age,Income,Signup_Date,Color,Score
0,25,1000.5,2023-01-15,Red,85
1,30,1500.0,2023-02-20,Blue,90
2,35,2000.75,2023-03-25,Green,78
3,40,,2023-04-30,Red,92
4,45,3000.25,2023-05-05,Blue,88


In [21]:
# Widen Score from integer to floating point.
df['Score'] = df.Score.astype(float)

In [22]:
# Verify the conversion: Score should now be float64.
df.dtypes

Age              int64
Income         float64
Signup_Date     object
Color           object
Score          float64
dtype: object

In [23]:
# Show the frame; Score values now display with a decimal point.
df

Unnamed: 0,Age,Income,Signup_Date,Color,Score
0,25,1000.5,2023-01-15,Red,85.0
1,30,1500.0,2023-02-20,Blue,90.0
2,35,2000.75,2023-03-25,Green,78.0
3,40,,2023-04-30,Red,92.0
4,45,3000.25,2023-05-05,Blue,88.0


In [25]:
# Parse the 'YYYY-MM-DD' strings into datetime64 values. Giving the format
# explicitly avoids relying on pandas' format inference and makes any string
# that does not match the pattern fail loudly instead of being guessed at.
df['Signup_Date'] = pd.to_datetime(df['Signup_Date'], format='%Y-%m-%d')

In [26]:
# Verify the conversion: Signup_Date should now be datetime64[ns].
df.dtypes

Age                     int64
Income                float64
Signup_Date    datetime64[ns]
Color                  object
Score                 float64
dtype: object

In [28]:
# Show the frame after the date conversion.
df

Unnamed: 0,Age,Income,Signup_Date,Color,Score
0,25,1000.5,2023-01-15,Red,85.0
1,30,1500.0,2023-02-20,Blue,90.0
2,35,2000.75,2023-03-25,Green,78.0
3,40,,2023-04-30,Red,92.0
4,45,3000.25,2023-05-05,Blue,88.0


In [32]:
# Pull the calendar year out of the parsed timestamps into its own column.
df['Year'] = df.Signup_Date.dt.year

In [33]:
# Show the frame with the new Year column.
df

Unnamed: 0,Age,Income,Signup_Date,Color,Score,Year
0,25,1000.5,2023-01-15,Red,85.0,2023
1,30,1500.0,2023-02-20,Blue,90.0,2023
2,35,2000.75,2023-03-25,Green,78.0,2023
3,40,,2023-04-30,Red,92.0,2023
4,45,3000.25,2023-05-05,Blue,88.0,2023


In [35]:
# Replace each colour label with its integer category code. Note that this
# overwrites the original labels in place, so the text values are no longer
# available in df after this cell runs.
df['Color'] = df.Color.astype('category').cat.codes

In [36]:
# Show the frame; Color now holds integer codes instead of text labels.
df

Unnamed: 0,Age,Income,Signup_Date,Color,Score,Year
0,25,1000.5,2023-01-15,2,85.0,2023
1,30,1500.0,2023-02-20,0,90.0,2023
2,35,2000.75,2023-03-25,1,78.0,2023
3,40,,2023-04-30,2,92.0,2023
4,45,3000.25,2023-05-05,0,88.0,2023


In [39]:
# Score bands: up to 80 -> Low, above 80 up to 90 -> Medium,
# above 90 up to 100 -> High.
bins = [0, 80, 90, 100]
labels = ['Low', 'Medium', 'High']

# Bucket each score into its band. Intervals are closed on the right, so a
# score of exactly 80 is 'Low' and exactly 90 is 'Medium'.
# include_lowest=True also closes the first interval on the left, so a score
# of exactly 0 is labelled 'Low' instead of falling outside every bin (NaN).
df['Score_Category'] = pd.cut(
    df['Score'],
    bins=bins,
    labels=labels,
    include_lowest=True,
)


In [40]:
# Show the frame with the new Score_Category column.
df

Unnamed: 0,Age,Income,Signup_Date,Color,Score,Year,Score_Category
0,25,1000.5,2023-01-15,2,85.0,2023,Medium
1,30,1500.0,2023-02-20,0,90.0,2023,Medium
2,35,2000.75,2023-03-25,1,78.0,2023,Low
3,40,,2023-04-30,2,92.0,2023,High
4,45,3000.25,2023-05-05,0,88.0,2023,Medium
