Run the following code, which cleans the `registration_month_year` column, computes each car's age, and uses `head(100)` to display the first 100 rows of the result.

In [2]:
import pandas as pd
import numpy as np
from datetime import datetime

# Month abbreviations matched by the '%b' directive used when parsing below.
MONTH_ABBREVIATIONS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']


def random_month_year_labels(years, rng=None):
    """Build 'Mon-yy' labels with a randomly chosen month for each entry of *years*.

    Args:
        years: Series (or anything ``pd.Series`` accepts) of year values. Each
            value is converted to a string and its last two characters are
            used as the two-digit year.
        rng: Optional object exposing ``choice(seq, size=...)`` (for example
            ``np.random.default_rng(seed)``). Defaults to the global
            ``np.random`` state, as in the original script.

    Returns:
        A Series of strings such as ``'Mar-17'`` carrying the same index as
        *years*, so it can be assigned straight back through the same mask.
    """
    years = pd.Series(years)
    two_digit_years = years.astype(str).str[-2:]
    chooser = np.random if rng is None else rng
    months = chooser.choice(MONTH_ABBREVIATIONS, size=len(two_digit_years))
    # Reuse the source index: pandas aligns Series arithmetic on index labels,
    # so a fresh 0..n-1 index would produce NaN for every row whose label does
    # not happen to fall in that range.
    return pd.Series(months, index=two_digit_years.index) + '-' + two_digit_years


# Guarded so the helper above can be imported without 'cars.csv' being present.
# When this cell is run in a notebook or as a script, __name__ is "__main__"
# and everything below executes at module level exactly as before.
if __name__ == "__main__":
    cars = pd.read_csv('cars.csv')
    # Convert 'registration_month_year' to string format for manipulation
    cars['registration_month_year'] = cars['registration_month_year'].astype(str)

    # Identify erroneous rows that contain only a four-digit year
    mask_year_only = cars['registration_month_year'].str.match(r'^\d{4}$', na=False)

    # Replace them with '<random month>-<last two digits of that year>'
    cars.loc[mask_year_only, 'registration_month_year'] = random_month_year_labels(
        cars.loc[mask_year_only, 'registration_month_year']
    )

    # Identify erroneous rows with 'Third Party insurance'
    mask_insurance = (cars['registration_month_year'] == 'Third Party insurance')

    # For these rows, use '<random month>-<last two digits of manufacturing_year>'
    cars.loc[mask_insurance, 'registration_month_year'] = random_month_year_labels(
        cars.loc[mask_insurance, 'manufacturing_year']
    )

    # Convert 'registration_month_year' back to datetime format
    cars['registration_month_year'] = pd.to_datetime(cars['registration_month_year'], format='%b-%y')

    # Define today's date
    today = pd.to_datetime('2024-03-01')

    # Calculate the age as a total number of months between registration and today
    cars['age'] = (today.year - cars['registration_month_year'].dt.year) * 12 + today.month - cars['registration_month_year'].dt.month

    print(cars.head(100))

    Unnamed: 0                                           car_name  \
0            0                    2017 Mercedes-Benz S-Class S400   
1            1  2020 Nissan Magnite Turbo CVT XV Premium Opt BSVI   
2            2                       2018 BMW X1 sDrive 20d xLine   
3            3                           2019 Kia Seltos GTX Plus   
4            4                    2019 Skoda Superb LK 1.8 TSI AT   
..         ...                                                ...   
95          95                        2018 Maruti Wagon R CNG LXI   
96          96                        2009 Maruti Swift Dzire VDi   
97          97                            2016 Hyundai i10 Sportz   
98          98                              2018 Maruti Swift VXI   
99          99                            2018 Mahindra XUV500 W5   

   registration_month_year         insurance_type fuel_type  seats  \
0               2017-07-01          Comprehensive    Petrol      5   
1               2021-01-01     