In [1]:
import pandas as pd
import numpy as np

import logging

import plotly.express as px


In [2]:
logging.basicConfig(
    level=logging.INFO,  # Set the minimum log level
    format='%(asctime)s - %(levelname)s - %(message)s',  # Log message format
    # filename='app.log'  # Log to a file (optional)
)

In [3]:
class DataLoader:
    def __init__(self, raw_dataset):
        self.raw_dataset = raw_dataset


    def read_data(self):
        """
        handling file existence, empty dataset, and parsing errors
        """
        try:
            return pd.read_csv(self.raw_dataset, encoding="ISO-8859-1", engine='python')
            logging.info("read the dataset successfully")
        except FileNotFoundError:
            logging.error("Error: The file was not found.")
        except pd.errors.EmptyDataError:
            logging.error("Error: The file is empty.")
        except pd.errors.ParserError:
            logging.error("Error: The file could not be parsed.")

In [4]:
class DataCleaner:
    def __init__(self, raw_dataset):
        self.raw_dataset = raw_dataset

    def strip_spaces(self):
        try:
            self.raw_dataset['Location'] = self.raw_dataset['Location'].str.strip()
            self.raw_dataset['Description'] = self.raw_dataset['Description'].str.strip()
            logging.info("Successfully stripped spaces")
        except Exception as e:
            logging.error(f"Failed to strip some white spaces: {e}")
        return self.raw_dataset

    def titlise_columns(self):
        try:
            self.raw_dataset['Make'] = self.raw_dataset['Make'].str.title()
            self.raw_dataset['Model'] = self.raw_dataset['Model'].str.title()
            self.raw_dataset['Location'] = self.raw_dataset['Location'].str.title()
            logging.info("Successfully titlised columns")
        except Exception as e:
            logging.error(f"Failed to titlise some columns: {e}")
        return self.raw_dataset

    def convert_to_string(self):
        try:
            cols_to_convert = ['Body Type', 'Fuel Type', 'Color', 'Description']
            self.raw_dataset[cols_to_convert] = self.raw_dataset[cols_to_convert].astype(str)
            logging.info("Successfully converted specified columns to string type")
        except Exception as e:
            logging.error(f"Failed to convert some columns into strings: {e}")
        return self.raw_dataset

    def convert_to_integer(self):
        try:
            numeric_cols = ['Year', 'Price', 'Mileage', 'Cylinders']
            self.raw_dataset[numeric_cols] = self.raw_dataset[numeric_cols].apply(pd.to_numeric, errors='coerce').astype('Int64')
            logging.info("Successfully converted specified columns to integer type")
        except Exception as e:
            logging.error(f"Failed to convert some columns into integers: {e}")
        return self.raw_dataset

    def replace_columns(self):
        try:
            self.raw_dataset['Transmission'] = self.raw_dataset['Transmission'].replace({
                'Automatic Transmission': 'Automatic', 
                'Manual Transmission': 'Manual'
            }, inplace=False)
            self.raw_dataset['Cylinders'] = self.raw_dataset['Cylinders'].replace('Unknown', np.nan)
            logging.info("Successfully replaced column values")
        except Exception as e:
            logging.error(f"Failed to replace some column values: {e}")
        return self.raw_dataset

    def add_underscore_to_columns(self):
        try:
            self.raw_dataset.columns = ['_'.join(column.split()) for column in self.raw_dataset.columns]
            logging.info("Successfully added underscores to column titles")
        except Exception as e:
            logging.error(f"Failed to add underscores to column titles: {e}")
        return self.raw_dataset

    def imputate_null_values(self):
        try:
            for column in self.raw_dataset.select_dtypes(include=['float64', 'int64']).columns:
                self.raw_dataset[column] = self.raw_dataset[column].fillna(self.raw_dataset[column].mean())
                logging.info(f"Successfully imputated numerical values for {column}")
        except Exception as e:
            logging.error(f"Failed to imputate some numeric values: {e}")

        try:
            for column in self.raw_dataset.select_dtypes(include=['object']).columns:
                self.raw_dataset[column] = self.raw_dataset[column].fillna(self.raw_dataset[column].mode()[0])
                logging.info(f"Successfully imputated alphabetic values for {column}")
        except Exception as e:
            logging.error(f"Failed to imputate some alphabetic values: {e}")

        return self.raw_dataset

    def remove_duplicates(self):
        try:
            logging.info(f"Number of rows before removing duplicates: {len(self.raw_dataset)}")
            # Remove duplicate rows based on all columns
            self.raw_dataset = self.raw_dataset.drop_duplicates(keep='first')
            logging.info(f"Number of rows after removing duplicates: {len(self.raw_dataset)}")
        except Exception as e:
            logging.error(f"Failed to remove some duplicate values: {e}")

        return self.raw_dataset

    def save_changes(self, cleaned_data_path='../dataset/cleaned/cleaned_uae_cars.csv'):
        try:
            self.raw_dataset.to_csv(cleaned_data_path, index=False)
            logging.info("The cleaned dataset was successfully saved")
        except Exception as e:
            logging.error(f"Failed to save the cleaned data file: {e}")
        return self.raw_dataset


In [5]:
class DataExplorer:
    def __init__(self, cleaned_data):
        self.cleaned_data = cleaned_data

    def top_5_most_expensive_cars(self):
        try:
            # Use .loc to avoid SettingWithCopyWarning
            self.cleaned_data.loc[:, 'Car'] = self.cleaned_data.apply(
                lambda row: f"{row['Make']} {row['Model']} {row['Year']}", axis=1
            )
            
            # Sort and get the top 5 most expensive cars
            top_5 = (
                self.cleaned_data[['Car', 'Price']]
                .sort_values(by='Price', ascending=False)
                .head(5)
            )
            logging.info(f"Top 5 most expensive cars: \n{top_5}")
        except Exception as e:
            logging.error(f"Failed to get top 5 most expensive cars: {e}")
            top_5 = pd.DataFrame()

        return top_5


    def avg_price_of_make(self):
        try:
            avg_price_of_make = (
                self.cleaned_data.groupby('Make')['Price']
                .mean()
            )
            
            # Sort and get the top 5 most expensive cars
            avg_price_of_make = (
                self.cleaned_data[['Make', 'Price']]
                .sort_values(by='Price', ascending=False)
                .head(5)
            )
            logging.info(f"Top 5 most expensive cars: \n{avg_price_of_make}")
        except Exception as e:
            logging.error(f"Failed to get top 5 most expensive cars: {e}")

        return avg_price_of_make

In [6]:
if __name__ == "__main__":
    raw_dataset = '../dataset/raw/uae_used_cars.csv'
    cleaned_dataset = '../dataset/cleaned/cleaned_uae_cars.csv'

# Instantiate the Loader Class

In [8]:
    loader = DataLoader(raw_dataset)

# Read and Load the Dataset

In [10]:
    data_loader = loader.read_data()
    print(data_loader)

               Make                   Model  Year   Price  Mileage  \
0            toyota                   camry  2016   47819   156500   
1               kia                 sorento  2013   61250   169543   
2              mini                  cooper  2023   31861   221583   
3            nissan                  altima  2016  110322    69754   
4            toyota  land-cruiser-76-series  2020  139994    71399   
...             ...                     ...   ...     ...      ...   
9995          tesla                 model-3  2018  273413    76920   
9996           audi                      a3  2022   80053   258150   
9997         toyota                   prado  2014  183381    80525   
9998        peugeot                  expert  2016   40876   288305   
9999  mercedes-benz                 c-class  2009  150261   283648   

                 Body Type Cylinders            Transmission Fuel Type  Color  \
0                    Sedan         4  Automatic Transmission  Gasoline  Black 

In [11]:
    print(data_loader.describe())

               Year         Price        Mileage
count  10000.000000  1.000000e+04   10000.000000
mean    2014.472800  2.452345e+05  155161.871700
std        5.790839  4.709773e+05   83681.858983
min     2005.000000  7.183000e+03   10006.000000
25%     2009.000000  5.035250e+04   82904.000000
50%     2014.000000  1.027660e+05  154370.500000
75%     2019.000000  2.312480e+05  227551.250000
max     2024.000000  1.468698e+07  299996.000000


In [12]:
    print(data_loader.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Make          10000 non-null  object
 1   Model         10000 non-null  object
 2   Year          10000 non-null  int64 
 3   Price         10000 non-null  int64 
 4   Mileage       10000 non-null  int64 
 5   Body Type     10000 non-null  object
 6   Cylinders     9895 non-null   object
 7   Transmission  10000 non-null  object
 8   Fuel Type     10000 non-null  object
 9   Color         10000 non-null  object
 10  Location      10000 non-null  object
 11  Description   10000 non-null  object
dtypes: int64(3), object(9)
memory usage: 937.6+ KB
None


In [13]:
    print(data_loader.columns)

Index(['Make', 'Model', 'Year', 'Price', 'Mileage', 'Body Type', 'Cylinders',
       'Transmission', 'Fuel Type', 'Color', 'Location', 'Description'],
      dtype='object')


# Locations

- some cities contain white spaces which they should be stripped of
- there are duplicate location that must be dropped with duplicate rows

In [15]:
    print(data_loader['Location'].unique())

[' Dubai' ' Abu Dhabi' 'Abu Dhabi' ' Sharjah' 'Dubai' ' Ajman' 'Ajman'
 ' Al Ain' 'Al Ain' ' Fujeirah' 'Umm Al Qawain' ' Umm Al Qawain' 'Sharjah'
 'Ras Al Khaimah' ' Ras Al Khaimah' 'Fujeirah']


In [16]:
    print(type(data_loader))

<class 'pandas.core.frame.DataFrame'>


# Passing the loaded dataset into the Cleaner Class

In [18]:
    cleaner = DataCleaner(data_loader)

In [19]:
    cleaner.strip_spaces()

2025-03-14 01:47:28,998 - INFO - Successfully stripped spaces


Unnamed: 0,Make,Model,Year,Price,Mileage,Body Type,Cylinders,Transmission,Fuel Type,Color,Location,Description
0,toyota,camry,2016,47819,156500,Sedan,4,Automatic Transmission,Gasoline,Black,Dubai,"2016 toyota camry with Rear camera, Leather se..."
1,kia,sorento,2013,61250,169543,SUV,4,Automatic Transmission,Gasoline,Grey,Abu Dhabi,"2013 kia sorento with Sunroof, Adaptive cruise..."
2,mini,cooper,2023,31861,221583,Soft Top Convertible,4,Automatic Transmission,Gasoline,Grey,Dubai,"2023 mini cooper with Adaptive cruise control,..."
3,nissan,altima,2016,110322,69754,Sedan,4,Automatic Transmission,Gasoline,Red,Dubai,"2016 nissan altima with Rear camera, Adaptive ..."
4,toyota,land-cruiser-76-series,2020,139994,71399,Pick Up Truck,4,Manual Transmission,Gasoline,White,Dubai,2020 toyota land-cruiser-76-series with Adapti...
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,tesla,model-3,2018,273413,76920,Sedan,,Automatic Transmission,Electric,White,Dubai,"2018 tesla model-3 with Bluetooth, Sunroof, Le..."
9996,audi,a3,2022,80053,258150,Sedan,4,Automatic Transmission,Gasoline,Red,Dubai,"2022 audi a3 with Sunroof, Bluetooth, Rear cam..."
9997,toyota,prado,2014,183381,80525,SUV,6,Automatic Transmission,Gasoline,White,Dubai,"2014 toyota prado with Rear camera, Adaptive c..."
9998,peugeot,expert,2016,40876,288305,Utility Truck,4,Automatic Transmission,Diesel,White,Dubai,"2016 peugeot expert with Navigation system, Re..."


In [20]:
    cleaner.titlise_columns()

2025-03-14 01:47:29,012 - INFO - Successfully titlised columns


Unnamed: 0,Make,Model,Year,Price,Mileage,Body Type,Cylinders,Transmission,Fuel Type,Color,Location,Description
0,Toyota,Camry,2016,47819,156500,Sedan,4,Automatic Transmission,Gasoline,Black,Dubai,"2016 toyota camry with Rear camera, Leather se..."
1,Kia,Sorento,2013,61250,169543,SUV,4,Automatic Transmission,Gasoline,Grey,Abu Dhabi,"2013 kia sorento with Sunroof, Adaptive cruise..."
2,Mini,Cooper,2023,31861,221583,Soft Top Convertible,4,Automatic Transmission,Gasoline,Grey,Dubai,"2023 mini cooper with Adaptive cruise control,..."
3,Nissan,Altima,2016,110322,69754,Sedan,4,Automatic Transmission,Gasoline,Red,Dubai,"2016 nissan altima with Rear camera, Adaptive ..."
4,Toyota,Land-Cruiser-76-Series,2020,139994,71399,Pick Up Truck,4,Manual Transmission,Gasoline,White,Dubai,2020 toyota land-cruiser-76-series with Adapti...
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,Tesla,Model-3,2018,273413,76920,Sedan,,Automatic Transmission,Electric,White,Dubai,"2018 tesla model-3 with Bluetooth, Sunroof, Le..."
9996,Audi,A3,2022,80053,258150,Sedan,4,Automatic Transmission,Gasoline,Red,Dubai,"2022 audi a3 with Sunroof, Bluetooth, Rear cam..."
9997,Toyota,Prado,2014,183381,80525,SUV,6,Automatic Transmission,Gasoline,White,Dubai,"2014 toyota prado with Rear camera, Adaptive c..."
9998,Peugeot,Expert,2016,40876,288305,Utility Truck,4,Automatic Transmission,Diesel,White,Dubai,"2016 peugeot expert with Navigation system, Re..."


In [21]:
    print(data_loader['Make'].unique())

['Toyota' 'Kia' 'Mini' 'Nissan' 'Chevrolet' 'Cadillac' 'Mercedes-Benz'
 'Infiniti' 'Mazda' 'Jeep' 'Ferrari' 'Bmw' 'Porsche' 'Bentley'
 'Land-Rover' 'Honda' 'Dodge' 'Rolls-Royce' 'Ford' 'Hyundai' 'Lamborghini'
 'Mitsubishi' 'Aston-Martin' 'Gmc' 'Renault' 'Volkswagen' 'Lexus' 'Suzuki'
 'Lincoln' 'Audi' 'Maybach' 'Peugeot' 'Jaguar' 'Citroen' 'Maserati'
 'Tesla' 'Volvo' 'Lotus' 'Mclaren' 'Alfa-Romeo' 'Fiat' 'Chrysler' 'Opel'
 'Mercedes-Maybach' 'Geely' 'Acura' 'Subaru' 'Genesis' 'Isuzu'
 'Westfield-Sportscars' 'Mg' 'Hummer' 'Skoda' 'Mercury' 'Rover' 'Changan'
 'Other-Make' 'Daihatsu' 'Jetour' 'Saab' 'Gac' 'Haval' 'Baic' 'Smart'
 'Morgan']


In [22]:
    print(data_loader['Transmission'].unique())

['Automatic Transmission' 'Manual Transmission']


In [23]:
    cleaner.convert_to_string()

2025-03-14 01:47:29,032 - INFO - Successfully converted specified columns to string type


Unnamed: 0,Make,Model,Year,Price,Mileage,Body Type,Cylinders,Transmission,Fuel Type,Color,Location,Description
0,Toyota,Camry,2016,47819,156500,Sedan,4,Automatic Transmission,Gasoline,Black,Dubai,"2016 toyota camry with Rear camera, Leather se..."
1,Kia,Sorento,2013,61250,169543,SUV,4,Automatic Transmission,Gasoline,Grey,Abu Dhabi,"2013 kia sorento with Sunroof, Adaptive cruise..."
2,Mini,Cooper,2023,31861,221583,Soft Top Convertible,4,Automatic Transmission,Gasoline,Grey,Dubai,"2023 mini cooper with Adaptive cruise control,..."
3,Nissan,Altima,2016,110322,69754,Sedan,4,Automatic Transmission,Gasoline,Red,Dubai,"2016 nissan altima with Rear camera, Adaptive ..."
4,Toyota,Land-Cruiser-76-Series,2020,139994,71399,Pick Up Truck,4,Manual Transmission,Gasoline,White,Dubai,2020 toyota land-cruiser-76-series with Adapti...
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,Tesla,Model-3,2018,273413,76920,Sedan,,Automatic Transmission,Electric,White,Dubai,"2018 tesla model-3 with Bluetooth, Sunroof, Le..."
9996,Audi,A3,2022,80053,258150,Sedan,4,Automatic Transmission,Gasoline,Red,Dubai,"2022 audi a3 with Sunroof, Bluetooth, Rear cam..."
9997,Toyota,Prado,2014,183381,80525,SUV,6,Automatic Transmission,Gasoline,White,Dubai,"2014 toyota prado with Rear camera, Adaptive c..."
9998,Peugeot,Expert,2016,40876,288305,Utility Truck,4,Automatic Transmission,Diesel,White,Dubai,"2016 peugeot expert with Navigation system, Re..."


In [24]:
    cleaner.imputate_null_values()

2025-03-14 01:47:29,042 - INFO - Successfully imputated numerical values for Year
2025-03-14 01:47:29,043 - INFO - Successfully imputated numerical values for Price
2025-03-14 01:47:29,043 - INFO - Successfully imputated numerical values for Mileage
2025-03-14 01:47:29,048 - INFO - Successfully imputated alphabetic values for Make
2025-03-14 01:47:29,050 - INFO - Successfully imputated alphabetic values for Model
2025-03-14 01:47:29,052 - INFO - Successfully imputated alphabetic values for Body Type
2025-03-14 01:47:29,053 - INFO - Successfully imputated alphabetic values for Cylinders
2025-03-14 01:47:29,054 - INFO - Successfully imputated alphabetic values for Transmission
2025-03-14 01:47:29,056 - INFO - Successfully imputated alphabetic values for Fuel Type
2025-03-14 01:47:29,057 - INFO - Successfully imputated alphabetic values for Color
2025-03-14 01:47:29,059 - INFO - Successfully imputated alphabetic values for Location
2025-03-14 01:47:29,062 - INFO - Successfully imputated a

Unnamed: 0,Make,Model,Year,Price,Mileage,Body Type,Cylinders,Transmission,Fuel Type,Color,Location,Description
0,Toyota,Camry,2016,47819,156500,Sedan,4,Automatic Transmission,Gasoline,Black,Dubai,"2016 toyota camry with Rear camera, Leather se..."
1,Kia,Sorento,2013,61250,169543,SUV,4,Automatic Transmission,Gasoline,Grey,Abu Dhabi,"2013 kia sorento with Sunroof, Adaptive cruise..."
2,Mini,Cooper,2023,31861,221583,Soft Top Convertible,4,Automatic Transmission,Gasoline,Grey,Dubai,"2023 mini cooper with Adaptive cruise control,..."
3,Nissan,Altima,2016,110322,69754,Sedan,4,Automatic Transmission,Gasoline,Red,Dubai,"2016 nissan altima with Rear camera, Adaptive ..."
4,Toyota,Land-Cruiser-76-Series,2020,139994,71399,Pick Up Truck,4,Manual Transmission,Gasoline,White,Dubai,2020 toyota land-cruiser-76-series with Adapti...
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,Tesla,Model-3,2018,273413,76920,Sedan,6,Automatic Transmission,Electric,White,Dubai,"2018 tesla model-3 with Bluetooth, Sunroof, Le..."
9996,Audi,A3,2022,80053,258150,Sedan,4,Automatic Transmission,Gasoline,Red,Dubai,"2022 audi a3 with Sunroof, Bluetooth, Rear cam..."
9997,Toyota,Prado,2014,183381,80525,SUV,6,Automatic Transmission,Gasoline,White,Dubai,"2014 toyota prado with Rear camera, Adaptive c..."
9998,Peugeot,Expert,2016,40876,288305,Utility Truck,4,Automatic Transmission,Diesel,White,Dubai,"2016 peugeot expert with Navigation system, Re..."


In [25]:
    cleaner.convert_to_integer()

2025-03-14 01:47:29,079 - INFO - Successfully converted specified columns to integer type


Unnamed: 0,Make,Model,Year,Price,Mileage,Body Type,Cylinders,Transmission,Fuel Type,Color,Location,Description
0,Toyota,Camry,2016,47819,156500,Sedan,4,Automatic Transmission,Gasoline,Black,Dubai,"2016 toyota camry with Rear camera, Leather se..."
1,Kia,Sorento,2013,61250,169543,SUV,4,Automatic Transmission,Gasoline,Grey,Abu Dhabi,"2013 kia sorento with Sunroof, Adaptive cruise..."
2,Mini,Cooper,2023,31861,221583,Soft Top Convertible,4,Automatic Transmission,Gasoline,Grey,Dubai,"2023 mini cooper with Adaptive cruise control,..."
3,Nissan,Altima,2016,110322,69754,Sedan,4,Automatic Transmission,Gasoline,Red,Dubai,"2016 nissan altima with Rear camera, Adaptive ..."
4,Toyota,Land-Cruiser-76-Series,2020,139994,71399,Pick Up Truck,4,Manual Transmission,Gasoline,White,Dubai,2020 toyota land-cruiser-76-series with Adapti...
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,Tesla,Model-3,2018,273413,76920,Sedan,6,Automatic Transmission,Electric,White,Dubai,"2018 tesla model-3 with Bluetooth, Sunroof, Le..."
9996,Audi,A3,2022,80053,258150,Sedan,4,Automatic Transmission,Gasoline,Red,Dubai,"2022 audi a3 with Sunroof, Bluetooth, Rear cam..."
9997,Toyota,Prado,2014,183381,80525,SUV,6,Automatic Transmission,Gasoline,White,Dubai,"2014 toyota prado with Rear camera, Adaptive c..."
9998,Peugeot,Expert,2016,40876,288305,Utility Truck,4,Automatic Transmission,Diesel,White,Dubai,"2016 peugeot expert with Navigation system, Re..."


In [26]:
    print(data_loader['Mileage'].unique())

<IntegerArray>
[156500, 169543, 221583,  69754,  71399, 233934, 147140,  51876,  32956,
  11954,
 ...
 248002, 240157, 199410, 122833, 226578,  76920, 258150,  80525, 288305,
 283648]
Length: 9834, dtype: Int64


In [27]:
    cleaner.replace_columns()

2025-03-14 01:47:29,094 - INFO - Successfully replaced column values


Unnamed: 0,Make,Model,Year,Price,Mileage,Body Type,Cylinders,Transmission,Fuel Type,Color,Location,Description
0,Toyota,Camry,2016,47819,156500,Sedan,4,Automatic,Gasoline,Black,Dubai,"2016 toyota camry with Rear camera, Leather se..."
1,Kia,Sorento,2013,61250,169543,SUV,4,Automatic,Gasoline,Grey,Abu Dhabi,"2013 kia sorento with Sunroof, Adaptive cruise..."
2,Mini,Cooper,2023,31861,221583,Soft Top Convertible,4,Automatic,Gasoline,Grey,Dubai,"2023 mini cooper with Adaptive cruise control,..."
3,Nissan,Altima,2016,110322,69754,Sedan,4,Automatic,Gasoline,Red,Dubai,"2016 nissan altima with Rear camera, Adaptive ..."
4,Toyota,Land-Cruiser-76-Series,2020,139994,71399,Pick Up Truck,4,Manual,Gasoline,White,Dubai,2020 toyota land-cruiser-76-series with Adapti...
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,Tesla,Model-3,2018,273413,76920,Sedan,6,Automatic,Electric,White,Dubai,"2018 tesla model-3 with Bluetooth, Sunroof, Le..."
9996,Audi,A3,2022,80053,258150,Sedan,4,Automatic,Gasoline,Red,Dubai,"2022 audi a3 with Sunroof, Bluetooth, Rear cam..."
9997,Toyota,Prado,2014,183381,80525,SUV,6,Automatic,Gasoline,White,Dubai,"2014 toyota prado with Rear camera, Adaptive c..."
9998,Peugeot,Expert,2016,40876,288305,Utility Truck,4,Automatic,Diesel,White,Dubai,"2016 peugeot expert with Navigation system, Re..."


In [28]:
    print(data_loader['Body Type'].unique())

['Sedan' 'SUV' 'Soft Top Convertible' 'Pick Up Truck' 'Coupe' 'Crossover'
 'Hatchback' 'Hard Top Convertible' 'Other' 'Utility Truck' 'Sports Car'
 'Van' 'Wagon']


In [29]:
    cleaner.convert_to_string()

2025-03-14 01:47:29,110 - INFO - Successfully converted specified columns to string type


Unnamed: 0,Make,Model,Year,Price,Mileage,Body Type,Cylinders,Transmission,Fuel Type,Color,Location,Description
0,Toyota,Camry,2016,47819,156500,Sedan,4,Automatic,Gasoline,Black,Dubai,"2016 toyota camry with Rear camera, Leather se..."
1,Kia,Sorento,2013,61250,169543,SUV,4,Automatic,Gasoline,Grey,Abu Dhabi,"2013 kia sorento with Sunroof, Adaptive cruise..."
2,Mini,Cooper,2023,31861,221583,Soft Top Convertible,4,Automatic,Gasoline,Grey,Dubai,"2023 mini cooper with Adaptive cruise control,..."
3,Nissan,Altima,2016,110322,69754,Sedan,4,Automatic,Gasoline,Red,Dubai,"2016 nissan altima with Rear camera, Adaptive ..."
4,Toyota,Land-Cruiser-76-Series,2020,139994,71399,Pick Up Truck,4,Manual,Gasoline,White,Dubai,2020 toyota land-cruiser-76-series with Adapti...
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,Tesla,Model-3,2018,273413,76920,Sedan,6,Automatic,Electric,White,Dubai,"2018 tesla model-3 with Bluetooth, Sunroof, Le..."
9996,Audi,A3,2022,80053,258150,Sedan,4,Automatic,Gasoline,Red,Dubai,"2022 audi a3 with Sunroof, Bluetooth, Rear cam..."
9997,Toyota,Prado,2014,183381,80525,SUV,6,Automatic,Gasoline,White,Dubai,"2014 toyota prado with Rear camera, Adaptive c..."
9998,Peugeot,Expert,2016,40876,288305,Utility Truck,4,Automatic,Diesel,White,Dubai,"2016 peugeot expert with Navigation system, Re..."


In [30]:
    print(data_loader['Cylinders'].unique())

<IntegerArray>
[4, 8, 6, 5, 12, <NA>, 3, 10]
Length: 8, dtype: Int64


In [31]:
    cleaner.replace_columns()

2025-03-14 01:47:29,125 - INFO - Successfully replaced column values


Unnamed: 0,Make,Model,Year,Price,Mileage,Body Type,Cylinders,Transmission,Fuel Type,Color,Location,Description
0,Toyota,Camry,2016,47819,156500,Sedan,4,Automatic,Gasoline,Black,Dubai,"2016 toyota camry with Rear camera, Leather se..."
1,Kia,Sorento,2013,61250,169543,SUV,4,Automatic,Gasoline,Grey,Abu Dhabi,"2013 kia sorento with Sunroof, Adaptive cruise..."
2,Mini,Cooper,2023,31861,221583,Soft Top Convertible,4,Automatic,Gasoline,Grey,Dubai,"2023 mini cooper with Adaptive cruise control,..."
3,Nissan,Altima,2016,110322,69754,Sedan,4,Automatic,Gasoline,Red,Dubai,"2016 nissan altima with Rear camera, Adaptive ..."
4,Toyota,Land-Cruiser-76-Series,2020,139994,71399,Pick Up Truck,4,Manual,Gasoline,White,Dubai,2020 toyota land-cruiser-76-series with Adapti...
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,Tesla,Model-3,2018,273413,76920,Sedan,6,Automatic,Electric,White,Dubai,"2018 tesla model-3 with Bluetooth, Sunroof, Le..."
9996,Audi,A3,2022,80053,258150,Sedan,4,Automatic,Gasoline,Red,Dubai,"2022 audi a3 with Sunroof, Bluetooth, Rear cam..."
9997,Toyota,Prado,2014,183381,80525,SUV,6,Automatic,Gasoline,White,Dubai,"2014 toyota prado with Rear camera, Adaptive c..."
9998,Peugeot,Expert,2016,40876,288305,Utility Truck,4,Automatic,Diesel,White,Dubai,"2016 peugeot expert with Navigation system, Re..."


In [32]:
    print(data_loader['Fuel Type'].unique())

['Gasoline' 'Diesel' 'Hybrid' 'Electric']


In [33]:
    print(data_loader['Color'].unique())

['Black' 'Grey' 'Red' 'White' 'Brown' 'Silver' 'Gold' 'Blue' 'Other Color'
 'Beige' 'Burgundy' 'Orange' 'Green' 'Purple' 'Yellow' 'Teal' 'Tan']


In [34]:
    cleaner.add_underscore_to_columns()

2025-03-14 01:47:29,142 - INFO - Successfully added underscores to column titles


Unnamed: 0,Make,Model,Year,Price,Mileage,Body_Type,Cylinders,Transmission,Fuel_Type,Color,Location,Description
0,Toyota,Camry,2016,47819,156500,Sedan,4,Automatic,Gasoline,Black,Dubai,"2016 toyota camry with Rear camera, Leather se..."
1,Kia,Sorento,2013,61250,169543,SUV,4,Automatic,Gasoline,Grey,Abu Dhabi,"2013 kia sorento with Sunroof, Adaptive cruise..."
2,Mini,Cooper,2023,31861,221583,Soft Top Convertible,4,Automatic,Gasoline,Grey,Dubai,"2023 mini cooper with Adaptive cruise control,..."
3,Nissan,Altima,2016,110322,69754,Sedan,4,Automatic,Gasoline,Red,Dubai,"2016 nissan altima with Rear camera, Adaptive ..."
4,Toyota,Land-Cruiser-76-Series,2020,139994,71399,Pick Up Truck,4,Manual,Gasoline,White,Dubai,2020 toyota land-cruiser-76-series with Adapti...
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,Tesla,Model-3,2018,273413,76920,Sedan,6,Automatic,Electric,White,Dubai,"2018 tesla model-3 with Bluetooth, Sunroof, Le..."
9996,Audi,A3,2022,80053,258150,Sedan,4,Automatic,Gasoline,Red,Dubai,"2022 audi a3 with Sunroof, Bluetooth, Rear cam..."
9997,Toyota,Prado,2014,183381,80525,SUV,6,Automatic,Gasoline,White,Dubai,"2014 toyota prado with Rear camera, Adaptive c..."
9998,Peugeot,Expert,2016,40876,288305,Utility Truck,4,Automatic,Diesel,White,Dubai,"2016 peugeot expert with Navigation system, Re..."


In [35]:
    cleaner.remove_duplicates()

2025-03-14 01:47:29,152 - INFO - Number of rows before removing duplicates: 10000
2025-03-14 01:47:29,165 - INFO - Number of rows after removing duplicates: 10000


Unnamed: 0,Make,Model,Year,Price,Mileage,Body_Type,Cylinders,Transmission,Fuel_Type,Color,Location,Description
0,Toyota,Camry,2016,47819,156500,Sedan,4,Automatic,Gasoline,Black,Dubai,"2016 toyota camry with Rear camera, Leather se..."
1,Kia,Sorento,2013,61250,169543,SUV,4,Automatic,Gasoline,Grey,Abu Dhabi,"2013 kia sorento with Sunroof, Adaptive cruise..."
2,Mini,Cooper,2023,31861,221583,Soft Top Convertible,4,Automatic,Gasoline,Grey,Dubai,"2023 mini cooper with Adaptive cruise control,..."
3,Nissan,Altima,2016,110322,69754,Sedan,4,Automatic,Gasoline,Red,Dubai,"2016 nissan altima with Rear camera, Adaptive ..."
4,Toyota,Land-Cruiser-76-Series,2020,139994,71399,Pick Up Truck,4,Manual,Gasoline,White,Dubai,2020 toyota land-cruiser-76-series with Adapti...
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,Tesla,Model-3,2018,273413,76920,Sedan,6,Automatic,Electric,White,Dubai,"2018 tesla model-3 with Bluetooth, Sunroof, Le..."
9996,Audi,A3,2022,80053,258150,Sedan,4,Automatic,Gasoline,Red,Dubai,"2022 audi a3 with Sunroof, Bluetooth, Rear cam..."
9997,Toyota,Prado,2014,183381,80525,SUV,6,Automatic,Gasoline,White,Dubai,"2014 toyota prado with Rear camera, Adaptive c..."
9998,Peugeot,Expert,2016,40876,288305,Utility Truck,4,Automatic,Diesel,White,Dubai,"2016 peugeot expert with Navigation system, Re..."


In [36]:
    saved_data = cleaner.save_changes()

2025-03-14 01:47:29,218 - INFO - The cleaned dataset was successfully saved


In [37]:
    explorer = DataExplorer(saved_data)

In [38]:
    explorer.top_5_most_expensive_cars()

2025-03-14 01:47:29,289 - INFO - Top 5 most expensive cars: 
                     Car     Price
7966     Mclaren P1 2020  14686975
3928     Mclaren P1 2005  12909943
3728   Mclaren Elva 2013  10308926
9980   Mclaren Elva 2016   9862754
3102  Mclaren Senna 2013   5314704


Unnamed: 0,Car,Price
7966,Mclaren P1 2020,14686975
3928,Mclaren P1 2005,12909943
3728,Mclaren Elva 2013,10308926
9980,Mclaren Elva 2016,9862754
3102,Mclaren Senna 2013,5314704


In [39]:
    explorer.avg_price_of_make()

2025-03-14 01:47:29,305 - INFO - Top 5 most expensive cars: 
         Make     Price
7966  Mclaren  14686975
3928  Mclaren  12909943
3728  Mclaren  10308926
9980  Mclaren   9862754
3102  Mclaren   5314704


Unnamed: 0,Make,Price
7966,Mclaren,14686975
3928,Mclaren,12909943
3728,Mclaren,10308926
9980,Mclaren,9862754
3102,Mclaren,5314704
