# Data Cleaning/Prep Script

## Section 0: Initial Setup

### Section 0.1: Import required libraries

In [1]:
import pandas as pd
import datetime
import numpy as np

### Section 0.2: Read in the data

In [2]:
df = pd.read_csv('22032022-PPR-Price-recent.csv', keep_default_na=True, sep=',', skipinitialspace=True, engine="python")

In [3]:
df.head()

Unnamed: 0,Date of Sale (dd/mm/yyyy),Address,Postal Code,County,Price (€),Not Full Market Price,VAT Exclusive,Description of Property,Property Size Description
0,2022-01-15,"24 FOREST WALK, SWORDS, DUBLIN",,Dublin,154000.0,No,No,Second-Hand Dwelling house /Apartment,
1,2022-01-15,"5 CRAGAUN, FATHER RUSSELL ROAD, DOORADOYLE",,Limerick,370000.0,No,No,Second-Hand Dwelling house /Apartment,
2,2022-01-16,"19 The Lawn, Mullen Park, Maynooth",,Kildare,409691.63,No,Yes,New Dwelling house /Apartment,
3,2022-01-16,"MONTEVIDEO, HOSPITAL HILL, BUNCLODY",,Wexford,100000.0,No,No,Second-Hand Dwelling house /Apartment,
4,2022-01-17,"1 CILL BEG MANOR, STRADBALLY, LAOIS",,Laois,225000.0,No,No,Second-Hand Dwelling house /Apartment,


### Section 0.3: Change the column names

In [4]:
df.columns = df.columns.str.replace(' ', '_')
df.columns

Index(['Date_of_Sale_(dd/mm/yyyy)', 'Address', 'Postal_Code', 'County',
       'Price_(€)', 'Not_Full_Market_Price', 'VAT_Exclusive',
       'Description_of_Property', 'Property_Size_Description'],
      dtype='object')

### Section 0.4: Change the data types

In [5]:
# Remove € and , characters and convert to float
df["Price_(€)"] = df["Price_(€)"].replace("[\€,]", "", regex=True).astype(float)

In [6]:
df["Date_of_Sale_(dd/mm/yyyy)"] = pd.to_datetime(df["Date_of_Sale_(dd/mm/yyyy)"], format="%Y-%m-%d")
df.rename(columns={"Date_of_Sale_(dd/mm/yyyy)": "Date_of_Sale"}, inplace=True)

In [7]:
# Select all columns of type 'object'
object_columns = df.select_dtypes(['object']).columns

# Convert selected columns to type 'category'
for column in object_columns:
    df[column] = df[column].astype('category')
df.dtypes

Date_of_Sale                 datetime64[ns]
Address                            category
Postal_Code                        category
County                             category
Price_(€)                           float64
Not_Full_Market_Price              category
VAT_Exclusive                      category
Description_of_Property            category
Property_Size_Description           float64
dtype: object

In [8]:
df.head()

Unnamed: 0,Date_of_Sale,Address,Postal_Code,County,Price_(€),Not_Full_Market_Price,VAT_Exclusive,Description_of_Property,Property_Size_Description
0,2022-01-15,"24 FOREST WALK, SWORDS, DUBLIN",,Dublin,154000.0,No,No,Second-Hand Dwelling house /Apartment,
1,2022-01-15,"5 CRAGAUN, FATHER RUSSELL ROAD, DOORADOYLE",,Limerick,370000.0,No,No,Second-Hand Dwelling house /Apartment,
2,2022-01-16,"19 The Lawn, Mullen Park, Maynooth",,Kildare,409691.63,No,Yes,New Dwelling house /Apartment,
3,2022-01-16,"MONTEVIDEO, HOSPITAL HILL, BUNCLODY",,Wexford,100000.0,No,No,Second-Hand Dwelling house /Apartment,
4,2022-01-17,"1 CILL BEG MANOR, STRADBALLY, LAOIS",,Laois,225000.0,No,No,Second-Hand Dwelling house /Apartment,


## Section 1: Address

### Section 1.1: Convert all addresses to small caps

In [9]:
def convert_to_small_caps(address):
    """Function for converting an address to lower case"""
    return address.lower()

In [10]:
df["Address"] = df["Address"].apply(convert_to_small_caps)

In [11]:
df.head()

Unnamed: 0,Date_of_Sale,Address,Postal_Code,County,Price_(€),Not_Full_Market_Price,VAT_Exclusive,Description_of_Property,Property_Size_Description
0,2022-01-15,"24 forest walk, swords, dublin",,Dublin,154000.0,No,No,Second-Hand Dwelling house /Apartment,
1,2022-01-15,"5 cragaun, father russell road, dooradoyle",,Limerick,370000.0,No,No,Second-Hand Dwelling house /Apartment,
2,2022-01-16,"19 the lawn, mullen park, maynooth",,Kildare,409691.63,No,Yes,New Dwelling house /Apartment,
3,2022-01-16,"montevideo, hospital hill, bunclody",,Wexford,100000.0,No,No,Second-Hand Dwelling house /Apartment,
4,2022-01-17,"1 cill beg manor, stradbally, laois",,Laois,225000.0,No,No,Second-Hand Dwelling house /Apartment,


### Section 1.2: Convert Postal Code values to lowercase (for comparability)

In [12]:
df["Postal_Code"] = df["Postal_Code"].apply(convert_to_small_caps)

In [13]:
df.head()

Unnamed: 0,Date_of_Sale,Address,Postal_Code,County,Price_(€),Not_Full_Market_Price,VAT_Exclusive,Description_of_Property,Property_Size_Description
0,2022-01-15,"24 forest walk, swords, dublin",,Dublin,154000.0,No,No,Second-Hand Dwelling house /Apartment,
1,2022-01-15,"5 cragaun, father russell road, dooradoyle",,Limerick,370000.0,No,No,Second-Hand Dwelling house /Apartment,
2,2022-01-16,"19 the lawn, mullen park, maynooth",,Kildare,409691.63,No,Yes,New Dwelling house /Apartment,
3,2022-01-16,"montevideo, hospital hill, bunclody",,Wexford,100000.0,No,No,Second-Hand Dwelling house /Apartment,
4,2022-01-17,"1 cill beg manor, stradbally, laois",,Laois,225000.0,No,No,Second-Hand Dwelling house /Apartment,


### Section 1.3: Create a list of valid postal codes

In [14]:
# Create a list for storing postal codes and initialise this list with the county Dublin 6w
postal_codes = ["dublin 6w"]

# Loop through all valid postal code numbers to create a list of valid postal codes
for i in range(1, 25):
    if i in [19, 21, 23]:
        continue
    postal_codes.append("dublin " + str(i))

postal_codes

['dublin 6w',
 'dublin 1',
 'dublin 2',
 'dublin 3',
 'dublin 4',
 'dublin 5',
 'dublin 6',
 'dublin 7',
 'dublin 8',
 'dublin 9',
 'dublin 10',
 'dublin 11',
 'dublin 12',
 'dublin 13',
 'dublin 14',
 'dublin 15',
 'dublin 16',
 'dublin 17',
 'dublin 18',
 'dublin 20',
 'dublin 22',
 'dublin 24']

### Section 1.4: Add all postal codes from address to postal code when postal code is NAN

In [15]:
# Count of NAN values before using address postal codes
df["Postal_Code"].isna().sum()

5675

In [16]:
def address_postal_adder(address, postal_code):
    """Function that adds the address postal code to the postal code column when it is a valid postal code and the postal code value is missing"""
    new_postal_code = ""

    for valid_postal_code in postal_codes:
        if (valid_postal_code in address) and (pd.isnull(postal_code)) and (len(valid_postal_code) > len(new_postal_code)):
            print(valid_postal_code, address, postal_code)
            new_postal_code = valid_postal_code
    
    if len(new_postal_code) > 0:
        return new_postal_code

    return postal_code


In [17]:
df["Postal_Code"] = df.apply(lambda x: address_postal_adder(x["Address"], x["Postal_Code"]), axis=1)

dublin 6w 11 rossmore close, templeogue, dublin 6w nan
dublin 4 13 bayview, irish town, dublin 4 nan
dublin 6w 24 terenure rd west, terenure, dublin 6w nan
dublin 6w 24 terenure rd west, terenure, dublin 6w nan
dublin 6w 32 willington green, templeogue, dublin 6w nan
dublin 5 53 thornville avenue, dublin 5, dublin nan
dublin 7 11 dalymount, phibsboro, dublin 7 nan
dublin 2 11 the way, citywest village, dublin 24 nan
dublin 24 11 the way, citywest village, dublin 24 nan
dublin 2 80 merrion road, dublin 2 nan
dublin 1 151 howth road, sutton, dublin 13 nan
dublin 13 151 howth road, sutton, dublin 13 nan
dublin 1 17 cherry court, loughlinstown, dublin 18 nan
dublin 18 17 cherry court, loughlinstown, dublin 18 nan
dublin 1 171 castlemoyne, balgriffin, dublin 13 nan
dublin 13 171 castlemoyne, balgriffin, dublin 13 nan
dublin 2 80 thornfield square, clondalkin, dublin 22 nan
dublin 22 80 thornfield square, clondalkin, dublin 22 nan
dublin 6 88 cypress grove rd, templeogue, dublin 6 nan
dublin

In [18]:
# Count of NAN values after using address postal codes
df["Postal_Code"].isna().sum()

5547

## Section 2: Not full market price

### Section 2.1: Invert not full market price

In [19]:
def invert_yes_no(not_full_market_price):
    """Function that inverts the values in not full market price to solve the double negative issue"""
    if not_full_market_price == "no":
        return "yes"

    else:
        return "no"

In [20]:
df["Not_Full_Market_Price"] = df["Not_Full_Market_Price"].apply(convert_to_small_caps)
df["Not_Full_Market_Price"] = df["Not_Full_Market_Price"].apply(invert_yes_no)
df.rename(columns={"Not_Full_Market_Price": "Full_Market_Price"}, inplace=True)

In [21]:
df.head()

Unnamed: 0,Date_of_Sale,Address,Postal_Code,County,Price_(€),Full_Market_Price,VAT_Exclusive,Description_of_Property,Property_Size_Description
0,2022-01-15,"24 forest walk, swords, dublin",,Dublin,154000.0,yes,No,Second-Hand Dwelling house /Apartment,
1,2022-01-15,"5 cragaun, father russell road, dooradoyle",,Limerick,370000.0,yes,No,Second-Hand Dwelling house /Apartment,
2,2022-01-16,"19 the lawn, mullen park, maynooth",,Kildare,409691.63,yes,Yes,New Dwelling house /Apartment,
3,2022-01-16,"montevideo, hospital hill, bunclody",,Wexford,100000.0,yes,No,Second-Hand Dwelling house /Apartment,
4,2022-01-17,"1 cill beg manor, stradbally, laois",,Laois,225000.0,yes,No,Second-Hand Dwelling house /Apartment,


In [22]:
df["Full_Market_Price"].value_counts()

yes    6953
no      229
Name: Full_Market_Price, dtype: int64

### Section 2.2: Drop rows where full market price = No

In [23]:
# Drop all rows with no for full market price, because it gives an inaccurate representation of price
df.drop(df[df["Full_Market_Price"] == "no"].index, inplace=True)
df.reset_index(drop=True, inplace=True)

In [24]:
df.shape

(6953, 9)

In [25]:
# Drop the full market price feature, because now it's all constant values
df.drop("Full_Market_Price", axis=1, inplace=True)

In [26]:
df.head()

Unnamed: 0,Date_of_Sale,Address,Postal_Code,County,Price_(€),VAT_Exclusive,Description_of_Property,Property_Size_Description
0,2022-01-15,"24 forest walk, swords, dublin",,Dublin,154000.0,No,Second-Hand Dwelling house /Apartment,
1,2022-01-15,"5 cragaun, father russell road, dooradoyle",,Limerick,370000.0,No,Second-Hand Dwelling house /Apartment,
2,2022-01-16,"19 the lawn, mullen park, maynooth",,Kildare,409691.63,Yes,New Dwelling house /Apartment,
3,2022-01-16,"montevideo, hospital hill, bunclody",,Wexford,100000.0,No,Second-Hand Dwelling house /Apartment,
4,2022-01-17,"1 cill beg manor, stradbally, laois",,Laois,225000.0,No,Second-Hand Dwelling house /Apartment,


## Section 3: Postal Code

### Section 3.1: Set the postal code to NAN when the county is outside Dublin

In [27]:
def postal_to_nan(postal_code, county):
    """Function that sets the postal code value to NAN whenever the property is outside of Dublin"""
    if county != "Dublin":
        return np.nan

    else:
        return postal_code

In [28]:
# Alter the postal code feature so that it no longer contains postal codes when the property is outside of Dublin
df["Postal_Code"] = df.apply(lambda x: postal_to_nan(x["Postal_Code"], x["County"]), axis=1)

# Use our logical integrity check from section 1 to identify Dublin postal codes in non-Dublin properties
non_dublin_postal_codes = df[(df["County"] != "Dublin")]
non_dublin_postal_codes = non_dublin_postal_codes[~non_dublin_postal_codes["Postal_Code"].isna()]
print(f"Number of properties outside of Dublin with Dublin postal codes: {len(non_dublin_postal_codes)}")

Number of properties outside of Dublin with Dublin postal codes: 0


### Section 3.2: Fill missing values in postal code column

In [29]:
df["Postal_Code"].fillna("unknown", inplace=True)

## Section 4: Price

### Section 4.1: Add VAT to VAT exclusive rows

In [30]:
def add_vat(vat_exclusive, price):
    """Adds VAT to VAT exclusive rows"""
    if vat_exclusive == "Yes":
        return round(price * 1.135)

    return round(price)

In [31]:
df[df["VAT_Exclusive"] == "Yes"].head()

Unnamed: 0,Date_of_Sale,Address,Postal_Code,County,Price_(€),VAT_Exclusive,Description_of_Property,Property_Size_Description
2,2022-01-16,"19 the lawn, mullen park, maynooth",unknown,Kildare,409691.63,Yes,New Dwelling house /Apartment,
8,2022-01-17,"10 effernock grove, trim",unknown,Meath,325992.0,Yes,New Dwelling house /Apartment,
33,2022-01-17,"162 drakes point, crosshaven",unknown,Cork,226872.0,Yes,New Dwelling house /Apartment,
39,2022-01-17,"18 butler town, ballinglanna, glanmire",unknown,Cork,265000.0,Yes,New Dwelling house /Apartment,
40,2022-01-17,"181 dundoogan, haynestown, dundalk",unknown,Louth,246652.0,Yes,New Dwelling house /Apartment,


In [32]:
df["Price_(€)"] = df.apply(lambda x: add_vat(x["VAT_Exclusive"], x["Price_(€)"]), axis=1)

In [33]:
df[df["VAT_Exclusive"] == "Yes"].head()

Unnamed: 0,Date_of_Sale,Address,Postal_Code,County,Price_(€),VAT_Exclusive,Description_of_Property,Property_Size_Description
2,2022-01-16,"19 the lawn, mullen park, maynooth",unknown,Kildare,465000,Yes,New Dwelling house /Apartment,
8,2022-01-17,"10 effernock grove, trim",unknown,Meath,370001,Yes,New Dwelling house /Apartment,
33,2022-01-17,"162 drakes point, crosshaven",unknown,Cork,257500,Yes,New Dwelling house /Apartment,
39,2022-01-17,"18 butler town, ballinglanna, glanmire",unknown,Cork,300775,Yes,New Dwelling house /Apartment,
40,2022-01-17,"181 dundoogan, haynestown, dundalk",unknown,Louth,279950,Yes,New Dwelling house /Apartment,


### Section 4.2: Clamp outliers

In [34]:
print(np.min(df["Price_(€)"]))
print(np.max(df["Price_(€)"]))

9333
30566193


In [35]:
# Code reference: lab 4
a = df["Price_(€)"]

upper = np.percentile(a, 99)

lower = np.percentile(a, 1)

In [36]:
#Clamp the values in a to lower and upper bounds
a_clampped = np.zeros(a.size)

for i in range(a.size):
    if a[i] < lower:
        a_clampped[i] = round(lower)
    elif a[i] > upper:
            a_clampped[i] = round(upper)
    else: 
        a_clampped[i] = round(a[i])    

df["Price_(€)"] = a_clampped

In [37]:
print(np.min(df["Price_(€)"]))
print(np.max(df["Price_(€)"]))

35000.0
1409000.0


In [38]:
df.shape

(6953, 8)

### Section 4.3: Rename Price to be reflective of VAT

In [39]:
df.rename(columns={"Price_(€)": "Price_€_VAT_incl"}, inplace=True)
df.head()

Unnamed: 0,Date_of_Sale,Address,Postal_Code,County,Price_€_VAT_incl,VAT_Exclusive,Description_of_Property,Property_Size_Description
0,2022-01-15,"24 forest walk, swords, dublin",unknown,Dublin,154000.0,No,Second-Hand Dwelling house /Apartment,
1,2022-01-15,"5 cragaun, father russell road, dooradoyle",unknown,Limerick,370000.0,No,Second-Hand Dwelling house /Apartment,
2,2022-01-16,"19 the lawn, mullen park, maynooth",unknown,Kildare,465000.0,Yes,New Dwelling house /Apartment,
3,2022-01-16,"montevideo, hospital hill, bunclody",unknown,Wexford,100000.0,No,Second-Hand Dwelling house /Apartment,
4,2022-01-17,"1 cill beg manor, stradbally, laois",unknown,Laois,225000.0,No,Second-Hand Dwelling house /Apartment,


In [40]:
df.drop("VAT_Exclusive", axis=1, inplace=True)

In [41]:
df.head()

Unnamed: 0,Date_of_Sale,Address,Postal_Code,County,Price_€_VAT_incl,Description_of_Property,Property_Size_Description
0,2022-01-15,"24 forest walk, swords, dublin",unknown,Dublin,154000.0,Second-Hand Dwelling house /Apartment,
1,2022-01-15,"5 cragaun, father russell road, dooradoyle",unknown,Limerick,370000.0,Second-Hand Dwelling house /Apartment,
2,2022-01-16,"19 the lawn, mullen park, maynooth",unknown,Kildare,465000.0,New Dwelling house /Apartment,
3,2022-01-16,"montevideo, hospital hill, bunclody",unknown,Wexford,100000.0,Second-Hand Dwelling house /Apartment,
4,2022-01-17,"1 cill beg manor, stradbally, laois",unknown,Laois,225000.0,Second-Hand Dwelling house /Apartment,


## Section 5: Property Size Description

### Section 5.1: Replace NAN values with unknown

In [42]:
df["Property_Size_Description"] = df["Property_Size_Description"].astype("object")
df["Property_Size_Description"].fillna("unknown", inplace=True)
df.dtypes

Date_of_Sale                 datetime64[ns]
Address                              object
Postal_Code                          object
County                             category
Price_€_VAT_incl                    float64
Description_of_Property            category
Property_Size_Description            object
dtype: object

In [43]:
df.head()

Unnamed: 0,Date_of_Sale,Address,Postal_Code,County,Price_€_VAT_incl,Description_of_Property,Property_Size_Description
0,2022-01-15,"24 forest walk, swords, dublin",unknown,Dublin,154000.0,Second-Hand Dwelling house /Apartment,unknown
1,2022-01-15,"5 cragaun, father russell road, dooradoyle",unknown,Limerick,370000.0,Second-Hand Dwelling house /Apartment,unknown
2,2022-01-16,"19 the lawn, mullen park, maynooth",unknown,Kildare,465000.0,New Dwelling house /Apartment,unknown
3,2022-01-16,"montevideo, hospital hill, bunclody",unknown,Wexford,100000.0,Second-Hand Dwelling house /Apartment,unknown
4,2022-01-17,"1 cill beg manor, stradbally, laois",unknown,Laois,225000.0,Second-Hand Dwelling house /Apartment,unknown


### Section 5.2: Merge redundant categories

In [44]:
df["Postal_Code"] = df["Postal_Code"].astype("category")
df["Property_Size_Description"] = df["Property_Size_Description"].astype("category")

In [45]:
df["Property_Size_Description"].value_counts()

unknown    6953
Name: Property_Size_Description, dtype: int64

In [46]:
def change_size_descriptions(description):
    """Reencodes the property size description feature and merges two levels
    
    The merged levels both represent properties that are > 125 sq metres in size
    """
    if pd.isnull(description):
        return description

    else:
        if description == "less than 38 sq metres":
            return "< 38"

        elif description == "greater than or equal to 38 sq metres and less than 125 sq metres":
            return "38 - 125"

        elif description == "unknown":
            return "unknown"

        else:
            return ">= 125"

In [47]:
df["Property_Size_Description"] = df.apply(lambda x: change_size_descriptions(x["Property_Size_Description"]), axis=1)
df["Property_Size_Description"].value_counts()

unknown    6953
Name: Property_Size_Description, dtype: int64

In [48]:
df.head()

Unnamed: 0,Date_of_Sale,Address,Postal_Code,County,Price_€_VAT_incl,Description_of_Property,Property_Size_Description
0,2022-01-15,"24 forest walk, swords, dublin",unknown,Dublin,154000.0,Second-Hand Dwelling house /Apartment,unknown
1,2022-01-15,"5 cragaun, father russell road, dooradoyle",unknown,Limerick,370000.0,Second-Hand Dwelling house /Apartment,unknown
2,2022-01-16,"19 the lawn, mullen park, maynooth",unknown,Kildare,465000.0,New Dwelling house /Apartment,unknown
3,2022-01-16,"montevideo, hospital hill, bunclody",unknown,Wexford,100000.0,Second-Hand Dwelling house /Apartment,unknown
4,2022-01-17,"1 cill beg manor, stradbally, laois",unknown,Laois,225000.0,Second-Hand Dwelling house /Apartment,unknown


## Section 6: Date of Sale

### Section 6.1: Create a month of sale feature

In [49]:
df["Month_of_Sale"] = df["Date_of_Sale"].dt.month_name().astype("category")

### Section 6.2: Create a quarter of sale feature

In [50]:
df["Quarter_of_Sale"] = df["Date_of_Sale"].dt.quarter.astype("category")

In [51]:
df.head()

Unnamed: 0,Date_of_Sale,Address,Postal_Code,County,Price_€_VAT_incl,Description_of_Property,Property_Size_Description,Month_of_Sale,Quarter_of_Sale
0,2022-01-15,"24 forest walk, swords, dublin",unknown,Dublin,154000.0,Second-Hand Dwelling house /Apartment,unknown,January,1
1,2022-01-15,"5 cragaun, father russell road, dooradoyle",unknown,Limerick,370000.0,Second-Hand Dwelling house /Apartment,unknown,January,1
2,2022-01-16,"19 the lawn, mullen park, maynooth",unknown,Kildare,465000.0,New Dwelling house /Apartment,unknown,January,1
3,2022-01-16,"montevideo, hospital hill, bunclody",unknown,Wexford,100000.0,Second-Hand Dwelling house /Apartment,unknown,January,1
4,2022-01-17,"1 cill beg manor, stradbally, laois",unknown,Laois,225000.0,Second-Hand Dwelling house /Apartment,unknown,January,1


## Section 7: Drop unused feaures

In [52]:
df.drop(["Address"], axis=1, inplace=True)

In [53]:
df.head()

Unnamed: 0,Date_of_Sale,Postal_Code,County,Price_€_VAT_incl,Description_of_Property,Property_Size_Description,Month_of_Sale,Quarter_of_Sale
0,2022-01-15,unknown,Dublin,154000.0,Second-Hand Dwelling house /Apartment,unknown,January,1
1,2022-01-15,unknown,Limerick,370000.0,Second-Hand Dwelling house /Apartment,unknown,January,1
2,2022-01-16,unknown,Kildare,465000.0,New Dwelling house /Apartment,unknown,January,1
3,2022-01-16,unknown,Wexford,100000.0,Second-Hand Dwelling house /Apartment,unknown,January,1
4,2022-01-17,unknown,Laois,225000.0,Second-Hand Dwelling house /Apartment,unknown,January,1


In [54]:
df.isna().sum()

Date_of_Sale                 0
Postal_Code                  0
County                       0
Price_€_VAT_incl             0
Description_of_Property      0
Property_Size_Description    0
Month_of_Sale                0
Quarter_of_Sale              0
dtype: int64

### Section 8: Save final dataset

In [55]:
df.head()

Unnamed: 0,Date_of_Sale,Postal_Code,County,Price_€_VAT_incl,Description_of_Property,Property_Size_Description,Month_of_Sale,Quarter_of_Sale
0,2022-01-15,unknown,Dublin,154000.0,Second-Hand Dwelling house /Apartment,unknown,January,1
1,2022-01-15,unknown,Limerick,370000.0,Second-Hand Dwelling house /Apartment,unknown,January,1
2,2022-01-16,unknown,Kildare,465000.0,New Dwelling house /Apartment,unknown,January,1
3,2022-01-16,unknown,Wexford,100000.0,Second-Hand Dwelling house /Apartment,unknown,January,1
4,2022-01-17,unknown,Laois,225000.0,Second-Hand Dwelling house /Apartment,unknown,January,1


In [56]:
df.to_csv('cleaned_unseen_data.csv', index=False)