In [None]:
# Check Java version (Colab includes OpenJDK 11)
!java -version

# Install PySpark (already installed but safe to rerun)
!pip install pyspark --quiet

import os
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-17-openjdk-amd64" # Changed to Java 17 path

from pyspark.sql import SparkSession

spark = SparkSession.builder \
    .master("local[*]") \
    .appName("ColabPySpark") \
    .getOrCreate()

spark

openjdk version "17.0.16" 2025-07-15
OpenJDK Runtime Environment (build 17.0.16+8-Ubuntu-0ubuntu122.04.1)
OpenJDK 64-Bit Server VM (build 17.0.16+8-Ubuntu-0ubuntu122.04.1, mixed mode, sharing)


In [None]:
# loading in csv file
Housing = spark.read.csv("/content/Housing prices.csv", header=True, inferSchema=True)
urban_area = spark.read.csv("/content/Urban_Areas_National_Statistical_Boundaries_2022_Ungeneralised_View_-4618039361375901136.csv", header=True, inferSchema=True)
town_names = spark.read.csv("/content/Centres_of_Population___OSi_National_Placenames_Gazetteer_-4856559463974628035.csv", header=True,inferSchema=True)



In [None]:
from pyspark.sql import functions as F

# Convert df1['English_Na'] # Irish Name to a Python list and lowercase
town_list = [t.lower() for t in town_names.select("English_Na", "Irish_Name").rdd.flatMap(lambda x: x).collect()]


towns_b = spark.sparkContext.broadcast(set(town_list))

In [None]:
from pyspark.sql.types import StringType
from pyspark.sql.functions import udf, col

def extract_town(address):
    if not address:
        return None
    parts = [p.strip().lower() for p in address.split(",")]
    # Check each part against town list (start from end for better accuracy)
    for p in parts[::-1]:
        if p in towns_b.value:
            return p.title()
    return None

extract_town_udf = udf(extract_town, StringType())

In [None]:
Housing = Housing.withColumn("town", extract_town_udf(col("address")))
Housing.select("address", "town").show(20, False)

+----------------------------------------------+-----------+
|address                                       |town       |
+----------------------------------------------+-----------+
|5 Braemor Drive, Churchtown, Co.Dublin        |Churchtown |
|134 Ashewood Walk, Summerhill Lane, Portlaoise|Portlaoise |
|1 Meadow Avenue, Dundrum, Dublin 14           |Dundrum    |
|1 The Haven, Mornington                       |NULL       |
|11 Melville Heights, Kilkenny                 |Kilkenny   |
|12 Sallymount Avenue, Ranelagh                |Ranelagh   |
|13  Oakleigh Wood, Dooradoyle, Limerick       |Limerick   |
|13 The Drive, Chapelstown Gate, Tullow Road   |NULL       |
|15 Carriglawn, Waterpark, Carrigaline         |Carrigaline|
|15a Moore Bay, Kilkee                         |Kilkee     |
|16 Aisling Geal, Fr. Russell Road             |NULL       |
|206 Philipsburgh Avenue, Marino, Dublin 3     |Marino     |
|22 Laverna Way, Castleknock, Dublin 15        |Castleknock|
|23 Elton Park, Sandycov

In [None]:
# columns i want to keep:
keep_cols = [
    'Date of Sale (dd/mm/yyyy)',
    'Address',
    'County',
    'Not Full Market Price',
    'VAT Exclusive',
    'Description of Property',
    'Property Size Description',
    'Price_2',
    'town'
]

housing_clean = Housing.select(*keep_cols)
housing_clean.show()

+-------------------------+--------------------+--------+---------------------+-------------+-----------------------+-------------------------+---------+-----------+
|Date of Sale (dd/mm/yyyy)|             Address|  County|Not Full Market Price|VAT Exclusive|Description of Property|Property Size Description|  Price_2|       town|
+-------------------------+--------------------+--------+---------------------+-------------+-----------------------+-------------------------+---------+-----------+
|               01/01/2010|5 Braemor Drive, ...|  Dublin|                   No|           No|   Second-Hand Dwell...|                     NULL| 343000.0| Churchtown|
|               03/01/2010|134 Ashewood Walk...|   Laois|                   No|          Yes|   New Dwelling hous...|     greater than or e...| 185000.0| Portlaoise|
|               04/01/2010|1 Meadow Avenue, ...|  Dublin|                   No|           No|   Second-Hand Dwell...|                     NULL| 438500.0|    Dundrum|
|   

In [None]:
#remove columns where price is null:
housing_clean = housing_clean.na.drop(subset=["Price_2"])
print(housing_clean.count())
housing_clean.show()

756586
+-------------------------+--------------------+--------+---------------------+-------------+-----------------------+-------------------------+---------+-----------+
|Date of Sale (dd/mm/yyyy)|             Address|  County|Not Full Market Price|VAT Exclusive|Description of Property|Property Size Description|  Price_2|       town|
+-------------------------+--------------------+--------+---------------------+-------------+-----------------------+-------------------------+---------+-----------+
|               01/01/2010|5 Braemor Drive, ...|  Dublin|                   No|           No|   Second-Hand Dwell...|                     NULL| 343000.0| Churchtown|
|               03/01/2010|134 Ashewood Walk...|   Laois|                   No|          Yes|   New Dwelling hous...|     greater than or e...| 185000.0| Portlaoise|
|               04/01/2010|1 Meadow Avenue, ...|  Dublin|                   No|           No|   Second-Hand Dwell...|                     NULL| 438500.0|    Dundru

In [None]:
# removing duplicates and rows where town is null
housing_clean.dropDuplicates()
housing_clean = housing_clean.filter(housing_clean["town"].isNotNull())
housing_clean.count()


583519

In [None]:
housing_clean.filter(housing_clean["town"] == "Dingle").show() # check

+-------------------------+--------------------+------+---------------------+-------------+-----------------------+-------------------------+---------+------+
|Date of Sale (dd/mm/yyyy)|             Address|County|Not Full Market Price|VAT Exclusive|Description of Property|Property Size Description|  Price_2|  town|
+-------------------------+--------------------+------+---------------------+-------------+-----------------------+-------------------------+---------+------+
|               22/01/2010|4 Killelane, Ding...| Kerry|                   No|           No|   Second-Hand Dwell...|                     NULL| 255000.0|Dingle|
|               27/01/2010|   Coum Gath, Dingle| Kerry|                   No|           No|   Second-Hand Dwell...|                     NULL| 210000.0|Dingle|
|               11/02/2010|   Gortanora, Dingle| Kerry|                   No|           No|   Second-Hand Dwell...|                     NULL|273127.75|Dingle|
|               02/03/2010|  Doonsheane, Dingl

### lower case town names


In [None]:
town_names = town_names.select(
    "Irish_Name",
    "English_Na",
    "County",
    "Contae"
)

In [None]:
# lower case town name and county column
from pyspark.sql.functions import lower, col

house_cleaning = housing_clean.withColumn("town", lower(col("town"))) \
       .withColumn("County", lower(col("County")))

In [None]:
house_cleaning.show()

+-------------------------+--------------------+--------+---------------------+-------------+-----------------------+-------------------------+--------+-----------+
|Date of Sale (dd/mm/yyyy)|             Address|  County|Not Full Market Price|VAT Exclusive|Description of Property|Property Size Description| Price_2|       town|
+-------------------------+--------------------+--------+---------------------+-------------+-----------------------+-------------------------+--------+-----------+
|               01/01/2010|5 Braemor Drive, ...|  dublin|                   No|           No|   Second-Hand Dwell...|                     NULL|343000.0| churchtown|
|               03/01/2010|134 Ashewood Walk...|   laois|                   No|          Yes|   New Dwelling hous...|     greater than or e...|185000.0| portlaoise|
|               04/01/2010|1 Meadow Avenue, ...|  dublin|                   No|           No|   Second-Hand Dwell...|                     NULL|438500.0|    dundrum|
|         

In [None]:
town_names.show()
town_names.filter(town_names["English_Na"] == "Dingle").show()

+--------------------+--------------+------+-----------+
|          Irish_Name|    English_Na|County|     Contae|
+--------------------+--------------+------+-----------+
|          Fionnmhach|       Fennagh|CARLOW|Ceatharlach|
|     An Gharbhchoill|     Garryhill|CARLOW|Ceatharlach|
|       Baile Haicéid|   Hacketstown|CARLOW|Ceatharlach|
|         Cill Bhríde|      Kilbride|CARLOW|Ceatharlach|
|        Cill Dheirge|     Killerrig|CARLOW|Ceatharlach|
|Leithghlinn an Dr...|Leighlinbridge|CARLOW|Ceatharlach|
|              Balana|        Ballon|CARLOW|Ceatharlach|
|     Seanleithghlinn|   Oldleighlin|CARLOW|Ceatharlach|
|         Muine Bheag|  Bagenalstown|CARLOW|Ceatharlach|
|              Míseal|       Myshall|CARLOW|Ceatharlach|
|            An Urnaí|        Nurney|CARLOW|Ceatharlach|
|          Ráth Bhile|     Rathvilly|CARLOW|Ceatharlach|
|                    |       Tiknock|CARLOW|Ceatharlach|
|           An Tulach|        Tullow|CARLOW|Ceatharlach|
|          Ard Aitinn|      Ard

In [None]:
# lower case town name and county column
from pyspark.sql.functions import lower, col

housing_clean = (housing_clean.withColumn("town", lower(col("town"))) \
       .withColumn("County", lower(col("County"))))

town_names = (town_names.withColumn("Irish_Name", lower(col("Irish_Name"))) \
       .withColumn("County", lower(col("County"))) \
       .withColumn("English_Na", lower(col("English_Na"))))

from pyspark.sql.functions import lower, col

urban_area = (
    urban_area
        .withColumn("urban_area_name", lower(col("urban_area_name")))
        .withColumn("county", lower(col("county")))
)


In [None]:
town_names.show()
house_cleaning.show()
urban_area.show()

+--------------------+--------------+------+-----------+
|          Irish_Name|    English_Na|County|     Contae|
+--------------------+--------------+------+-----------+
|          fionnmhach|       fennagh|carlow|Ceatharlach|
|     an gharbhchoill|     garryhill|carlow|Ceatharlach|
|       baile haicéid|   hacketstown|carlow|Ceatharlach|
|         cill bhríde|      kilbride|carlow|Ceatharlach|
|        cill dheirge|     killerrig|carlow|Ceatharlach|
|leithghlinn an dr...|leighlinbridge|carlow|Ceatharlach|
|              balana|        ballon|carlow|Ceatharlach|
|     seanleithghlinn|   oldleighlin|carlow|Ceatharlach|
|         muine bheag|  bagenalstown|carlow|Ceatharlach|
|              míseal|       myshall|carlow|Ceatharlach|
|            an urnaí|        nurney|carlow|Ceatharlach|
|          ráth bhile|     rathvilly|carlow|Ceatharlach|
|                    |       tiknock|carlow|Ceatharlach|
|           an tulach|        tullow|carlow|Ceatharlach|
|          ard aitinn|      ard

In [None]:
housing_clean = housing_clean.join(
    town_names.select("Irish_Name", "English_Na"),
    housing_clean["town"] == town_names["Irish_Name"],
    how="left"
)


In [None]:
housing_clean.filter(housing_clean["town"] == "dingle").show()

+-------------------------+--------------------+------+---------------------+-------------+-----------------------+-------------------------+---------+------+----------+----------+
|Date of Sale (dd/mm/yyyy)|             Address|County|Not Full Market Price|VAT Exclusive|Description of Property|Property Size Description|  Price_2|  town|Irish_Name|English_Na|
+-------------------------+--------------------+------+---------------------+-------------+-----------------------+-------------------------+---------+------+----------+----------+
|               22/01/2010|4 Killelane, Ding...| kerry|                   No|           No|   Second-Hand Dwell...|                     NULL| 255000.0|dingle|      NULL|      NULL|
|               27/01/2010|   Coum Gath, Dingle| kerry|                   No|           No|   Second-Hand Dwell...|                     NULL| 210000.0|dingle|      NULL|      NULL|
|               11/02/2010|   Gortanora, Dingle| kerry|                   No|           No|   S

In [None]:
housing_clean.show()

+-------------------------+--------------------+--------+---------------------+-------------+-----------------------+-------------------------+--------+-----------+----------+----------+
|Date of Sale (dd/mm/yyyy)|             Address|  County|Not Full Market Price|VAT Exclusive|Description of Property|Property Size Description| Price_2|       town|Irish_Name|English_Na|
+-------------------------+--------------------+--------+---------------------+-------------+-----------------------+-------------------------+--------+-----------+----------+----------+
|               01/01/2010|5 Braemor Drive, ...|  dublin|                   No|           No|   Second-Hand Dwell...|                     NULL|343000.0| churchtown|      NULL|      NULL|
|               03/01/2010|134 Ashewood Walk...|   laois|                   No|          Yes|   New Dwelling hous...|     greater than or e...|185000.0| portlaoise|      NULL|      NULL|
|               04/01/2010|1 Meadow Avenue, ...|  dublin|        

In [None]:
from pyspark.sql.functions import coalesce, col

housing_clean = housing_clean.withColumn(
    "English_Na",
    coalesce(col("English_Na"), col("town"))
)


In [None]:
housing_clean.filter(housing_clean["town"] == "dingle").show()

+-------------------------+--------------------+------+---------------------+-------------+-----------------------+-------------------------+---------+------+----------+----------+
|Date of Sale (dd/mm/yyyy)|             Address|County|Not Full Market Price|VAT Exclusive|Description of Property|Property Size Description|  Price_2|  town|Irish_Name|English_Na|
+-------------------------+--------------------+------+---------------------+-------------+-----------------------+-------------------------+---------+------+----------+----------+
|               22/01/2010|4 Killelane, Ding...| kerry|                   No|           No|   Second-Hand Dwell...|                     NULL| 255000.0|dingle|      NULL|    dingle|
|               27/01/2010|   Coum Gath, Dingle| kerry|                   No|           No|   Second-Hand Dwell...|                     NULL| 210000.0|dingle|      NULL|    dingle|
|               11/02/2010|   Gortanora, Dingle| kerry|                   No|           No|   S

In [None]:
housing_clean.show()

+-------------------------+--------------------+--------+---------------------+-------------+-----------------------+-------------------------+--------+-----------+----------+-----------+
|Date of Sale (dd/mm/yyyy)|             Address|  County|Not Full Market Price|VAT Exclusive|Description of Property|Property Size Description| Price_2|       town|Irish_Name| English_Na|
+-------------------------+--------------------+--------+---------------------+-------------+-----------------------+-------------------------+--------+-----------+----------+-----------+
|               01/01/2010|5 Braemor Drive, ...|  dublin|                   No|           No|   Second-Hand Dwell...|                     NULL|343000.0| churchtown|      NULL| churchtown|
|               03/01/2010|134 Ashewood Walk...|   laois|                   No|          Yes|   New Dwelling hous...|     greater than or e...|185000.0| portlaoise|      NULL| portlaoise|
|               04/01/2010|1 Meadow Avenue, ...|  dublin|   

In [None]:
housing_clean = housing_clean.drop("town")


In [None]:
housing_clean = housing_clean.withColumnRenamed("English_Na", "town")


In [None]:
housing_clean.filter((housing_clean["town"] == "dingle") & (housing_clean["County"] == "kildare")).show()

+-------------------------+----------------+-------+---------------------+-------------+-----------------------+-------------------------+---------+----------+------+
|Date of Sale (dd/mm/yyyy)|         Address| County|Not Full Market Price|VAT Exclusive|Description of Property|Property Size Description|  Price_2|Irish_Name|  town|
+-------------------------+----------------+-------+---------------------+-------------+-----------------------+-------------------------+---------+----------+------+
|               08/11/2023|Spa Road, Dingle|kildare|                  Yes|          Yes|   New Dwelling hous...|                     NULL|3019315.0|      NULL|dingle|
+-------------------------+----------------+-------+---------------------+-------------+-----------------------+-------------------------+---------+----------+------+



In [None]:
housing_clean.show()

+-------------------------+--------------------+--------+---------------------+-------------+-----------------------+-------------------------+--------+----------+-----------+
|Date of Sale (dd/mm/yyyy)|             Address|  County|Not Full Market Price|VAT Exclusive|Description of Property|Property Size Description| Price_2|Irish_Name|       town|
+-------------------------+--------------------+--------+---------------------+-------------+-----------------------+-------------------------+--------+----------+-----------+
|               01/01/2010|5 Braemor Drive, ...|  dublin|                   No|           No|   Second-Hand Dwell...|                     NULL|343000.0|      NULL| churchtown|
|               03/01/2010|134 Ashewood Walk...|   laois|                   No|          Yes|   New Dwelling hous...|     greater than or e...|185000.0|      NULL| portlaoise|
|               04/01/2010|1 Meadow Avenue, ...|  dublin|                   No|           No|   Second-Hand Dwell...|   

In [None]:
urban_area.show()

+--------+--------------------+---------------+--------------------+---------+-------------+-------------+------------------+----------------+
|OBJECTID|     URBAN_AREA_GUID|URBAN_AREA_CODE|     urban_area_name|   county|   Centroid_x|   Centroid_y|       Shape__Area|   Shape__Length|
+--------+--------------------+---------------+--------------------+---------+-------------+-------------+------------------+----------------+
|       1|0138fb4f-2ab8-403...|          27295|              bearna|   galway|522593.204561|723181.979819|  3989630.23831177|18629.6809484117|
|       2|0139a442-0f36-46d...|          10019|               ardee|    louth|696152.155347|790695.313556|  4747904.28479004|10477.5822516943|
|       3|020a2786-0521-445...|          28125|         ballinamore|  leitrim| 613071.60711|811532.546151|  1205382.26107788|11491.7124087075|
|       4|0261d090-30b7-406...|          11408|             ratoath|    meath|701909.103785|751650.842556|  3067987.87744141|11723.4625093269|

In [None]:
urban_area.filter(urban_area["URBAN_AREA_NAME"] == "Dingle").show()

+--------+---------------+---------------+---------------+------+----------+----------+-----------+-------------+
|OBJECTID|URBAN_AREA_GUID|URBAN_AREA_CODE|urban_area_name|county|Centroid_x|Centroid_y|Shape__Area|Shape__Length|
+--------+---------------+---------------+---------------+------+----------+----------+-----------+-------------+
+--------+---------------+---------------+---------------+------+----------+----------+-----------+-------------+



In [None]:
# all counties have english names now
# create urban column in the dataset
from pyspark.sql.functions import lit

urban_area = urban_area.withColumn("urban", lit("urban"))
urban_area.show()

+--------+--------------------+---------------+--------------------+---------+-------------+-------------+------------------+----------------+-----+
|OBJECTID|     URBAN_AREA_GUID|URBAN_AREA_CODE|     urban_area_name|   county|   Centroid_x|   Centroid_y|       Shape__Area|   Shape__Length|urban|
+--------+--------------------+---------------+--------------------+---------+-------------+-------------+------------------+----------------+-----+
|       1|0138fb4f-2ab8-403...|          27295|              bearna|   galway|522593.204561|723181.979819|  3989630.23831177|18629.6809484117|urban|
|       2|0139a442-0f36-46d...|          10019|               ardee|    louth|696152.155347|790695.313556|  4747904.28479004|10477.5822516943|urban|
|       3|020a2786-0521-445...|          28125|         ballinamore|  leitrim| 613071.60711|811532.546151|  1205382.26107788|11491.7124087075|urban|
|       4|0261d090-30b7-406...|          11408|             ratoath|    meath|701909.103785|751650.842556|

In [None]:
#using key county and urban_area_name give mathcing rows with english_na(town_name) and county a row that staest urban and if not matching say rural


In [None]:
housing_clean.count()

583586

In [None]:
from pyspark.sql.functions import *
urban_area = urban_area.withColumnRenamed("URBAN_AREA_NAME", "town")

housing_clean_new = housing_clean.join(
    urban_area,
    on=["town", "County"],
    how="full"
)

In [None]:
housing_clean_new.filter(housing_clean_new["town"] == "dingle").show()

+------+-------+-------------------------+--------------------+---------------------+-------------+-----------------------+-------------------------+---------+----------+--------+---------------+---------------+----------+----------+-----------+-------------+-----+
|  town| County|Date of Sale (dd/mm/yyyy)|             Address|Not Full Market Price|VAT Exclusive|Description of Property|Property Size Description|  Price_2|Irish_Name|OBJECTID|URBAN_AREA_GUID|URBAN_AREA_CODE|Centroid_x|Centroid_y|Shape__Area|Shape__Length|urban|
+------+-------+-------------------------+--------------------+---------------------+-------------+-----------------------+-------------------------+---------+----------+--------+---------------+---------------+----------+----------+-----------+-------------+-----+
|dingle|kildare|               08/11/2023|    Spa Road, Dingle|                  Yes|          Yes|   New Dwelling hous...|                     NULL|3019315.0|      NULL|    NULL|           NULL|       

In [None]:
from pyspark.sql.functions import when, col

df = housing_clean_new.withColumn(
    "urban_rural",
    when(col("URBAN_AREA_GUID").isNotNull(), "urban")
    .otherwise("rural")
)

In [None]:
df.groupBy("urban_rural").count().show()

+-----------+------+
|urban_rural| count|
+-----------+------+
|      rural|244819|
|      urban|338964|
+-----------+------+



In [None]:
df = df.dropDuplicates()

In [None]:
df.count()

582927

In [None]:
df.show()

+---------+-------+-------------------------+--------------------+---------------------+-------------+-----------------------+-------------------------+---------+----------+--------+--------------------+---------------+-------------+-------------+----------------+---------------+-----+-----------+
|     town| County|Date of Sale (dd/mm/yyyy)|             Address|Not Full Market Price|VAT Exclusive|Description of Property|Property Size Description|  Price_2|Irish_Name|OBJECTID|     URBAN_AREA_GUID|URBAN_AREA_CODE|   Centroid_x|   Centroid_y|     Shape__Area|  Shape__Length|urban|urban_rural|
+---------+-------+-------------------------+--------------------+---------------------+-------------+-----------------------+-------------------------+---------+----------+--------+--------------------+---------------+-------------+-------------+----------------+---------------+-----+-----------+
|    balla|   mayo|               08/06/2016|48 College Woods,...|                   No|          Yes| 

In [None]:
#final columns to keep
final_df = df.select('Date of Sale (dd/mm/yyyy)','Address','Price_2','town','County','urban_rural','Description of Property' )

In [None]:
final_df.show()

+-------------------------+--------------------+---------+---------+-------+-----------+-----------------------+
|Date of Sale (dd/mm/yyyy)|             Address|  Price_2|     town| County|urban_rural|Description of Property|
+-------------------------+--------------------+---------+---------+-------+-----------+-----------------------+
|               08/06/2016|48 College Woods,...|  74890.0|    balla|   mayo|      urban|   New Dwelling hous...|
|               04/07/2016|21 College Woods,...| 61232.86|    balla|   mayo|      urban|   New Dwelling hous...|
|               09/02/2018|41 College Woods,...| 62555.06|    balla|   mayo|      urban|   New Dwelling hous...|
|               19/03/2013|No. 42, College W...|  69500.0|    balla|   mayo|      urban|   New Dwelling hous...|
|               29/11/2017|31 College Woods,...| 63552.86|    balla|   mayo|      urban|   New Dwelling hous...|
|               26/09/2013|52 College Woods,...| 67552.86|    balla|   mayo|      urban|   New D

In [None]:
# convert urban_rural value for every row with county = dublin to urban

from pyspark.sql.functions import when, col, lit

final_df = final_df.withColumn(
    "urban_rural",
    when(col("County") == "dublin", lit("urban"))
    .otherwise(col("urban_rural"))
)


In [None]:
final_df.show()

+-------------------------+--------------------+---------+---------+-------+-----------+-----------------------+
|Date of Sale (dd/mm/yyyy)|             Address|  Price_2|     town| County|urban_rural|Description of Property|
+-------------------------+--------------------+---------+---------+-------+-----------+-----------------------+
|               08/06/2016|48 College Woods,...|  74890.0|    balla|   mayo|      urban|   New Dwelling hous...|
|               04/07/2016|21 College Woods,...| 61232.86|    balla|   mayo|      urban|   New Dwelling hous...|
|               09/02/2018|41 College Woods,...| 62555.06|    balla|   mayo|      urban|   New Dwelling hous...|
|               19/03/2013|No. 42, College W...|  69500.0|    balla|   mayo|      urban|   New Dwelling hous...|
|               29/11/2017|31 College Woods,...| 63552.86|    balla|   mayo|      urban|   New Dwelling hous...|
|               26/09/2013|52 College Woods,...| 67552.86|    balla|   mayo|      urban|   New D

In [None]:
final_df.groupBy("urban_rural").count().show()

+-----------+------+
|urban_rural| count|
+-----------+------+
|      rural|117165|
|      urban|465762|
+-----------+------+



In [None]:

pdf = final_df.toPandas()
pdf.to_csv("final_df.csv", index=False)
