# Import necessary libraries and custom modules

In [None]:
import pandas as pd
import numpy as np
from data_preprocessing import clean_data, engineer_features
from visualization import (
    plot_price_distribution, 
    plot_property_type_counts, 
    plot_price_vs_area, 
    plot_district_prices, 
    plot_correlation_heatmap, 
    plot_pairplot, 
    create_interactive_map
)
import missingno as msno
import warnings
warnings.filterwarnings("ignore")

# Load the data

In [None]:
# Read each CSV chunk of the Portugal housing dataset. The per-chunk frames
# stay bound to df1..df4 so any other cell that refers to them keeps working.
df1 = pd.read_csv('./data/portugal_housing_chunk_1.csv')
df2 = pd.read_csv('./data/portugal_housing_chunk_2.csv')
df3 = pd.read_csv('./data/portugal_housing_chunk_3.csv')
df4 = pd.read_csv('./data/portugal_housing_chunk_4.csv')

# Combine all four chunks into a single DataFrame. Only df1 and df2 used to be
# concatenated, which silently left chunks 3 and 4 out of the whole analysis.
# ignore_index=True discards the per-chunk row labels and renumbers the
# combined rows from 0, so the index has no duplicates across chunks.
chunk_frames = [df1, df2, df3, df4]
df = pd.concat(chunk_frames, ignore_index=True)

# Display initial rows to understand the data (kept as the final expression
# so the notebook renders it).
df.head()

# Inspect and clean the data

In [None]:
# Record and report the dimensions of the raw, combined frame.
raw_shape = df.shape
print("Initial data shape:", raw_shape)

# Missingness overview with missingno: bar chart first, then the heatmap.
print("Columns with missing values:")
for draw_missingness in (msno.bar, msno.heatmap):
    draw_missingness(df)

# Pass the frame through clean_data (imported from data_preprocessing) and
# rebind df to whatever it returns, then report the new dimensions.
df = clean_data(df)
cleaned_shape = df.shape
print("Data shape after cleaning:", cleaned_shape)

# Per-column count of values that are still missing; left as the final
# expression so the notebook renders the resulting Series.
df.isna().sum()

# Feature engineering

In [None]:
# Rebind df to the result of engineer_features (imported from data_preprocessing).
df = engineer_features(df)

# Columns to spot-check after feature engineering. Presumably PricePerSqMeter
# and Age are the derived ones -- confirm against engineer_features.
preview_columns = ['Price', 'UsableArea', 'PricePerSqMeter', 'ConstructionYear', 'Age']

# Show the first rows of just those columns (final expression, so it renders).
df[preview_columns].head()

# Distribution of property prices

In [None]:
# Call plot_price_distribution (imported from the visualization module) on df,
# which at this point has been through clean_data and engineer_features.
# NOTE(review): the chart type and the columns it reads are defined in
# visualization.py -- confirm there.
plot_price_distribution(df)

# Count of properties by type

In [None]:
# Call plot_property_type_counts (imported from the visualization module) on
# the cleaned, feature-engineered df.
# NOTE(review): which column holds the property type is decided inside the
# helper -- confirm in visualization.py.
plot_property_type_counts(df)

# Scatter plot of Price vs Usable Area

In [None]:
# Call plot_price_vs_area (imported from the visualization module) on the
# cleaned, feature-engineered df.
# NOTE(review): presumably plots Price against UsableArea -- confirm the
# columns used in visualization.py.
plot_price_vs_area(df)

# Price per square meter by district

In [None]:
# Call plot_district_prices (imported from the visualization module) on the
# cleaned, feature-engineered df.
# NOTE(review): presumably relies on the PricePerSqMeter column added during
# feature engineering -- confirm in visualization.py.
plot_district_prices(df)

# Correlation heatmap of numerical features

In [None]:
# Call plot_correlation_heatmap (imported from the visualization module) on
# the cleaned, feature-engineered df.
# NOTE(review): how numeric columns are selected is up to the helper --
# confirm in visualization.py.
plot_correlation_heatmap(df)

# Pair plot for key numerical features

In [None]:
# Call plot_pairplot (imported from the visualization module) on the cleaned,
# feature-engineered df.
# NOTE(review): the set of "key" features plotted is chosen inside the
# helper -- confirm in visualization.py.
plot_pairplot(df)

# Summarize key findings from the EDA

In [None]:
# Narrative takeaways from the EDA. The text deliberately begins and ends with
# a newline so the bullet list is set off by blank lines when printed.
findings_text = """
- The property prices vary significantly across Portugal, with certain districts showing higher price per square meter.
- There appears to be a relationship between property size and price, but other factors like location and property type also play a role.
- Energy efficiency ratings and property age could be further explored to understand their impact on prices.
"""

# Emit the heading and the findings in one call, one per line.
print("Summary of Findings:", findings_text, sep="\n")