<a href="https://colab.research.google.com/github/lt33tx/Landon_Tinch_DTSC3020_Fall2025-/blob/main/3010_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This chunk basically loads the Housing.csv file using pandas, checks if it actually exists, and if it does, it prints the first few rows and some basic info so I can make sure the dataset loaded correctly.

In [117]:
import pandas as pd  # import pandas for data handling
import numpy as np   # import numpy for numerical operations
import os            # import os for file system tools

file_name_for_student_project = 'Housing.csv'  # set the file name we want to load

try:
    housing_data_frame = pd.read_csv(file_name_for_student_project)  # try loading the CSV file
except FileNotFoundError:
    print("Error: Make sure 'Housing.csv' is in the right spot")  # tell user if the file isn't found
    housing_data_frame = None  # set to None if loading fails

if housing_data_frame is not None:
    print("DataFrame Loaded.")  # confirm the data actually loaded
    print(housing_data_frame.head())  # show first few rows for a quick look
    print(housing_data_frame.info())  # display column types and data summary


DataFrame Loaded.
      price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0  13300000  7420         4          2        3      yes        no       no   
1  12250000  8960         4          4        4      yes        no       no   
2  12250000  9960         3          2        2      yes        no      yes   
3  12215000  7500         4          2        2      yes        no      yes   
4  11410000  7420         4          1        2      yes       yes      yes   

  hotwaterheating airconditioning  parking prefarea furnishingstatus  
0              no             yes        2      yes        furnished  
1              no             yes        3       no        furnished  
2              no              no        2      yes   semi-furnished  
3              no             yes        3      yes        furnished  
4              no             yes        2       no        furnished  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data colu

This part loops through every column in the dataset to check how many values are missing, shows the results, and then prints a final table so I know exactly which columns (if any) have gaps. Basically just a full missing-data check.

In [118]:
missing_data_summary = {}  # store missing data stats


for column_name in housing_data_frame.columns:  # loop through each column

    null_count = housing_data_frame[column_name].isnull().sum()  # count missing values

    total_rows = len(housing_data_frame)  # total number of rows

    missing_percentage = (null_count / total_rows) * 100  # percent of missing values

    missing_data_summary[column_name] = {'Count': null_count, 'Percentage': missing_percentage}  # save results

    if null_count > 0:  # if the column has missing data
        print(f"Found missing data in '{column_name}': {null_count} values ({missing_percentage:.2f}%)")
    else:  # if no missing data
        print(f"'{column_name}' is clean! (0 missing values)")

missing_df = pd.DataFrame(missing_data_summary).T  # convert dict to DataFrame
print(missing_df)  # print the summary table


'price' is clean! (0 missing values)
'area' is clean! (0 missing values)
'bedrooms' is clean! (0 missing values)
'bathrooms' is clean! (0 missing values)
'stories' is clean! (0 missing values)
'mainroad' is clean! (0 missing values)
'guestroom' is clean! (0 missing values)
'basement' is clean! (0 missing values)
'hotwaterheating' is clean! (0 missing values)
'airconditioning' is clean! (0 missing values)
'parking' is clean! (0 missing values)
'prefarea' is clean! (0 missing values)
'furnishingstatus' is clean! (0 missing values)
                  Count  Percentage
price               0.0         0.0
area                0.0         0.0
bedrooms            0.0         0.0
bathrooms           0.0         0.0
stories             0.0         0.0
mainroad            0.0         0.0
guestroom           0.0         0.0
basement            0.0         0.0
hotwaterheating     0.0         0.0
airconditioning     0.0         0.0
parking             0.0         0.0
prefarea            0.0         0

This section goes through the main numeric columns and uses the IQR method to find any extreme outliers, then caps them so they don’t mess up the analysis. Basically just cleaning up crazy high or low values.

In [119]:
numerical_features_to_clean = ['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking']  # list of numeric columns to fix

for feature_name in numerical_features_to_clean:  # loop through each numeric feature

    Q1_value = housing_data_frame[feature_name].quantile(0.25)  # get 25th percentile
    Q3_value = housing_data_frame[feature_name].quantile(0.75)  # get 75th percentile

    Interquartile_Range = Q3_value - Q1_value  # calculate IQR

    low_fence = Q1_value - 1.5 * Interquartile_Range  # lower limit for outliers
    high_fence = Q3_value + 1.5 * Interquartile_Range  # upper limit for outliers


    high_outliers_count = len(housing_data_frame[housing_data_frame[feature_name] > high_fence])  # count high outliers
    if high_outliers_count > 0:  # check if any high outliers exist
        print(f"Capping {high_outliers_count} high outliers in {feature_name} at value {high_fence:.2f}")  # tell user what's capped
        housing_data_frame.loc[housing_data_frame[feature_name] > high_fence, feature_name] = high_fence  # cap high outliers

    low_outliers_count = len(housing_data_frame[housing_data_frame[feature_name] < low_fence])  # count low outliers
    if low_outliers_count > 0:  # check if any low outliers exist
        print(f"Capping {low_outliers_count} low outliers in {feature_name} at value {low_fence:.2f}")  # tell user what's capped
        housing_data_frame.loc[housing_data_frame[feature_name] < low_fence, feature_name] = low_fence  # cap low outliers

print("Sucess Capping Complete")


Capping 15 high outliers in price at value 9205000.00
Capping 12 high outliers in area at value 10500.00
Capping 12 high outliers in bedrooms at value 4.50
Capping 1 high outliers in bathrooms at value 3.50
Capping 41 high outliers in stories at value 3.50
Capping 12 high outliers in parking at value 2.50
Sucess Capping Complete


  housing_data_frame.loc[housing_data_frame[feature_name] > high_fence, feature_name] = high_fence  # cap high outliers
  housing_data_frame.loc[housing_data_frame[feature_name] > high_fence, feature_name] = high_fence  # cap high outliers
  housing_data_frame.loc[housing_data_frame[feature_name] > high_fence, feature_name] = high_fence  # cap high outliers
  housing_data_frame.loc[housing_data_frame[feature_name] > high_fence, feature_name] = high_fence  # cap high outliers


This part checks all the yes/no columns to make sure they only contain the right values, and also looks at the furnishing status column just to confirm nothing weird is in there. Basically making sure the categorical labels are clean.

In [120]:
binary_columns_for_check = ['mainroad', 'guestroom', 'basement',
                            'hotwaterheating', 'airconditioning', 'prefarea']  # list of yes/no columns to inspect


for cat_column in binary_columns_for_check:  # loop through each categorical column
    unique_values = housing_data_frame[cat_column].unique()  # get all unique values in the column

    if set(unique_values) == {'yes', 'no'}:  # check if values are only yes/no
        print(f"'{cat_column}' looks good! Values are: {unique_values}")  # tell us everything is fine
    else:
        print(f" WARNING: Inconsistent values found in '{cat_column}': {unique_values}")  # warn if something is off

furnishing_unique = housing_data_frame['furnishingstatus'].unique()  # get unique values for furnishing status
print(f"\nStatus check for 'furnishingstatus': {furnishing_unique}")  # print them out to verify

'mainroad' looks good! Values are: ['yes' 'no']
'guestroom' looks good! Values are: ['no' 'yes']
'basement' looks good! Values are: ['no' 'yes']
'hotwaterheating' looks good! Values are: ['no' 'yes']
'airconditioning' looks good! Values are: ['yes' 'no']
'prefarea' looks good! Values are: ['yes' 'no']

Status check for 'furnishingstatus': ['furnished' 'semi-furnished' 'unfurnished']


This section randomly grabs a small set of rows from the cleaned numeric columns so I can double-check that everything looks normal after cleaning. Just a quick sanity check sample.

In [121]:
cleaned_numerical_features = ['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking']  # pick the numeric columns we want to show

cleaned_data_for_display = housing_data_frame[cleaned_numerical_features].copy()  # make a copy so we don't change the original

number_of_samples = 15  # choose how many random rows to show
print(f"--- True Random Sample of {number_of_samples} Rows (Numerical Features Only) ---")  # announce random sampling
print("This output should change every time you run it.")  # remind user sampling is random

print(cleaned_data_for_display.sample(n=number_of_samples))  # print the random sample rows


--- True Random Sample of 15 Rows (Numerical Features Only) ---
This output should change every time you run it.
       price  area  bedrooms  bathrooms  stories  parking
386  3535000  3850       3.0        1.0      1.0      2.0
226  4690000  5170       3.0        1.0      3.5      0.0
3    9205000  7500       4.0        2.0      2.0      2.5
474  2975000  4352       4.0        1.0      2.0      1.0
62   7070000  6240       4.0        2.0      2.0      1.0
350  3780000  3420       2.0        1.0      2.0      1.0
317  4060000  4992       3.0        2.0      2.0      2.0
250  4515000  3510       3.0        1.0      3.0      0.0
14   9205000  7800       3.0        2.0      2.0      0.0
4    9205000  7420       4.0        1.0      2.0      2.0
9    9205000  5750       3.0        2.0      3.5      1.0
361  3710000  6020       3.0        1.0      1.0      0.0
119  5950000  7020       3.0        1.0      1.0      2.0
441  3220000  4370       3.0        1.0      2.0      0.0
483  2940000  661

This part prints the summary stats for all the numeric columns so I can see how the data looks after cleaning—especially to check if the outlier capping actually worked.

In [122]:
numerical_features = ['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking']  # list of numeric columns to summarize

stats_df = housing_data_frame[numerical_features].copy()  # make a copy of just the numeric data

print(stats_df.describe())  # show summary statistics


              price          area    bedrooms   bathrooms     stories  \
count  5.450000e+02    545.000000  545.000000  545.000000  545.000000   
mean   4.727388e+06   5102.249541    2.950459    1.285321    1.767890   
std    1.745227e+06   2005.804353    0.697504    0.497942    0.777543   
min    1.750000e+06   1650.000000    1.000000    1.000000    1.000000   
25%    3.430000e+06   3600.000000    2.000000    1.000000    1.000000   
50%    4.340000e+06   4600.000000    3.000000    1.000000    2.000000   
75%    5.740000e+06   6360.000000    3.000000    2.000000    2.000000   
max    9.205000e+06  10500.000000    4.500000    3.500000    3.500000   

          parking  
count  545.000000  
mean     0.682569  
std      0.834773  
min      0.000000  
25%      0.000000  
50%      0.000000  
75%      1.000000  
max      2.500000  


This section just shows the first chunk of the fully cleaned dataset and the data types so I can double-check everything looks correct before moving on to feature engineering.

In [123]:
print(housing_data_frame.head(15))  # display first 15 rows

print(housing_data_frame.info())  # print info about the DataFrame


      price   area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0   9205000   7420       4.0        2.0      3.0      yes        no       no   
1   9205000   8960       4.0        3.5      3.5      yes        no       no   
2   9205000   9960       3.0        2.0      2.0      yes        no      yes   
3   9205000   7500       4.0        2.0      2.0      yes        no      yes   
4   9205000   7420       4.0        1.0      2.0      yes       yes      yes   
5   9205000   7500       3.0        3.0      1.0      yes        no      yes   
6   9205000   8580       4.0        3.0      3.5      yes        no       no   
7   9205000  10500       4.5        3.0      2.0      yes        no       no   
8   9205000   8100       4.0        1.0      2.0      yes       yes      yes   
9   9205000   5750       3.0        2.0      3.5      yes       yes       no   
10  9205000  10500       3.0        1.0      2.0      yes        no      yes   
11  9205000   6000       4.0        3.0 

This part converts all the yes/no columns into 1s and 0s so the model can actually use them—basically turning the binary text data into numeric form.

In [124]:
binary_features = ['mainroad', 'guestroom', 'basement',
                   'hotwaterheating', 'airconditioning', 'prefarea']  # list of yes/no columns to convert


Yes_No_Mapper = {'yes': 1, 'no': 0}  # mapping dictionary for yes/no to 1/0

housing_data_frame['mainroad'] = housing_data_frame['mainroad'].replace(Yes_No_Mapper)  # convert mainroad
housing_data_frame['guestroom'] = housing_data_frame['guestroom'].replace(Yes_No_Mapper)  # convert guestroom
housing_data_frame['basement'] = housing_data_frame['basement'].replace(Yes_No_Mapper)  # convert basement
housing_data_frame['hotwaterheating'] = housing_data_frame['hotwaterheating'].replace(Yes_No_Mapper)  # convert hotwaterheating
housing_data_frame['airconditioning'] = housing_data_frame['airconditioning'].replace(Yes_No_Mapper)  # convert airconditioning
housing_data_frame['prefarea'] = housing_data_frame['prefarea'].replace(Yes_No_Mapper)  # convert prefarea

print("Sucess columns converted ")


Sucess columns converted 


  housing_data_frame['mainroad'] = housing_data_frame['mainroad'].replace(Yes_No_Mapper)  # convert mainroad
  housing_data_frame['guestroom'] = housing_data_frame['guestroom'].replace(Yes_No_Mapper)  # convert guestroom
  housing_data_frame['basement'] = housing_data_frame['basement'].replace(Yes_No_Mapper)  # convert basement
  housing_data_frame['hotwaterheating'] = housing_data_frame['hotwaterheating'].replace(Yes_No_Mapper)  # convert hotwaterheating
  housing_data_frame['airconditioning'] = housing_data_frame['airconditioning'].replace(Yes_No_Mapper)  # convert airconditioning
  housing_data_frame['prefarea'] = housing_data_frame['prefarea'].replace(Yes_No_Mapper)  # convert prefarea


This part turns the 'furnishingstatus' column into dummy variables (one-hot encoding) so each category becomes its own numeric column, and then it drops the original text column—basically prepping it for modeling.

In [125]:

column_name_to_check = 'furnishingstatus'  # column to encode

if column_name_to_check in housing_data_frame.columns:  # make sure column exists

    status_dummies_raw = pd.get_dummies(housing_data_frame[column_name_to_check], drop_first=True, prefix='status')  # create dummy variables

    housing_data_frame = pd.concat([housing_data_frame, status_dummies_raw], axis=1)  # add dummy columns to df

    housing_data_frame.drop(column_name_to_check, axis=1, inplace=True)  # remove original column

    print([col for col in housing_data_frame.columns if 'status' in col or 'area' in col])  # print relevant headers

else:  # if column already deleted
    print(f"WARNING: The column '{column_name_to_check}' has already been processed and deleted.")  # warn user

['area', 'prefarea', 'status_semi-furnished', 'status_unfurnished']


This part standardizes all the numeric columns using Z-scores so each feature has a mean of 0 and a standard deviation of 1 basically making the scales comparable for modeling.

In [126]:
# CODE BLOCK 2A: Manual Z-Score Standardization

# List of all the numerical columns to scale, including 'price'
numeric_features_to_scale = ['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking']


# A student might define a function for this, even though Pandas can do it in one line.
def manual_z_score_scaler(series):
    # Calculate the mean (mu) and standard deviation (sigma) of the input series
    mu = series.mean()
    sigma = series.std()

    # Apply the Z-score formula: (v - mu) / sigma
    scaled_series = (series - mu) / sigma
    return scaled_series

# Apply the function to each column manually
housing_data_frame['price'] = manual_z_score_scaler(housing_data_frame['price'])
housing_data_frame['area'] = manual_z_score_scaler(housing_data_frame['area'])
housing_data_frame['bedrooms'] = manual_z_score_scaler(housing_data_frame['bedrooms'])
housing_data_frame['bathrooms'] = manual_z_score_scaler(housing_data_frame['bathrooms'])
housing_data_frame['stories'] = manual_z_score_scaler(housing_data_frame['stories'])
housing_data_frame['parking'] = manual_z_score_scaler(housing_data_frame['parking'])

print("Sucuss All numerical features have been standardized (Mean ≈ 0, Std ≈ 1).")

Sucuss All numerical features have been standardized (Mean ≈ 0, Std ≈ 1).


This part double-checks that all the numeric features are properly standardized (mean ≈ 0, std ≈ 1) and then shows the first few rows of the final cleaned DataFrame, basically making sure everything’s ready for modeling.

In [127]:


verification_stats = housing_data_frame[['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking']].describe() # get descriptive stats for numeric features

print(verification_stats.loc[['mean', 'std']].round(4)) # show only mean and std rounded to 4 decimals

print(housing_data_frame.head()) # show first 5 rows of final DataFrame

      price  area  bedrooms  bathrooms  stories  parking
mean    0.0  -0.0      -0.0       -0.0      0.0     -0.0
std     1.0   1.0       1.0        1.0      1.0      1.0
      price      area  bedrooms  bathrooms   stories  mainroad  guestroom  \
0  2.565633  1.155522  1.504710   1.435266  1.584619         1          0   
1  2.565633  1.923293  1.504710   4.447667  2.227670         1          0   
2  2.565633  2.421847  0.071027   1.435266  0.298517         1          0   
3  2.565633  1.195406  1.504710   1.435266  0.298517         1          0   
4  2.565633  1.155522  1.504710  -0.573001  0.298517         1          1   

   basement  hotwaterheating  airconditioning   parking  prefarea  \
0         0                0                1  1.578191         1   
1         0                0                1  2.177156         0   
2         1                0                0  1.578191         1   
3         1                0                1  2.177156         1   
4         1          

This part calculates and shows how all the numeric features relate to each other and to price, helping me spot which features might be weak and could potentially be removed.

In [128]:

Correlation_Table = housing_data_frame.corr()  # calculate correlation matrix for all numeric features

print(Correlation_Table.round(3))  # print rounded correlations

price_correlations = Correlation_Table['price'].sort_values(ascending=False)  # sort correlations with price

print(price_correlations)  # print sorted correlations


                       price   area  bedrooms  bathrooms  stories  mainroad  \
price                  1.000  0.554     0.382      0.509    0.416     0.309   
area                   0.554  1.000     0.160      0.198    0.076     0.303   
bedrooms               0.382  0.160     1.000      0.380    0.450    -0.006   
bathrooms              0.509  0.198     0.380      1.000    0.315     0.042   
stories                0.416  0.076     0.450      0.315    1.000     0.116   
mainroad               0.309  0.303    -0.006      0.042    0.116     1.000   
guestroom              0.274  0.163     0.088      0.128    0.037     0.092   
basement               0.191  0.055     0.102      0.104   -0.164     0.044   
hotwaterheating        0.102 -0.017     0.041      0.068    0.032    -0.012   
airconditioning        0.462  0.240     0.176      0.186    0.277     0.105   
parking                0.381  0.368     0.150      0.175    0.029     0.206   
prefarea               0.329  0.251     0.092      0

This part removes the feature with the weakest correlation to price ('hotwaterheating') to simplify the dataset and keep only the most relevant columns for modeling.

In [131]:
feature_to_eliminate = 'hotwaterheating' # Assuming this was the feature with the weakest correlation  # choose which feature to remove

print(f"\n Decision Time: Eliminating Feature '{feature_to_eliminate}' ---")  # announce deletion process

if feature_to_eliminate in housing_data_frame.columns:  # check if the column still exists

    housing_data_frame.drop(feature_to_eliminate, axis=1, inplace=True)  # remove the selected feature

    print(f"Successfully eliminated the feature: '{feature_to_eliminate}'.")  # confirm it was removed

else:
    print(f"Feature '{feature_to_eliminate}' was already removed from the DataFrame. Skipping elimination.")  # warn if it was already gone

print("\nFinal count of features remaining after reduction:")  # show how many features remain
print(f"Total Columns: {len(housing_data_frame.columns)}")  # print total number of columns



 Decision Time: Eliminating Feature 'hotwaterheating' ---
Feature 'hotwaterheating' was already removed from the DataFrame. Skipping elimination.

Final count of features remaining after reduction:
Total Columns: 14


This part splits the 'area' values into 4 size categories (small to extra large) so we can work with a simplified, binned version of the feature instead of raw numbers.

In [132]:


NUM_BINS_FOR_AREA = 4  # number of bins we want to split area into

area_labels_names = ['Bin_Small', 'Bin_Medium', 'Bin_Large', 'Bin_XLarge']  # names for each bin

housing_data_frame['Area_Binned_Category'] = pd.cut(
    housing_data_frame['area'],
    bins=NUM_BINS_FOR_AREA,
    labels=area_labels_names,
    include_lowest=True
)  # create a new binned category column

print(f"Created a new column 'Area_Binned_Category' using {NUM_BINS_FOR_AREA} equal-width bins.")  # confirm new column made
print(housing_data_frame['Area_Binned_Category'].value_counts())  # print bin counts


Created a new column 'Area_Binned_Category' using 4 equal-width bins.
Area_Binned_Category
Bin_Medium    209
Bin_Small     184
Bin_Large     110
Bin_XLarge     42
Name: count, dtype: int64


This part groups the 'bedrooms' feature into 3 custom categories based on standard deviation so we can simplify the data and see patterns more easily instead of using raw bedroom counts.

In [133]:

# define cutoffs using standard deviation
BEDROOM_CUTOFF_1 = housing_data_frame['bedrooms'].std() * 1.5  # set 1.5*std as cutoff for large
BEDROOM_MIN = housing_data_frame['bedrooms'].min()  # minimum value
BEDROOM_MAX = housing_data_frame['bedrooms'].max()  # maximum value

custom_bedroom_bins = sorted([BEDROOM_MIN, 0.0, BEDROOM_CUTOFF_1, BEDROOM_MAX])  # create sorted custom bins
bedroom_group_labels = ['Bed_Small_to_Average', 'Bed_Large', 'Bed_Huge_Count']  # labels for bins

housing_data_frame['Bedroom_Groups'] = pd.cut(
    housing_data_frame['bedrooms'],
    bins=custom_bedroom_bins,
    labels=bedroom_group_labels,
    include_lowest=True,
    right=False
)  # apply bins to create new column

print(housing_data_frame['Bedroom_Groups'].value_counts())  # print counts per bin

Bedroom_Groups
Bed_Large               300
Bed_Small_to_Average    138
Bed_Huge_Count           95
Name: count, dtype: int64


This part turns the new binned features (area and bedrooms) into dummy/one-hot columns so the categories become numeric and ready for modeling, then drops the original binned columns.

In [134]:

# list columns to process if they exist
cols_to_process = []
if 'Area_Binned_Category' in housing_data_frame.columns:
    cols_to_process.append('Area_Binned_Category')  # add area bin column if present
if 'Bedroom_Groups' in housing_data_frame.columns:
    cols_to_process.append('Bedroom_Groups')  # add bedroom group column if present

if cols_to_process:  # only run if there are columns to process
    discretized_dummies_raw = pd.get_dummies(housing_data_frame[cols_to_process], drop_first=False)  # create dummy variables

    housing_data_frame = pd.concat([housing_data_frame, discretized_dummies_raw], axis=1)  # add dummy columns to DataFrame

    housing_data_frame.drop(cols_to_process, axis=1, inplace=True)  # remove old categorical columns

    print(f"Original binned columns deleted. Total Columns Remaining: {len(housing_data_frame.columns)}")  # show new column count

else:  # skip if nothing to process
    print("WARNING: Discretization columns already processed and deleted. Skipping OHE step.")  # warning to me

print("\nFinal Column Headers (Partial List to Show New Columns):")  # show example of final columns
print([col for col in housing_data_frame.columns if 'Area' in col or 'Bed' in col or 'price' in col])  # print partial headers

Original binned columns deleted. Total Columns Remaining: 20

Final Column Headers (Partial List to Show New Columns):
['price', 'Area_Binned_Category_Bin_Small', 'Area_Binned_Category_Bin_Medium', 'Area_Binned_Category_Bin_Large', 'Area_Binned_Category_Bin_XLarge', 'Bedroom_Groups_Bed_Small_to_Average', 'Bedroom_Groups_Bed_Large', 'Bedroom_Groups_Bed_Huge_Count']


This part does a final check of the cleaned and transformed dataset—showing its shape, data types, and a preview just to make sure everything is numeric and ready for modeling.

In [135]:

print(f"Final DataFrame Shape (Rows, Columns): {housing_data_frame.shape}")  # show rows and columns

housing_data_frame.info()  # show column types and non-null counts

print(housing_data_frame.head(10))  # show first 10 rows


Final DataFrame Shape (Rows, Columns): (545, 20)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 20 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   price                                545 non-null    float64
 1   area                                 545 non-null    float64
 2   bedrooms                             545 non-null    float64
 3   bathrooms                            545 non-null    float64
 4   stories                              545 non-null    float64
 5   mainroad                             545 non-null    int64  
 6   guestroom                            545 non-null    int64  
 7   basement                             545 non-null    int64  
 8   airconditioning                      545 non-null    int64  
 9   parking                              545 non-null    float64
 10  prefarea                             545 non-null

In [136]:
import pandas as pd
import numpy as np


Correlation_Matrix_Full_Set = housing_data_frame.corr()

price_target_correlations_raw = Correlation_Matrix_Full_Set['price']