In [1]:
import numpy as np
import sympy as sp

# --- Settings ---
SEED = 42              # fixed seed so every run generates the same matrices
NUM_MATRICES = 3       # number of random matrices to generate
MATRIX_SIZE = 3        # each matrix is MATRIX_SIZE x MATRIX_SIZE
LOW, HIGH = -10, 10    # inclusive integer range

rng = np.random.default_rng(SEED)

# For each random integer matrix: print it, print its determinant, and, when
# the determinant is non-zero, print the exact inverse and check that
# M * M^{-1} really is the identity matrix.
for k in range(1, NUM_MATRICES + 1):
    # HIGH + 1 is passed as the upper bound so that HIGH itself can be drawn.
    M = sp.Matrix(rng.integers(LOW, HIGH + 1, size=(MATRIX_SIZE, MATRIX_SIZE)))

    print(f"\n==================== Matrix {k} ====================")
    print("M =")
    sp.pprint(M)

    # Entries are exact integers, so the determinant is already an exact
    # integer and needs no symbolic simplification.
    detM = M.det()
    print("\ndet(M) =", detM)

    # Inverse exists iff det != 0
    if detM == 0:
        print("\nThis matrix is singular (not invertible). No inverse exists.")
        continue

    Minv = M.inv()  # exact rational inverse
    print("\nM^{-1} =")
    sp.pprint(Minv)

    # Verify identity: the product is computed in exact rational arithmetic,
    # so it can be compared to the identity matrix with plain equality.
    I_check = M * Minv
    print("\nM * M^{-1} =")
    sp.pprint(I_check)

    # Fail loudly instead of relying on someone eyeballing the printed matrix.
    assert I_check == sp.eye(MATRIX_SIZE), f"Matrix {k}: M * M^-1 is not the identity"




M =
⎡-9  6   3 ⎤
⎢          ⎥
⎢-1  -1  8 ⎥
⎢          ⎥
⎣-9  4   -6⎦

det(M) = -273

M^{-1} =
⎡      -16   -17  ⎤
⎢2/21  ────  ──── ⎥
⎢       91    91  ⎥
⎢                 ⎥
⎢      -27   -23  ⎥
⎢2/7   ────  ──── ⎥
⎢       91    91  ⎥
⎢                 ⎥
⎣1/21  6/91  -5/91⎦

M * M^{-1} =
⎡1  0  0⎤
⎢       ⎥
⎢0  1  0⎥
⎢       ⎥
⎣0  0  1⎦

M =
⎡-9  1  10⎤
⎢         ⎥
⎢5   5  5 ⎥
⎢         ⎥
⎣6   0  -8⎦

det(M) = 130

M^{-1} =
⎡-4/13  4/65  -9/26⎤
⎢                  ⎥
⎢              19  ⎥
⎢7/13   6/65   ──  ⎥
⎢              26  ⎥
⎢                  ⎥
⎣-3/13  3/65  -5/13⎦

M * M^{-1} =
⎡1  0  0⎤
⎢       ⎥
⎢0  1  0⎥
⎢       ⎥
⎣0  0  1⎦

M =
⎡7   -1  0 ⎤
⎢          ⎥
⎢-3  -7  9 ⎥
⎢          ⎥
⎣6   3   -2⎦

det(M) = -139

M^{-1} =
⎡13                ⎤
⎢───   2/139  9/139⎥
⎢139               ⎥
⎢                  ⎥
⎢-48    14     63  ⎥
⎢────   ───    ─── ⎥
⎢139    139    139 ⎥
⎢                  ⎥
⎢-33    27     52  ⎥
⎢────   ───    ─── ⎥
⎣139    139    139 ⎦

M * M^{-1} =
⎡1  0  0⎤
⎢       ⎥
⎢

In [3]:
import pandas as pd

# 1) Read the CSV into a DataFrame
df = pd.read_csv("kc_house_data.csv")

# 2) Remove the columns that are excluded from the analysis
ignore_cols = ["id", "date", "zipcode"]
df_use = df.drop(labels=ignore_cols, axis=1)

# 3) Name the target column; every other remaining column is a feature
y_col = "price"
X_cols = [name for name in df_use.columns if name != y_col]

# -------------------------------
# (1) Mean, min, max, variance
# -------------------------------
# Compute each per-feature statistic once, then assemble one row per feature,
# ordered by ascending mean.
_features = df_use[X_cols]
_feature_mean = _features.mean()
_feature_min = _features.min()
_feature_max = _features.max()
_feature_var = _features.var(ddof=1)  # sample variance (n - 1 denominator)

stats_table = pd.DataFrame(
    {
        "mean": _feature_mean,
        "min": _feature_min,
        "max": _feature_max,
        "variance": _feature_var,
    }
).sort_values("mean")

print("=== Summary stats for all features ===")
display(stats_table)

# Features with the smallest / largest average value
_means = stats_table["mean"]
lowest_avg_feature, highest_avg_feature = _means.idxmin(), _means.idxmax()

print("\nLowest average feature:", lowest_avg_feature, "=", _means[lowest_avg_feature])
print("Highest average feature:", highest_avg_feature, "=", _means[highest_avg_feature])

# Features with the smallest / largest sample variance
_variances = stats_table["variance"]
lowest_var_feature, highest_var_feature = _variances.idxmin(), _variances.idxmax()

print("\nLowest variance feature:", lowest_var_feature, "=", _variances[lowest_var_feature])
print("Highest variance feature:", highest_var_feature, "=", _variances[highest_var_feature])

# -------------------------------
# (2) Correlation with price
# -------------------------------
# Take the target's column of the full correlation matrix and discard the
# target's correlation with itself.
_corr_matrix = df_use[X_cols + [y_col]].corr(numeric_only=True)
corr_series = _corr_matrix[y_col].drop(y_col)

# Two-column table (feature, correlation), strongest positive correlation first
corr_table = (
    corr_series.sort_values(ascending=False)
    .rename_axis("feature")
    .reset_index(name="corr_with_price")
)

print("\n=== Correlation of each feature with price ===")
display(corr_table)

# Features whose correlation with the target is strictly positive
positive_features = [name for name, r in corr_series.items() if r > 0]
print("\nPositively correlated features:", positive_features)

# Feature with the largest correlation value
top_feature = corr_series.idxmax()
print("\nHighest positive correlation feature:", top_feature, "=", corr_series.loc[top_feature])

# -------------------------------
# (3) Negative correlation?
# -------------------------------
# Features whose correlation with the target is strictly negative
negative_features = [name for name, r in corr_series.items() if r < 0]
print("\nNegatively correlated features:", negative_features)


=== Summary stats for all features ===


Unnamed: 0,mean,min,max,variance
long,-122.213896,-122.519,-121.315,0.01983262
waterfront,0.007542,0.0,1.0,0.007485226
view,0.234303,0.0,4.0,0.5872426
floors,1.494309,1.0,3.5,0.291588
bathrooms,2.114757,0.0,8.0,0.5931513
bedrooms,3.370842,0.0,33.0,0.865015
condition,3.40943,1.0,5.0,0.4234665
grade,7.656873,1.0,13.0,1.381703
lat,47.560053,47.1559,47.7776,0.0191999
yr_renovated,84.402258,0.0,2015.0,161346.2



Lowest average feature: long = -122.21389640494147
Highest average feature: sqft_lot = 15106.967565816869

Lowest variance feature: waterfront = 0.007485225502689098
Highest variance feature: sqft_lot = 1715658774.1754544

=== Correlation of each feature with price ===


Unnamed: 0,feature,corr_with_price
0,sqft_living,0.702035
1,grade,0.667434
2,sqft_above,0.605567
3,sqft_living15,0.585379
4,bathrooms,0.525138
5,view,0.397293
6,sqft_basement,0.323816
7,bedrooms,0.30835
8,lat,0.307003
9,waterfront,0.266369



Positively correlated features: ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade', 'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'lat', 'long', 'sqft_living15', 'sqft_lot15']

Highest positive correlation feature: sqft_living = 0.7020350546118004

Negatively correlated features: []
