In [29]:
import polars as pl
import json
import os

### Function to load model parameters and dataset


In [30]:
def load_model_and_data(model_filename='model_params.json', data_filename='dataset.csv'):
    """Read the saved line parameters and the dataset back from disk.

    Args:
        model_filename: Path of the JSON file holding the ``'slope'`` and
            ``'intercept'`` entries.
        data_filename: Path of the CSV file holding the dataset.

    Returns:
        tuple: ``(slope, intercept, dataset)`` where ``dataset`` is the frame
        produced by ``pl.read_csv``.
    """
    # Parameters live in a small JSON document
    with open(model_filename, 'r') as params_file:
        stored = json.load(params_file)

    # The observations are kept alongside it as CSV
    dataset = pl.read_csv(data_filename)

    slope = stored['slope']
    intercept = stored['intercept']
    return slope, intercept, dataset

### Function to calculate coefficients from loaded dataset


In [31]:
def calculate_coefficients(df):
    """Fit the least-squares line ``price = m * size + b`` to the dataset.

    Args:
        df: Frame-like object whose ``'size'`` and ``'price'`` columns provide
            ``mean()`` and ``to_list()``.

    Returns:
        tuple: ``(m, b)`` -- the slope and the intercept of the fitted line.

    Raises:
        ValueError: If the dataset has no rows, or if every size is the same
            (the slope is undefined when the sizes have zero variance).
    """
    sizes = df['size'].to_list()
    prices = df['price'].to_list()

    # Fail early with a clear message rather than on arithmetic with an
    # undefined mean further down.
    if not sizes:
        raise ValueError("Cannot calculate coefficients from an empty dataset.")

    mean_size = df['size'].mean()
    mean_price = df['price'].mean()

    # Sum of co-deviations and sum of squared size deviations
    numerator = sum((size - mean_size) * (price - mean_price)
                    for size, price in zip(sizes, prices))
    denominator = sum((size - mean_size) ** 2 for size in sizes)

    # All sizes identical -> vertical line, no finite slope exists
    if denominator == 0:
        raise ValueError(
            "Cannot calculate coefficients: all 'size' values are identical.")

    m = numerator / denominator  # slope
    b = mean_price - m * mean_size  # intercept
    return m, b

### Function to save model parameters and dataset


In [32]:
def save_model_and_data(m, b, df, model_filename='model_params.json', data_filename='dataset.csv'):
    """Write the line parameters and the dataset to disk.

    Args:
        m: Slope, stored under the ``"slope"`` key.
        b: Intercept, stored under the ``"intercept"`` key.
        df: Frame-like object providing ``write_csv(path)``.
        model_filename: Destination path of the JSON parameter file.
        data_filename: Destination path of the CSV dataset file.
    """
    # Parameters are written first, as a two-entry JSON object
    with open(model_filename, 'w') as params_file:
        json.dump({"slope": m, "intercept": b}, params_file)

    # The dataset follows, as CSV
    df.write_csv(data_filename)

### Function to make predictions


In [33]:
def predict(size, m, b):
    """Evaluate the line ``m * size + b`` at the given size.

    Args:
        size: Input value the line is evaluated at.
        m: Slope of the line.
        b: Intercept of the line.

    Returns:
        The value of ``m * size + b``.
    """
    scaled = m * size
    return scaled + b

### Load the model parameters and dataset


In [34]:
# Where the model parameters and the dataset are persisted
model_filename = 'model_params.json'
data_filename = 'dataset.csv'

# Reuse the saved parameters and dataset only when both files are present;
# otherwise start from a small built-in dataset and fit the line to it.
if not (os.path.exists(model_filename) and os.path.exists(data_filename)):
    # Seed observations: size in square feet paired with a price
    data = {
        "size": [1000, 1500, 2000, 2500, 3000],
        # Prices are given as floats
        "price": [150000.0, 200000.0, 250000.0, 300000.0, 350000.0]
    }

    # Build the frame and cast both columns to explicit dtypes
    df_loaded = pl.DataFrame(data).with_columns([
        pl.col("size").cast(pl.Int64),
        pl.col("price").cast(pl.Float64)
    ])
    loaded_m, loaded_b = calculate_coefficients(df_loaded)
else:
    loaded_m, loaded_b, df_loaded = load_model_and_data(
        model_filename, data_filename)

# Report the parameters in use and the last rows of the dataset
print(f"Slope: {loaded_m}, Intercept: {loaded_b}")
display(df_loaded.tail(10))

Slope: 100.00107443233533, Intercept: 50001.74918541085


size,price
i64,f64
2000,250000.0
2500,300000.0
3000,350000.0
3300,380000.0
2750,325000.0
2389,288900.0
9291,979100.0
598,109800.0
4023,452354.0
1200,170003.038504


### Main loop for user input


In [35]:
# Interactive loop: answer price queries until the user types 'exit'.
# Known sizes are answered from the dataset; unknown sizes are predicted, and
# whole-number sizes are then added to the dataset, the line is refitted and
# both are saved.
while True:
    # Get user input for size
    user_input = input(
        "Enter the size of the house in sqft (or 'exit' to quit): ")

    # Accept the quit word regardless of surrounding whitespace or case
    if user_input.strip().lower() == 'exit':
        break

    try:
        size = float(user_input)  # Convert input to float
        # int() raises ValueError for NaN and OverflowError for +/-inf, so
        # non-finite input is rejected here instead of crashing when the new
        # row is built.
        whole_size = int(size)
    except (ValueError, OverflowError):
        print("Invalid input. Please enter a valid number.")
        continue

    # A house cannot have a zero or negative area; keep such rows out of the
    # dataset the line is fitted to.
    if size <= 0:
        print("Invalid input. Please enter a size greater than zero.")
        continue

    # Check if size exists in the dataset
    known_rows = df_loaded.filter(pl.col("size") == size)
    if known_rows.height > 0:
        # If the size exists, retrieve the corresponding price
        price = known_rows['price'].to_numpy()[0]
        print(f"The price for a house of size {size} sqft is: ${price:.2f}")
        continue

    # If not, predict the price. The message is split across two adjacent
    # f-strings so that no replacement field spans a line break (that form is
    # only accepted from Python 3.12 on).
    predicted_price = predict(size, loaded_m, loaded_b)
    print(f"Predicted price for a house of size {size} sqft "
          f"is: ${predicted_price:.2f}")

    # New rows are stored with an integer size. A fractional size would be
    # truncated and paired with a price predicted for a different size, so
    # only whole-number sizes are added to the dataset.
    if size != whole_size:
        continue

    # Append new size and predicted price to the dataset with explicit types
    new_data = pl.DataFrame(
        {"size": [whole_size], "price": [predicted_price]})
    df_loaded = df_loaded.vstack(new_data)

    # Recalculate coefficients after adding new data
    loaded_m, loaded_b = calculate_coefficients(df_loaded)

    # Save to the same files the notebook loaded from, not the defaults
    save_model_and_data(loaded_m, loaded_b, df_loaded,
                        model_filename, data_filename)

Predicted price for a house of size 6520.0 sqft is: $702008.75
