In [437]:
import os
import polars as pl
import numpy as np
import altair as alt

In [438]:
# Use the built-in five-row seed data unless a previously exported CSV is on disk.
if not os.path.exists("./price_prediction_by_length.csv"):
    seed_data = {
        'length': [2, 4, 7, 8, 12],
        'price': [10, 20, 35, 42, 65],
    }
    # Both columns are cast to Float64.
    df = pl.DataFrame(seed_data).with_columns(
        pl.col("length").cast(pl.Float64),
        pl.col("price").cast(pl.Float64),
    )
else:
    print('file exists')
    df = pl.read_csv("price_prediction_by_length.csv")

df

file exists


length,price
f64,f64
2.0,10.0
4.0,20.0
7.0,35.0
8.0,42.0
12.0,65.0
…,…
1.0,3.581081
4.0,20.091216
1.0,3.581081
6.0,31.097973


In [439]:
# Aggregate sums used by the slope and intercept formulas in the following cells.
lengths = df["length"]
prices = df["price"]

n = df.height  # number of rows
sum_x = lengths.sum()
sum_y = prices.sum()
sum_xy = (lengths * prices).sum()
sum_x_squared = (lengths ** 2).sum()

In [440]:
# Slope (m) of the fitted line, computed from the aggregate sums.
slope_numerator = n * sum_xy - sum_x * sum_y
slope_denominator = n * sum_x_squared - sum_x ** 2
m = slope_numerator / slope_denominator

In [441]:
# Intercept (b): the price total minus the slope's share of it, divided by the row count.
offset_total = sum_y - m * sum_x
b = offset_total / n

In [442]:
# Report the fitted slope and intercept, one per line.
print(f"Slope (m): {m}", f"Intercept (b): {b}", sep="\n")

Slope (m): 5.503378378378375
Intercept (b): -1.922297297297278


### Predict the price based on length


In [443]:
# # Formula: y = m * x + b
# x = float(input("Enter the length: "))
# y = m * x + b

# print(f"Predicted price: {y}")

### Export the new data


In [444]:
# df = df.vstack(pl.DataFrame({'length': [x], 'price': [y]}))
# display(df)

# # Export
# df.write_csv('price_prediction_by_length.csv')

In [445]:
def get_prediction(x, slope=None, intercept=None) -> tuple:
    """Predict the price for a length using the line ``y = slope * x + intercept``.

    Args:
        x: Length to predict a price for.
        slope: Slope of the line. When omitted, the notebook-level ``m`` is used.
        intercept: Intercept of the line. When omitted, the notebook-level ``b``
            is used.

    Returns:
        A ``(length, price)`` tuple where the length has been converted with
        ``np.float64`` and the price is ``slope * x + intercept``.
    """
    # Fall back to the coefficients fitted earlier in the notebook so existing
    # single-argument calls keep working unchanged.
    if slope is None:
        slope = m
    if intercept is None:
        intercept = b

    y = slope * x + intercept

    return (np.float64(x), y)


# Draw 45 random integer lengths from 1 to 10 (the upper bound 11 is exclusive).
# NOTE(review): the draw is unseeded, and the result is written back to the same
# CSV path the load cell reads, so each run appends another 45 rows — confirm
# this is intended.
numbers = np.random.randint(1, 11, 45)

# One (length, price) pair per sampled length.
rows = [get_prediction(number) for number in numbers]

predicted_df = pl.DataFrame(rows, schema=['length', 'price'], orient='row')
df = df.vstack(predicted_df)

# Export
df.write_csv('price_prediction_by_length.csv')
print(numbers)

[ 3  6  4  6 10  3 10  5  2 10  6  7  1  6  5  6  4  4  8  3  4  6  1  8
  7  4 10  7  1  4  8 10 10  9  7  3  6  8  2  9  3  5  9  6  8]


In [446]:
# n = len(df)
# sum_x = df["length"].sum()
# sum_y = df["price"].sum()
# sum_xy = (df["length"] * df["price"]).sum()
# sum_x_squared = (df["length"] ** 2).sum()

# # Slope (m)
# m = (n * sum_xy - sum_x * sum_y) / (n * sum_x_squared - sum_x ** 2)

# # Intercept (b)
# b = (sum_y - m * sum_x) / n

# line_df = pl.DataFrame({'m': [m], 'b': [b]})
# line_df

In [448]:

# Two-row frame running from 0 to 60 in both columns; the chart below does not use it.
dfx = pl.DataFrame({'length': [0, 60], 'price': [0, 60]})

# Circle-mark scatter of price against length, with both values shown in the tooltip.
length_axis = alt.X(field='length', sort='x')
points = alt.Chart(df).mark_circle()

points.encode(
    x=length_axis,
    y='price',
    tooltip=['length', 'price'],
).properties(width=1000)