In [32]:
import polars as pl
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
import time

# Benchmark configuration shared by the Pandas and Polars cells below:
# the random data set has `number_of_lines` rows and `number_of_columns` columns.
number_of_lines, number_of_columns = 1_000, 4

## Pandas

In [33]:
# Pandas benchmark: build a random DataFrame, fit a linear regression on it
# and predict, timing the whole pipeline.
#
# time.perf_counter() is used instead of time.time(): it is monotonic and has
# the highest available resolution, which matters for a run that only lasts a
# few tens of milliseconds (time.time() can be coarse or jump if the system
# clock is adjusted).
t1 = time.perf_counter()

# Create the DataFrame using Pandas
pandas_data = pd.DataFrame(
    np.random.randn(number_of_lines, number_of_columns),
    columns=[f"column_{i}" for i in range(number_of_columns)],
)

# Features: every column except the target 'column_0'
x = pandas_data.drop(columns=["column_0"])

# Target variable: 'column_0'
y = pandas_data["column_0"]

# Positional split: the first 80 rows train the model, the rest test it.
# .iloc makes the positional intent explicit (same rows as plain slicing).
# NOTE(review): with number_of_lines = 1000 this trains on only 8% of the
# rows; presumably an 80/20 split was intended - confirm, and change the
# Polars cell in the same way so the comparison stays fair.
x_train, x_test = x.iloc[:80], x.iloc[80:]
y_train, y_test = y.iloc[:80], y.iloc[80:]

# Initialize and fit the Linear Regression model
m = LinearRegression()
m.fit(X=x_train, y=y_train)

# Make predictions on the test set
predictions = m.predict(x_test)

t2 = time.perf_counter()

# Elapsed seconds; reused by the performance comparison cell.
pandas_time = t2 - t1
print("    {}: {:.5f} sekuntia".format("Pandas", pandas_time))


    Pandas: 0.02030 sekuntia


## Polars

In [34]:
# Polars benchmark: build a random DataFrame, fit a linear regression on it
# and predict, timing the whole pipeline.
#
# time.perf_counter() is used instead of time.time(): it is monotonic and has
# the highest available resolution, which matters for a run that only lasts a
# few tens of milliseconds.
t1 = time.perf_counter()

# No column names are passed, so the selections below rely on the names
# Polars assigns by default to unnamed NumPy input ("column_0", "column_1", ...).
polars_data = pl.DataFrame(
    np.random.randn(number_of_lines, number_of_columns)
)

# Features: every column except the target "column_0"
x = polars_data.select([
    pl.all().exclude("column_0"),
])

# Target: "column_0", kept as a one-column DataFrame named "y"
y = polars_data.select(pl.col("column_0").alias("y"))

# Positional split: the first 80 rows train the model, the rest test it.
# NOTE(review): with number_of_lines = 1000 this trains on only 8% of the
# rows; presumably an 80/20 split was intended - confirm, and change the
# Pandas cell in the same way so the comparison stays fair.
x_train = x[:80]
y_train = y[:80]

x_test = x[80:]
y_test = y[80:]

m = LinearRegression()

# The frames are converted to NumPy arrays before being handed to
# scikit-learn. y_train is a one-column frame, so the target is passed as a
# 2-D array here.
m.fit(X=x_train.to_numpy(), y=y_train.to_numpy())

# The predictions are computed only so the work is timed; the result is not kept.
m.predict(x_test.to_numpy())

t2 = time.perf_counter()

# Elapsed seconds; reused by the performance comparison cell.
polars_time = t2 - t1
print("    {}: {:.5f} sekuntia".format("Polars", polars_time))

    Polars: 0.01410 sekuntia


## Suorituskyky

In [35]:
# Speed-up factor: how many times longer the Pandas cell took than the
# Polars cell (both durations are in seconds). A value below 1 would mean
# Pandas was the faster one in this run.
aikaero = pandas_time / polars_time
print(
    f"Polars on {aikaero} kertaa nopeampi kuin Pandas"
)

Polars on 1.4401914684888875 kertaa nopeampi kuin Pandas


In [36]:
from sklearn.preprocessing import StandardScaler
import numpy as np

# Small 4x2 sample matrix used to demonstrate standardization
data = np.array(
    [
        [1.0, 2.0],
        [2.0, 3.0],
        [3.0, 4.0],
        [4.0, 5.0],
    ]
)

# Fit the scaler on the data, then apply the fitted scaling to the same data
scaler = StandardScaler()
scaler.fit(data)
scaled_data = scaler.transform(data)

print("Original data:\n", data)
print("Scaled data:\n", scaled_data)

# Per-column statistics before scaling
print("Mean of the original data:", data.mean(axis=0))
print("Standard deviation of the original data:", data.std(axis=0))

# Per-column statistics after scaling
print("Mean of the scaled data:", scaled_data.mean(axis=0))
print("Standard deviation of the scaled data:", scaled_data.std(axis=0))

Original data:
 [[1. 2.]
 [2. 3.]
 [3. 4.]
 [4. 5.]]
Scaled data:
 [[-1.34164079 -1.34164079]
 [-0.4472136  -0.4472136 ]
 [ 0.4472136   0.4472136 ]
 [ 1.34164079  1.34164079]]
Mean of the original data: [2.5 3.5]
Standard deviation of the original data: [1.11803399 1.11803399]
Mean of the scaled data: [0. 0.]
Standard deviation of the scaled data: [1. 1.]
