In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Column names used to build the feature matrix and the target vector
FEATURE_COLUMNS = ["Column Wash Cond.", "Elution pH", "Elution Flow Rate"]
TARGET_COLUMN = "%Recovery"

# Read the raw measurements from disk
data = pd.read_csv("data.csv")

# Features (X) and target (y)
X = data[FEATURE_COLUMNS]
y = data[TARGET_COLUMN]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only,
# then apply that same scaling to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a linear regression on the standardized training features
model = LinearRegression().fit(X_train_scaled, y_train)

# Predict %Recovery for the held-out rows
y_pred = model.predict(X_test_scaled)

# Score the predictions against the held-out targets.
# (An R-squared below zero means the fit is worse than always predicting the mean.)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)


Mean Squared Error: 2.3204593094146206
R-squared: -0.6937659192807397


In [3]:
import numpy as np

# New data points for testing.
# Each row must list the values in the same order as the training features:
# "Column Wash Cond.", "Elution pH", "Elution Flow Rate".
new_data = np.array([
    [1.5, 7.6, 100],
    [2.5, 7.4, 100],
    [1.5, 7.4, 200],
    [1.5, 7.4, 200]  # NOTE(review): identical to the row above - confirm the duplicate is intended
    # Add more data points here
])

# The scaler was fitted on a DataFrame, so it recorded the feature names.
# Handing it a bare ndarray makes scikit-learn warn that X has no valid
# feature names; labelling the columns avoids that warning and lets
# scikit-learn verify the names match the ones seen during fit.
new_data_df = pd.DataFrame(new_data, columns=X_train.columns)

# Standardize the new data using the same scaler
new_data_scaled = scaler.transform(new_data_df)

# Make predictions on the new data
new_predictions = model.predict(new_data_scaled)

# Print the predicted %Recovery for the new data (numbered from 1)
for point_number, prediction in enumerate(new_predictions, start=1):
    print(f"Data Point {point_number}: Predicted %Recovery = {prediction:.2f}")


Data Point 1: Predicted %Recovery = 89.21
Data Point 2: Predicted %Recovery = 89.11
Data Point 3: Predicted %Recovery = 87.54
Data Point 4: Predicted %Recovery = 87.54


