<a href="https://colab.research.google.com/github/maurelia/kozangis/blob/master/filter_optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Read the CSV, parse the 'DATE_TIME' column into datetimes and use it as the
# row index. The chain yields a fresh frame, so re-running this cell is safe.
data = (
    pd.read_csv("data.csv")
    .assign(DATE_TIME=lambda frame: pd.to_datetime(frame['DATE_TIME']))
    .set_index('DATE_TIME')
)

# Plain-text preview of the first rows
print(data.head())

# Summary statistics; as the last expression this is the cell's rich output
data.describe()

FileNotFoundError: [Errno 2] No such file or directory: 'data.csv'

In [None]:
# Overview figure: one panel per process variable on a 3x3 grid (8 panels used).
# Each entry is (column, legend label, panel title, extra line options).
# An empty options dict keeps matplotlib's default line colour.
# NOTE(review): 'TAIL_FLOW' is labelled "Feeding Flow Rate" - confirm the name matches the tag.
panel_specs = [
    # Vacuum pressure, filters 1-4
    ('PRESV_F1', 'Vacuum Pressure F1', 'Vacuum Pressure F1 Over Time', {}),
    ('PRESV_F2', 'Vacuum Pressure F2', 'Vacuum Pressure F2 Over Time', {}),
    ('PRESV_F3', 'Vacuum Pressure F3', 'Vacuum Pressure F3 Over Time', {}),
    ('PRESV_F4', 'Vacuum Pressure F4', 'Vacuum Pressure F4 Over Time', {}),
    # Solids content
    ('FEEDING_SOLIDS', 'Solids Content', 'Solids Content Over Time', {'color': 'orange'}),
    # Retrowashing pressure
    ('PRESRV_F1_A', 'Retrowashing Pressure F1-A', 'Retrowashing Pressure Over Time', {'color': 'green'}),
    # Feeding flow rate
    ('TAIL_FLOW', 'Feeding Flow Rate', 'Feeding Flow Rate Over Time', {'color': 'red'}),
    # Drying efficiency
    ('DRYING_RATE', 'Drying Efficiency', 'Drying Efficiency Over Time', {'color': 'purple'}),
]

plt.figure(figsize=(14, 10))

for position, (column, label, title, line_options) in enumerate(panel_specs, start=1):
    plt.subplot(3, 3, position)
    plt.plot(data[column], label=label, **line_options)
    plt.title(title)
    plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Additive seasonal decomposition of drying efficiency and feeding solids.
# (column to decompose, figure title); add more rows to decompose other variables.
decomposition_targets = [
    ('DRYING_RATE', 'Decomposition of Drying Efficiency'),
    ('FEEDING_SOLIDS', 'Decomposition of Feeding Solids (%)'),
]

for column, figure_title in decomposition_targets:
    # Adjust period as needed.
    # NOTE(review): 24*7 presumably means one week of hourly samples - confirm the sampling rate.
    decomposition = seasonal_decompose(data[column], model='additive', period=24*7)

    # DecomposeResult.plot() creates and returns its own figure. Calling
    # plt.figure(figsize=...) beforehand only leaves an empty extra figure and the
    # requested size is never applied, so size and title the returned figure instead.
    fig = decomposition.plot()
    fig.set_size_inches(12, 8)
    fig.suptitle(figure_title)
    fig.tight_layout()  # re-run the layout after resizing
    plt.show()

In [None]:
# plot_acf / plot_pacf open a new figure unless given an Axes. Without `ax=`,
# a preceding plt.figure(figsize=...) would be shown blank and its size ignored.

# Autocorrelation Function (ACF)
fig, ax = plt.subplots(figsize=(12, 6))
plot_acf(data['DRYING_RATE'], lags=96, title='Autocorrelation of Drying Efficiency', ax=ax)
plt.show()

# Partial Autocorrelation Function (PACF)
fig, ax = plt.subplots(figsize=(12, 6))
plot_pacf(data['DRYING_RATE'], lags=96, title='Partial Autocorrelation of Drying Efficiency', ax=ax)
plt.show()

In [None]:
# Rolling mean and standard deviation (96-sample window; adjust as needed),
# first for drying efficiency and then for feeding solids.
rolling_targets = [
    ('DRYING_RATE', 'Rolling Mean & Standard Deviation of Drying Efficiency'),
    ('FEEDING_SOLIDS', 'Rolling Mean & Standard Deviation of Solids Content'),
]

for column, figure_title in rolling_targets:
    windowed = data[column].rolling(window=96)
    rolling_mean = windowed.mean()
    rolling_std = windowed.std()

    # Original series with its rolling statistics overlaid
    plt.figure(figsize=(12, 6))
    plt.plot(data[column], label='Original')
    plt.plot(rolling_mean, label='Rolling Mean', color='red')
    plt.plot(rolling_std, label='Rolling Std', color='green')
    plt.title(figure_title)
    plt.legend()
    plt.show()

In [None]:
# Cross-correlation between solids content (FEEDING_SOLIDS) and drying efficiency.
#
# np.correlate on the raw series is an unnormalised sliding dot product: the means
# dominate it and the values are not correlations. Standardising both series and
# dividing by the sample count makes the lag-0 value equal Pearson's r and keeps
# the other lags on a comparable scale.
# NOTE(review): a NaN in either column propagates to the whole result - confirm the
# columns are complete or clean them first.
solids = data['FEEDING_SOLIDS'].to_numpy(dtype=float)
drying = data['DRYING_RATE'].to_numpy(dtype=float)

solids_z = (solids - solids.mean()) / solids.std()
drying_z = (drying - drying.mean()) / drying.std()

cross_corr = np.correlate(solids_z, drying_z, mode='full') / len(data)
lags = np.arange(-len(data) + 1, len(data))  # lag axis matching mode='full'

# Plot cross-correlation
plt.figure(figsize=(12, 6))
plt.plot(lags, cross_corr)
plt.title('Cross-Correlation Between Solids Content and Drying Efficiency')
plt.xlabel('Lag')
plt.ylabel('Correlation')
plt.show()


In [None]:
from statsmodels.tsa.arima.model import ARIMA

# ARIMA(2, 1, 0) on drying efficiency, as an example; adjust the order as needed
arima_order = (2, 1, 0)
model = ARIMA(data['DRYING_RATE'], order=arima_order)
results = model.fit()

# Out-of-sample forecast horizon, in steps
forecast_steps = 24*28
forecast = results.forecast(steps=forecast_steps)

# History and forecast on a single axis
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(data['DRYING_RATE'], label='Historical')
ax.plot(forecast, label='Forecast', color='red')
ax.set_title('Drying Efficiency Forecast')
ax.legend()
plt.show()

In [None]:
# Single-feature regression: solids content (predictor) -> drying rate (response)
X = data.loc[:, ['FEEDING_SOLIDS']]
y = data.loc[:, 'DRYING_RATE']

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Ordinary least squares fit; fit() returns the estimator itself
model = LinearRegression().fit(X_train, y_train)

# Score on the held-out rows
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Fitted line: intercept and the single slope
print(f'Intercept: {model.intercept_}')
print(f'Coefficient for solids_content: {model.coef_[0]}')

In [None]:
# Solids content values from the observed maximum up to 5 percentage points above it
current_max_solids = data['FEEDING_SOLIDS'].max()
new_solids_values = np.linspace(current_max_solids, current_max_solids + 5, 100)

# The model was fitted on a DataFrame with a 'FEEDING_SOLIDS' column. Predicting
# on a frame with the same column avoids scikit-learn's "X does not have valid
# feature names" warning; the predicted values are unchanged.
new_solids_frame = pd.DataFrame({'FEEDING_SOLIDS': new_solids_values})
predicted_efficiency = model.predict(new_solids_frame)

# Extrapolated predictions drawn over the observed data
plt.figure(figsize=(8, 6))
plt.plot(new_solids_values, predicted_efficiency, label='Predicted Drying Efficiency', color='red')
plt.scatter(data['FEEDING_SOLIDS'], data['DRYING_RATE'], alpha=0.5, label='Actual Data')
plt.xlabel('Solids Content (%)')
plt.ylabel('Drying Efficiency (tons/hour)')
plt.title('Impact of Increasing Solids Content on Drying Efficiency')
plt.legend()
plt.show()

In [None]:
# Increase in solids content to evaluate, in percentage points
increase_in_solids = 5

# For a straight-line fit the expected change is slope * delta.
# `model` must still be the single-feature linear regression at this point;
# later cells rebind `model` to other estimators.
coefficient = model.coef_[0]
expected_increase = increase_in_solids * coefficient

print(f'Expected increase in drying efficiency for a {increase_in_solids}% increase in solids content: {expected_increase:.2f} tons/hour')

In [None]:
# Predictor (solids content) and response (drying rate)
X = data.loc[:, ['FEEDING_SOLIDS']]
y = data.loc[:, 'DRYING_RATE']

# Expand the predictor into polynomial terms; degree=2 gives a quadratic
# relationship and can be adjusted based on the data
degree = 2
poly = PolynomialFeatures(degree=degree)
X_poly = poly.fit(X).transform(X)

# 80/20 train/test split of the expanded features, fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X_poly,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Least-squares fit on the polynomial-expanded features; fit() returns the estimator
model = LinearRegression().fit(X_train, y_train)

# Score on the held-out rows
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Fitted intercept and one coefficient per polynomial term
print(f'Intercept: {model.intercept_}')
print(f'Coefficients: {model.coef_}')

In [None]:
# Evenly spaced solids content values across the observed range
X_range = np.linspace(X['FEEDING_SOLIDS'].min(), X['FEEDING_SOLIDS'].max(), 100).reshape(-1, 1)

# `poly` was fitted on a DataFrame with a 'FEEDING_SOLIDS' column. Transforming a
# frame with the same column avoids scikit-learn's "X does not have valid feature
# names" warning; the transformed values are unchanged.
X_range_poly = poly.transform(pd.DataFrame(X_range, columns=['FEEDING_SOLIDS']))
y_range_pred = model.predict(X_range_poly)

# Fitted curve over the observed data
plt.figure(figsize=(8, 6))
plt.scatter(X['FEEDING_SOLIDS'], y, alpha=0.7, label='Actual Data')
plt.plot(X_range, y_range_pred, color='red', label=f'Polynomial Regression (Degree={degree})')
plt.xlabel('Solids Content (%)')
plt.ylabel('Drying Efficiency (tons/hour)')
plt.title('Polynomial Regression: Solids Content vs Drying Efficiency')
plt.legend()
plt.show()

In [None]:
# Increase in solids content to evaluate, in percentage points
increase_in_solids = 10

# `poly` was fitted on a DataFrame with a 'FEEDING_SOLIDS' column, so the single
# evaluation points are passed as one-row frames with that column. This avoids
# scikit-learn's feature-name warning without changing the numbers.
# `current_max_solids` comes from an earlier cell (observed maximum of FEEDING_SOLIDS).

# Predicted drying efficiency at the current max solids content
current_max_poly = poly.transform(pd.DataFrame({'FEEDING_SOLIDS': [current_max_solids]}))
current_efficiency = model.predict(current_max_poly)

# Predicted drying efficiency at the increased solids content.
# NOTE(review): this evaluates the polynomial beyond the observed data range -
# treat the result as an extrapolation.
new_solids = current_max_solids + increase_in_solids
new_solids_poly = poly.transform(pd.DataFrame({'FEEDING_SOLIDS': [new_solids]}))
new_efficiency = model.predict(new_solids_poly)

# Expected change between the two operating points
expected_increase = new_efficiency[0] - current_efficiency[0]

print(f'Expected increase in drying efficiency for a {increase_in_solids}% increase in solids content: {expected_increase:.2f} tons/hour')

In [None]:
# Predictor (solids content) and response (drying rate), back to the raw feature
X = data.loc[:, ['FEEDING_SOLIDS']]
y = data.loc[:, 'DRYING_RATE']

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Gradient Boosting Regressor: 300 boosting stages, 0.1 step-size shrinkage,
# trees of depth at most 3, fixed seed. fit() returns the estimator itself.
model = GradientBoostingRegressor(
    n_estimators=300,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
).fit(X_train, y_train)

# Score on the held-out rows
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

In [None]:
# Evenly spaced solids content values across the observed range
X_range = np.linspace(X['FEEDING_SOLIDS'].min(), X['FEEDING_SOLIDS'].max(), 100).reshape(-1, 1)

# The model was fitted on a DataFrame with a 'FEEDING_SOLIDS' column. Predicting
# on a frame with the same column avoids scikit-learn's "X does not have valid
# feature names" warning; the predicted values are unchanged.
y_range_pred = model.predict(pd.DataFrame(X_range, columns=['FEEDING_SOLIDS']))

# Model response over the observed data
plt.figure(figsize=(8, 6))
plt.scatter(X['FEEDING_SOLIDS'], y, alpha=0.7, label='Actual Data')
plt.plot(X_range, y_range_pred, color='red', label='Gradient Boosting Predictions')
plt.xlabel('Solids Content (%)')
plt.ylabel('Drying Efficiency (tons/hour)')
plt.title('Gradient Boosting: Solids Content vs Drying Efficiency')
plt.legend()
plt.show()

In [None]:
# Solids content values from the observed maximum up to 10 percentage points above it
current_max_solids = X['FEEDING_SOLIDS'].max()
new_solids_values = np.linspace(current_max_solids, current_max_solids + 10, 100)

# Predict on a frame carrying the training column name, to avoid scikit-learn's
# feature-name warning; the predicted values are unchanged.
# NOTE(review): tree ensembles generally return a constant outside the range seen
# in training, so this curve is expected to be flat - confirm before interpreting it.
new_solids_frame = pd.DataFrame({'FEEDING_SOLIDS': new_solids_values})
predicted_efficiency = model.predict(new_solids_frame)

# Extrapolated predictions drawn over the observed data
plt.figure(figsize=(8, 6))
plt.plot(new_solids_values, predicted_efficiency, label='Predicted Drying Efficiency', color='red')
plt.scatter(X['FEEDING_SOLIDS'], y, alpha=0.5, label='Actual Data')
plt.xlabel('Solids Content (%)')
plt.ylabel('Drying Efficiency (tons/hour)')
plt.title('Impact of Increasing Solids Content on Drying Efficiency')
plt.legend()
plt.show()

In [None]:
# Increase in solids content to evaluate, in percentage points
increase_in_solids = 5

# The evaluation points are passed as one-row frames carrying the training column
# name, which avoids scikit-learn's feature-name warning without changing the numbers.

# Predicted drying efficiency at the current max solids content
current_efficiency = model.predict(pd.DataFrame({'FEEDING_SOLIDS': [current_max_solids]}))

# Predicted drying efficiency at the increased solids content.
# NOTE(review): tree ensembles generally return a constant outside the range seen
# in training, so this difference is likely ~0 regardless of the true process
# behaviour - confirm before reporting it.
new_solids = current_max_solids + increase_in_solids
new_efficiency = model.predict(pd.DataFrame({'FEEDING_SOLIDS': [new_solids]}))

# Expected change between the two operating points
expected_increase = new_efficiency[0] - current_efficiency[0]

print(f'Expected increase in drying efficiency for a {increase_in_solids}% increase in solids content: {expected_increase:.2f} tons/hour')


In [None]:
# Predictor (solids content) and response (drying rate)
X = data.loc[:, ['FEEDING_SOLIDS']]
y = data.loc[:, 'DRYING_RATE']

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Random Forest Regressor: 200 trees, unlimited depth (max_depth=None), nodes may
# split from 2 samples upward, fixed seed. fit() returns the estimator itself.
model = RandomForestRegressor(
    n_estimators=200,
    max_depth=None,
    min_samples_split=2,
    random_state=42,
).fit(X_train, y_train)

# Score on the held-out rows
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

In [None]:
# Evenly spaced solids content values across the observed range
X_range = np.linspace(X['FEEDING_SOLIDS'].min(), X['FEEDING_SOLIDS'].max(), 100).reshape(-1, 1)

# The model was fitted on a DataFrame with a 'FEEDING_SOLIDS' column. Predicting
# on a frame with the same column avoids scikit-learn's "X does not have valid
# feature names" warning; the predicted values are unchanged.
y_range_pred = model.predict(pd.DataFrame(X_range, columns=['FEEDING_SOLIDS']))

# Model response over the observed data
plt.figure(figsize=(8, 6))
plt.scatter(X['FEEDING_SOLIDS'], y, alpha=0.7, label='Actual Data')
plt.plot(X_range, y_range_pred, color='red', label='Random Forest Predictions')
plt.xlabel('Solids Content (%)')
plt.ylabel('Drying Efficiency (tons/hour)')
plt.title('Random Forest: Solids Content vs Drying Efficiency')
plt.legend()
plt.show()

In [None]:
# Solids content values from the observed maximum up to 10 percentage points above it
current_max_solids = X['FEEDING_SOLIDS'].max()
new_solids_values = np.linspace(current_max_solids, current_max_solids + 10, 100)

# Predict on a frame carrying the training column name, to avoid scikit-learn's
# feature-name warning; the predicted values are unchanged.
# NOTE(review): tree ensembles generally return a constant outside the range seen
# in training, so this curve is expected to be flat - confirm before interpreting it.
new_solids_frame = pd.DataFrame({'FEEDING_SOLIDS': new_solids_values})
predicted_efficiency = model.predict(new_solids_frame)

# Extrapolated predictions drawn over the observed data
plt.figure(figsize=(8, 6))
plt.plot(new_solids_values, predicted_efficiency, label='Predicted Drying Efficiency', color='red')
plt.scatter(X['FEEDING_SOLIDS'], y, alpha=0.5, label='Actual Data')
plt.xlabel('Solids Content (%)')
plt.ylabel('Drying Efficiency (tons/hour)')
plt.title('Impact of Increasing Solids Content on Drying Efficiency')
plt.legend()
plt.show()

In [None]:
# Increase in solids content to evaluate, in percentage points
increase_in_solids = 5

# The evaluation points are passed as one-row frames carrying the training column
# name, which avoids scikit-learn's feature-name warning without changing the numbers.

# Predicted drying efficiency at the current max solids content
current_efficiency = model.predict(pd.DataFrame({'FEEDING_SOLIDS': [current_max_solids]}))

# Predicted drying efficiency at the increased solids content.
# NOTE(review): tree ensembles generally return a constant outside the range seen
# in training, so this difference is likely ~0 regardless of the true process
# behaviour - confirm before reporting it.
new_solids = current_max_solids + increase_in_solids
new_efficiency = model.predict(pd.DataFrame({'FEEDING_SOLIDS': [new_solids]}))

# Expected change between the two operating points
expected_increase = new_efficiency[0] - current_efficiency[0]

print(f'Expected increase in drying efficiency for a {increase_in_solids}% increase in solids content: {expected_increase:.2f} tons/hour')

In [None]:
from scipy.optimize import curve_fit


In [None]:
# Define the exponential function
def exponential_func(x, a, b):
    """Evaluate the exponential model ``a * exp(b * x)``.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which to evaluate the model.
    a : float
        Multiplicative scale factor.
    b : float
        Rate applied to ``x`` inside the exponent.

    Returns
    -------
    Same shape as ``x``: the model value at each point.
    """
    exponent = b * x
    return a * np.exp(exponent)

In [None]:
# Series to fit: solids content (x) against drying rate (y)
x_data = data['FEEDING_SOLIDS']
y_data = data['DRYING_RATE']

# Non-linear least squares fit of a * exp(b * x); p0 is the initial guess for (a, b)
params, covariance = curve_fit(
    exponential_func,
    x_data,
    y_data,
    p0=[1, 0.1],
)

# Unpack and report the fitted parameters
a = params[0]
b = params[1]
print(f'Fitted parameters: a = {a:.2f}, b = {b:.2f}')

In [None]:
# Evaluate the fitted exponential on an even grid spanning the observed solids range
x_range = np.linspace(x_data.min(), x_data.max(), 100)
y_pred = exponential_func(x_range, a, b)

# Fitted curve over the observed data; the figure is also written to disk
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(x_data, y_data, alpha=0.7, label='Datos reales')
ax.plot(x_range, y_pred, color='red', label='Ajuste exponencial')
ax.set_xlabel('Contenido de Sólidos (%)')
ax.set_ylabel('Tasa de filtrado (tons/hora)')
ax.set_title('Regresión Exponencial: Contenido de sólidos vs Tasa de filtrado')
ax.legend()
fig.savefig('exponential_regression.png')
plt.show()

In [None]:
# Solids content values from the observed maximum up to 10 percentage points above it
current_max_solids = x_data.max()
upper_solids = current_max_solids + 10
new_solids_values = np.linspace(current_max_solids, upper_solids, 100)

# Fitted exponential evaluated beyond the observed range
predicted_efficiency = exponential_func(new_solids_values, a, b)

# Extrapolated curve drawn over the observed data
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(new_solids_values, predicted_efficiency, label='Predicción tasa de filtrado', color='red')
ax.scatter(x_data, y_data, alpha=0.5, label='Datos reales')
ax.set_xlabel('Contenido de solidos (%)')
ax.set_ylabel('Tasa de filtrado (tons/hora)')
ax.set_title('Impacto del contenido de solidos en la tasa de filtrado')
ax.legend()
plt.show()

In [None]:
# Increase in solids content to evaluate, in percentage points
increase_in_solids = 15

# Operating points: the observed maximum and the maximum plus the increase
new_solids = current_max_solids + increase_in_solids

# Fitted exponential evaluated at both points
current_efficiency = exponential_func(current_max_solids, a, b)
new_efficiency = exponential_func(new_solids, a, b)

# Expected change between the two operating points
expected_increase = new_efficiency - current_efficiency

print(f'Expected increase in drying efficiency for a {increase_in_solids}% increase in solids content: {expected_increase:.2f} tons/hour')