In [None]:
Source code for ps 2 : 

TMRad and initial process temperature:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Build the synthetic dataset
np.random.seed(42)  # Fixed seed so each run draws identical samples
n_samples = 1000

# Uniformly sampled process parameters. The draw order below determines which
# random values each parameter receives for the fixed seed.
c_p = np.random.uniform(1000, 4000, n_samples)  # Specific heat capacity (J/kg·K)
R = 8.314  # Universal gas constant (J/(mol·K))
T_0 = np.random.uniform(300, 600, n_samples)  # Initial temperature (K)
q_0 = np.random.uniform(5000, 20000, n_samples)  # Heat release rate (J/s·kg)
E = np.random.uniform(50000, 150000, n_samples)  # Activation energy (J/mol)

# Target variable: TMRad = c_p * R * T_0^2 / (q_0 * E)
TMRad = (c_p * R * T_0**2) / (q_0 * E)

# Gather the four inputs and the target into a single table
data = pd.DataFrame(dict(
    Specific_Heat_Capacity=c_p,
    Initial_Temperature=T_0,
    Heat_Release_Rate=q_0,
    Activation_Energy=E,
    TMRad=TMRad,
))

# Separate inputs from target, then hold out 25% of the rows for testing
feature_columns = [
    'Specific_Heat_Capacity',
    'Initial_Temperature',
    'Heat_Release_Rate',
    'Activation_Energy',
]
X = data[feature_columns]
y = data['TMRad']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Standardize the features; the scaler statistics come from the training
# rows only and are then applied unchanged to the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate regressors (SVR, Lasso and Ridge use their default settings)
models = {
    'SVR': SVR(),
    'Lasso Regression': Lasso(),
    'Ridge Regression': Ridge(),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
}

# Fit every model on the scaled training data and record, per model, the
# held-out targets, the predictions and three error metrics.
results = {}
for model_name, model in models.items():
    y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)
    results[model_name] = {
        'Actual': y_test,
        'Predicted': y_pred,
        'MSE': mean_squared_error(y_test, y_pred),
        'MAE': mean_absolute_error(y_test, y_pred),
        'R²': r2_score(y_test, y_pred),
    }

# Report the metrics and a short actual-vs-predicted preview for each model
for model_name, metrics in results.items():
    print(f"{model_name} Results:")
    print(f"MSE: {metrics['MSE']:.2f}, MAE: {metrics['MAE']:.2f}, R²: {metrics['R²']:.2f}\n")

    # 'Actual' is a Series, so the preview keeps the row labels of the test split
    comparison_df = pd.DataFrame({
        'Actual': metrics['Actual'],
        'Predicted': metrics['Predicted'],
    })

    print(comparison_df.head())  # First few rows only
    print("\n" + "="*40 + "\n")  # Visual separator between models






Random forest with different numbers of samples:

# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

# Function to generate synthetic dataset
def generate_data(n_samples, seed=42):
    """Build a synthetic TMRad dataset.

    Four process parameters are drawn independently from uniform ranges and
    the target is computed as ``TMRad = c_p * R * T_0**2 / (q_0 * E)`` with
    ``R = 8.314``.

    Args:
        n_samples: Number of rows to generate.
        seed: Seed for the random stream. The default (42) reproduces the
            values previously obtained by seeding the global NumPy RNG
            with 42.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Specific_Heat_Capacity``,
        ``Initial_Temperature``, ``Heat_Release_Rate``, ``Activation_Energy``
        and ``TMRad``.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by np.random.uniform(...), but does not reset the global RNG
    # that other code in the process may be relying on.
    rng = np.random.RandomState(seed)

    # Draw order matters: it fixes which random values each parameter gets.
    c_p = rng.uniform(1000, 4000, n_samples)  # Specific heat capacity (J/kg·K)
    R = 8.314  # Universal gas constant (J/(mol·K))
    T_0 = rng.uniform(300, 600, n_samples)  # Initial temperature (K)
    q_0 = rng.uniform(5000, 20000, n_samples)  # Heat release rate (J/s·kg)
    E = rng.uniform(50000, 150000, n_samples)  # Activation energy (J/mol)
    TMRad = (c_p * R * T_0**2) / (q_0 * E)  # Calculate TMRad

    return pd.DataFrame({
        'Specific_Heat_Capacity': c_p,
        'Initial_Temperature': T_0,
        'Heat_Release_Rate': q_0,
        'Activation_Energy': E,
        'TMRad': TMRad,
    })

# Function to evaluate the Random Forest model
def evaluate_random_forest(n_samples):
    """Train a random forest on a generated dataset and score it.

    A dataset of ``n_samples`` rows is produced by ``generate_data``, split
    75/25 into training and test rows, and a 100-tree random forest is
    fitted on the unscaled training features.

    Args:
        n_samples: Number of rows in the generated dataset.

    Returns:
        A tuple ``(mse, mae, r2)`` computed on the held-out test rows.
    """
    dataset = generate_data(n_samples)
    target = dataset['TMRad']
    features = dataset[[
        'Specific_Heat_Capacity',
        'Initial_Temperature',
        'Heat_Release_Rate',
        'Activation_Energy',
    ]]

    # Fixed random_state keeps the split and the forest reproducible
    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=0.25, random_state=42
    )

    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    predictions = forest.fit(features_train, target_train).predict(features_test)

    return (
        mean_squared_error(target_test, predictions),
        mean_absolute_error(target_test, predictions),
        r2_score(target_test, predictions),
    )

# Score the random forest once per dataset size
sample_sizes = [100, 500, 1000, 5000, 10000, 20000, 30000, 50000]

# Each entry is (n, mse, mae, r2) for one dataset size
results = [(n, *evaluate_random_forest(n)) for n in sample_sizes]

# Tabulate the sweep
results_df = pd.DataFrame(results, columns=['Number of Samples', 'MSE', 'MAE', 'R²'])

# Display results
print("Results Summary:")
print(results_df)

# Draw all three metrics against dataset size on one shared axis
plt.figure(figsize=(12, 6))
for metric in ('MSE', 'MAE', 'R²'):
    plt.plot(results_df['Number of Samples'], results_df[metric], marker='o', label=metric)
plt.xlabel('Number of Samples')
plt.ylabel('Metrics')
plt.title('Impact of Sample Size on Random Forest Performance')
plt.legend()
plt.grid(True)
plt.show()

Maximum threshold temperature part:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, Ridge
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Build the synthetic dataset
np.random.seed(42)  # Fixed seed so each run draws identical samples
n_samples = 1000

# Uniformly sampled process parameters. The draw order below determines which
# random values each parameter receives for the fixed seed.
c_p = np.random.uniform(1000, 4000, n_samples)  # Specific heat capacity (J/kg·K)
R = 8.314  # Universal gas constant (J/(mol·K)); not used by the formula below
T_0 = np.random.uniform(300, 600, n_samples)  # Initial temperature (K)
q_0 = np.random.uniform(5000, 20000, n_samples)  # Heat release rate (J/s·kg)
E = np.random.uniform(50000, 150000, n_samples)  # Activation energy (J/mol)

# Target variable (hypothetical formula): sqrt(c_p * T_0^2 / (q_0 * E))
threshold_temp = np.sqrt((c_p * T_0**2) / (q_0 * E))

# Gather the four inputs and the target into a single table
data = pd.DataFrame(dict(
    Specific_Heat_Capacity=c_p,
    Initial_Temperature=T_0,
    Heat_Release_Rate=q_0,
    Activation_Energy=E,
    Max_Threshold_Temperature=threshold_temp,
))

# Separate inputs from target, then hold out 25% of the rows for testing
feature_columns = [
    'Specific_Heat_Capacity',
    'Initial_Temperature',
    'Heat_Release_Rate',
    'Activation_Energy',
]
X = data[feature_columns]
y = data['Max_Threshold_Temperature']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Standardize the features; the scaler statistics come from the training
# rows only and are then applied unchanged to the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate regressors
models = {
    'SVR': SVR(),
    'Lasso Regression': Lasso(alpha=0.01),
    'Ridge Regression': Ridge(alpha=1.0),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
}

# Fit every model on the scaled training data and record, per model, the
# held-out targets, the predictions and three error metrics.
results = {}
for model_name, model in models.items():
    y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)
    results[model_name] = {
        'Actual': y_test,
        'Predicted': y_pred,
        'MSE': mean_squared_error(y_test, y_pred),
        'MAE': mean_absolute_error(y_test, y_pred),
        'R²': r2_score(y_test, y_pred),
    }

# Report the metrics and a short actual-vs-predicted preview for each model
for model_name, metrics in results.items():
    print(f"{model_name} Results:")
    print(f"MSE: {metrics['MSE']:.5f}, MAE: {metrics['MAE']:.5f}, R²: {metrics['R²']:.5f}\n")

    # .values drops the Series index, so the preview rows are numbered from 0
    comparison_df = pd.DataFrame({
        'Actual': metrics['Actual'].values,
        'Predicted': metrics['Predicted'],
    })

    print(f"First 5 Actual vs Predicted values for {model_name}:")
    print(comparison_df.head())
    print("\n" + "="*40 + "\n")

# Scatter the random forest predictions against the true values, with the
# y = x diagonal drawn across the observed target range as the ideal line.
forest_results = results['Random Forest']
target_low, target_high = y_test.min(), y_test.max()

plt.figure(figsize=(10, 6))
plt.scatter(forest_results['Actual'], forest_results['Predicted'], alpha=0.5, label='Random Forest')
plt.plot([target_low, target_high], [target_low, target_high], 'r--', label='Ideal')
plt.xlabel('Actual Threshold Temperature')
plt.ylabel('Predicted Threshold Temperature')
plt.title('Actual vs Predicted Threshold Temperature (Random Forest)')
plt.legend()
plt.show()

Max threshold temperature using the Frank-Kamenetskii method:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.linear_model import Lasso, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

# Synthetic stand-in data for demonstration (swap in a real dataset here)
np.random.seed(42)  # Fixed seed so each run draws identical samples
n_samples = 1000

# Uniformly sampled process parameters. The draw order below determines which
# random values each parameter receives for the fixed seed.
c_p = np.random.uniform(1000, 4000, n_samples)  # Specific heat capacity (J/kg·K)
R = 8.314  # Universal gas constant (J/(mol·K)); not used by the formula below
T_0 = np.random.uniform(300, 600, n_samples)  # Initial temperature (K)
q_0 = np.random.uniform(5000, 20000, n_samples)  # Heat release rate (J/s·kg)
E = np.random.uniform(50000, 150000, n_samples)  # Activation energy (J/mol)

# Target variable: q_0 * E / (c_p * T_0^2)
T_threshold = (q_0 * E) / (c_p * T_0**2)

# Gather the four inputs and the target into a single table
data = pd.DataFrame(dict(
    Specific_Heat_Capacity=c_p,
    Initial_Temperature=T_0,
    Heat_Release_Rate=q_0,
    Activation_Energy=E,
    Threshold_Temperature=T_threshold,
))

# Inputs and target
feature_columns = [
    'Specific_Heat_Capacity',
    'Initial_Temperature',
    'Heat_Release_Rate',
    'Activation_Energy',
]
X = data[feature_columns]
y = data['Threshold_Temperature']

# Hold out 25% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Standardize the features; the scaler statistics come from the training
# rows only and are then applied unchanged to the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate regressors (SVR, Lasso and Ridge use their default settings)
models = {
    'SVR': SVR(),
    'Lasso': Lasso(),
    'Ridge': Ridge(),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
}

# Fit each model, predict on the held-out rows and keep metrics together
# with the predictions and targets needed for plotting.
results = {}
for name, model in models.items():
    y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)
    results[name] = {
        'MSE': mean_squared_error(y_test, y_pred),
        'MAE': mean_absolute_error(y_test, y_pred),
        'R2': r2_score(y_test, y_pred),
        'y_pred': y_pred,
        'y_test': y_test,
    }

# Print the metrics and show one predicted-vs-actual figure per model.
# Note: `model` is rebound here to the model's label (a string).
for model, metrics in results.items():
    print(f"{model} - MSE: {metrics['MSE']}, MAE: {metrics['MAE']}, R²: {metrics['R2']}")

    actual = metrics['y_test']
    actual_low, actual_high = actual.min(), actual.max()

    plt.figure(figsize=(8, 6))
    plt.scatter(actual, metrics['y_pred'], color='blue', alpha=0.6, label='Predicted vs Actual')
    # y = x diagonal across the observed target range marks a perfect fit
    plt.plot([actual_low, actual_high],
             [actual_low, actual_high],
             color='red', linestyle='--', label='Perfect Prediction')
    plt.title(f'Predicted vs Actual - {model}')
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.legend()
    plt.show()



 



