In [1]:
import numpy as np
import pandas as pd

In [2]:
# Function to simulate data based on logical relationships
def generate_copra_drying_data(num_samples=1000):
    data = []
    for _ in range(num_samples):
        # Randomly generate input features within realistic ranges
        maturity_stage = np.random.randint(90, 120)  # Maturity stage in days
        initial_moisture = np.random.uniform(50, 70)  # Initial moisture level in %
        temperature = np.random.uniform(25, 40)  # Ambient temperature in °C
        humidity = np.random.uniform(40, 80)  # Humidity in %

        # Calculate drying time based on input features using a logical formula
        # Higher initial moisture, lower temperature, and higher humidity increase drying time
        base_drying_time = (initial_moisture - 8) * 0.5  # Base drying time factor

        # Adjust drying time based on temperature and humidity
        temp_factor = 35 / temperature  # Higher temperatures reduce drying time
        humidity_factor = 1 + ((humidity - 50) / 100)  # Higher humidity slightly increases drying time

        drying_time = base_drying_time * temp_factor * humidity_factor
        drying_time = max(drying_time, 12)  # Ensure minimum drying time

        # Simulate current moisture level after drying time
        current_moisture = initial_moisture - (drying_time * 2)  # Rate of drying
        current_moisture = max(current_moisture, 8)  # Final moisture shouldn't go below 8%

        # Estimate potential oil yield inversely proportional to final moisture
        oil_yield = (100 - current_moisture) * 0.05  # Adjust scale for realistic oil yield

        # Append the generated row to the dataset
        data.append([maturity_stage, initial_moisture, temperature, humidity, current_moisture, drying_time, oil_yield])

    # Create a DataFrame from the generated data
    columns = [
        'Maturity Stage (Days)',
        'Initial Moisture Level (%)',
        'Temperature (°C)',
        'Humidity (%)',
        'Current Moisture Level (%)',
        'Drying Time (hrs)',
        'Estimated Oil Yield (kg)'
    ]
    df = pd.DataFrame(data, columns=columns)
    return df

In [3]:
# Generate the dataset
df = generate_copra_drying_data(1000)

# Save to CSV or display the dataset
df.to_csv('copra_drying_data.csv', index=False)

In [4]:
df.head()

Unnamed: 0,Maturity Stage (Days),Initial Moisture Level (%),Temperature (°C),Humidity (%),Current Moisture Level (%),Drying Time (hrs),Estimated Oil Yield (kg)
0,92,64.354485,34.074246,49.469426,8.0,28.789219,4.6
1,108,62.365122,33.493315,58.768921,8.0,30.896202,4.6
2,110,68.565833,38.545091,70.60273,8.0,33.162999,4.6
3,102,66.245319,35.502781,44.628676,11.909094,27.168113,4.404545
4,110,54.406946,35.896909,58.936401,8.0,24.645465,4.6


In [5]:
import numpy as np
import pandas as pd
import random

# Function to simulate the drying time based on multiple factors
def simulate_drying_time(maturity_stage, initial_moisture, temperature, humidity):
    # Base drying time influenced by initial moisture and maturity stage
    base_drying_time = (initial_moisture - 8) * 0.5 + (120 - maturity_stage) * 0.05
    
    # Adjustments based on temperature and humidity
    temp_factor = 30 / temperature  # Lower temperature increases drying time
    humidity_factor = 1 + (humidity - 50) * 0.01  # Higher humidity slightly increases drying time
    
    # Calculate drying time with some random noise for variability
    drying_time = base_drying_time * temp_factor * humidity_factor
    drying_time += np.random.normal(0, 1)  # Adding noise to simulate real-world variations
    
    # Ensure drying time is positive and rounded
    return max(round(drying_time, 2), 1)

# Generate synthetic dataset
num_samples = 1000  # Number of data points
data = []

for _ in range(num_samples):
    maturity_stage = random.randint(80, 130)  # Maturity stage in days
    initial_moisture = random.uniform(50, 70)  # Initial moisture level (%)
    temperature = random.uniform(25, 40)  # Ambient temperature (°C)
    humidity = random.uniform(40, 80)  # Humidity (%)
    
    # Calculate drying time
    drying_time = simulate_drying_time(maturity_stage, initial_moisture, temperature, humidity)
    
    # Append data to list
    data.append([maturity_stage, initial_moisture, temperature, humidity, drying_time])

# Create DataFrame
columns = ['Maturity Stage (Days)', 'Initial Moisture Level (%)', 'Temperature (°C)', 'Humidity (%)', 'Drying Time (hrs)']
df = pd.DataFrame(data, columns=columns)

# Save to CSV (optional)
df.to_csv('copra_drying_dataset_2.csv', index=False)




In [6]:
# Print first few rows of the dataset
df.head()

Unnamed: 0,Maturity Stage (Days),Initial Moisture Level (%),Temperature (°C),Humidity (%),Drying Time (hrs)
0,88,50.151095,36.823012,56.57033,19.78
1,87,68.954234,29.59814,40.3335,28.94
2,92,56.550231,39.193917,48.142857,20.4
3,102,51.23702,28.46015,54.511265,25.63
4,130,63.650439,31.777045,47.098347,25.71


In [1]:
import numpy as np
import pandas as pd
import random

# Function to simulate the drying time based on multiple factors
def simulate_drying_time(maturity_stage, initial_moisture, temperature, humidity):
    # Base drying time influenced by initial moisture and maturity stage
    base_drying_time = (initial_moisture - 8) * 0.5 + (120 - maturity_stage) * 0.05
    
    # Adjustments based on temperature and humidity
    temp_factor = 30 / temperature  # Lower temperature increases drying time
    humidity_factor = 1 + (humidity - 50) * 0.01  # Higher humidity slightly increases drying time
    
    # Calculate drying time with some random noise for variability
    drying_time = base_drying_time * temp_factor * humidity_factor
    drying_time += np.random.normal(0, 1)  # Adding noise to simulate real-world variations
    
    # Ensure drying time is positive and rounded
    return max(round(drying_time, 2), 1)

# Function to estimate oil yield based on moisture level and maturity stage
def estimate_oil_yield(maturity_stage, current_moisture_level):
    # Higher maturity and lower final moisture generally yield more oil
    base_yield = (120 - maturity_stage) * 0.02 + (55 - current_moisture_level) * 0.1
    base_yield = max(base_yield, 1)  # Ensure a positive oil yield
    return round(base_yield, 2)

# Generate synthetic dataset
num_samples = 3500  # Number of data points
data = []

for _ in range(num_samples):
    maturity_stage = random.randint(80, 130)  # Maturity stage in days
    initial_moisture = random.uniform(20, 60)  # Initial moisture level (%)
    temperature = random.uniform(25, 40)  # Ambient temperature (°C)
    humidity = random.uniform(40, 80)  # Humidity (%)
    
    # Calculate drying time
    drying_time = simulate_drying_time(maturity_stage, initial_moisture, temperature, humidity)
    
    # Estimate oil yield based on the initial conditions and final moisture level
    current_moisture_level = 8  # Target final moisture level
    estimated_oil_yield = estimate_oil_yield(maturity_stage, current_moisture_level)
    
    # Append data to list
    data.append([maturity_stage, initial_moisture, temperature, humidity, drying_time, estimated_oil_yield])

# Create DataFrame
columns = ['Maturity Stage (Days)', 'Initial Moisture Level (%)', 'Temperature (°C)', 'Humidity (%)', 'Drying Time (hrs)', 'Estimated Oil Yield (kg)']
df = pd.DataFrame(data, columns=columns)

# Save to CSV (optional)
df.to_csv('copra_drying_dataset_with_oil_yield_3.csv', index=False)



In [8]:
# Print first few rows of the dataset
df.head()


Unnamed: 0,Maturity Stage (Days),Initial Moisture Level (%),Temperature (°C),Humidity (%),Drying Time (hrs),Estimated Oil Yield (kg)
0,127,56.60066,38.811475,75.038997,23.8,4.56
1,102,57.450324,38.597286,71.364034,22.18,5.06
2,95,56.353554,36.959799,66.104949,23.88,5.2
3,83,53.980471,26.752996,42.993729,25.96,5.44
4,103,58.589568,29.419786,57.546722,27.99,5.04
