In [1]:
import numpy as np
import pandas as pd

In [2]:
# Function to simulate data based on logical relationships
def generate_copra_drying_data(num_samples=1000):
    data = []
    for _ in range(num_samples):
        # Randomly generate input features within realistic ranges
        maturity_stage = np.random.randint(90, 120)  # Maturity stage in days
        initial_moisture = np.random.uniform(50, 70)  # Initial moisture level in %
        temperature = np.random.uniform(25, 40)  # Ambient temperature in °C
        humidity = np.random.uniform(40, 80)  # Humidity in %

        # Calculate drying time based on input features using a logical formula
        # Higher initial moisture, lower temperature, and higher humidity increase drying time
        base_drying_time = (initial_moisture - 8) * 0.5  # Base drying time factor

        # Adjust drying time based on temperature and humidity
        temp_factor = 35 / temperature  # Higher temperatures reduce drying time
        humidity_factor = 1 + ((humidity - 50) / 100)  # Higher humidity slightly increases drying time

        drying_time = base_drying_time * temp_factor * humidity_factor
        drying_time = max(drying_time, 12)  # Ensure minimum drying time

        # Simulate current moisture level after drying time
        current_moisture = initial_moisture - (drying_time * 2)  # Rate of drying
        current_moisture = max(current_moisture, 8)  # Final moisture shouldn't go below 8%

        # Estimate potential oil yield inversely proportional to final moisture
        oil_yield = (100 - current_moisture) * 0.05  # Adjust scale for realistic oil yield

        # Append the generated row to the dataset
        data.append([maturity_stage, initial_moisture, temperature, humidity, current_moisture, drying_time, oil_yield])

    # Create a DataFrame from the generated data
    columns = [
        'Maturity Stage (Days)',
        'Initial Moisture Level (%)',
        'Temperature (°C)',
        'Humidity (%)',
        'Current Moisture Level (%)',
        'Drying Time (hrs)',
        'Estimated Oil Yield (kg)'
    ]
    df = pd.DataFrame(data, columns=columns)
    return df

In [3]:
# Generate the dataset
df = generate_copra_drying_data(1000)

# Save to CSV or display the dataset
df.to_csv('copra_drying_data.csv', index=False)

In [4]:
df.head()

Unnamed: 0,Maturity Stage (Days),Initial Moisture Level (%),Temperature (°C),Humidity (%),Current Moisture Level (%),Drying Time (hrs),Estimated Oil Yield (kg)
0,95,55.562495,25.972369,56.308752,8.0,34.069058,4.6
1,90,50.778301,36.721552,75.04881,8.0,25.492951,4.6
2,113,52.771975,26.425622,58.780788,8.0,32.253087,4.6
3,109,58.922414,39.01826,48.480588,13.938239,22.492087,4.303088
4,97,65.433602,26.945645,53.776797,8.0,38.709342,4.6


In [5]:
import numpy as np
import pandas as pd
import random

# Function to simulate the drying time based on multiple factors
def simulate_drying_time(maturity_stage, initial_moisture, temperature, humidity):
    # Base drying time influenced by initial moisture and maturity stage
    base_drying_time = (initial_moisture - 8) * 0.5 + (120 - maturity_stage) * 0.05
    
    # Adjustments based on temperature and humidity
    temp_factor = 30 / temperature  # Lower temperature increases drying time
    humidity_factor = 1 + (humidity - 50) * 0.01  # Higher humidity slightly increases drying time
    
    # Calculate drying time with some random noise for variability
    drying_time = base_drying_time * temp_factor * humidity_factor
    drying_time += np.random.normal(0, 1)  # Adding noise to simulate real-world variations
    
    # Ensure drying time is positive and rounded
    return max(round(drying_time, 2), 1)

# Generate synthetic dataset
num_samples = 1000  # Number of data points
data = []

for _ in range(num_samples):
    maturity_stage = random.randint(80, 130)  # Maturity stage in days
    initial_moisture = random.uniform(50, 70)  # Initial moisture level (%)
    temperature = random.uniform(25, 40)  # Ambient temperature (°C)
    humidity = random.uniform(40, 80)  # Humidity (%)
    
    # Calculate drying time
    drying_time = simulate_drying_time(maturity_stage, initial_moisture, temperature, humidity)
    
    # Append data to list
    data.append([maturity_stage, initial_moisture, temperature, humidity, drying_time])

# Create DataFrame
columns = ['Maturity Stage (Days)', 'Initial Moisture Level (%)', 'Temperature (°C)', 'Humidity (%)', 'Drying Time (hrs)']
df = pd.DataFrame(data, columns=columns)

# Save to CSV (optional)
df.to_csv('copra_drying_dataset_2.csv', index=False)




In [6]:
# Print first few rows of the dataset
df.head()

Unnamed: 0,Maturity Stage (Days),Initial Moisture Level (%),Temperature (°C),Humidity (%),Drying Time (hrs)
0,86,56.584403,25.06237,76.597315,38.54
1,93,57.168879,39.769833,43.72303,17.99
2,113,55.445474,33.240565,45.950685,19.53
3,98,52.306003,27.9514,43.819392,24.26
4,114,52.394858,25.911155,61.651889,29.09


In [7]:
import numpy as np
import pandas as pd
import random

# Function to simulate the drying time based on multiple factors
def simulate_drying_time(maturity_stage, initial_moisture, temperature, humidity):
    # Base drying time influenced by initial moisture and maturity stage
    base_drying_time = (initial_moisture - 8) * 0.5 + (120 - maturity_stage) * 0.05
    
    # Adjustments based on temperature and humidity
    temp_factor = 30 / temperature  # Lower temperature increases drying time
    humidity_factor = 1 + (humidity - 50) * 0.01  # Higher humidity slightly increases drying time
    
    # Calculate drying time with some random noise for variability
    drying_time = base_drying_time * temp_factor * humidity_factor
    drying_time += np.random.normal(0, 1)  # Adding noise to simulate real-world variations
    
    # Ensure drying time is positive and rounded
    return max(round(drying_time, 2), 1)

# Function to estimate oil yield based on moisture level and maturity stage
def estimate_oil_yield(maturity_stage, current_moisture_level):
    # Higher maturity and lower final moisture generally yield more oil
    base_yield = (120 - maturity_stage) * 0.02 + (55 - current_moisture_level) * 0.1
    base_yield = max(base_yield, 1)  # Ensure a positive oil yield
    return round(base_yield, 2)

# Generate synthetic dataset
num_samples = 1000  # Number of data points
data = []

for _ in range(num_samples):
    maturity_stage = random.randint(80, 130)  # Maturity stage in days
    initial_moisture = random.uniform(50, 60)  # Initial moisture level (%)
    temperature = random.uniform(25, 40)  # Ambient temperature (°C)
    humidity = random.uniform(40, 80)  # Humidity (%)
    
    # Calculate drying time
    drying_time = simulate_drying_time(maturity_stage, initial_moisture, temperature, humidity)
    
    # Estimate oil yield based on the initial conditions and final moisture level
    current_moisture_level = 8  # Target final moisture level
    estimated_oil_yield = estimate_oil_yield(maturity_stage, current_moisture_level)
    
    # Append data to list
    data.append([maturity_stage, initial_moisture, temperature, humidity, drying_time, estimated_oil_yield])

# Create DataFrame
columns = ['Maturity Stage (Days)', 'Initial Moisture Level (%)', 'Temperature (°C)', 'Humidity (%)', 'Drying Time (hrs)', 'Estimated Oil Yield (kg)']
df = pd.DataFrame(data, columns=columns)

# Save to CSV (optional)
df.to_csv('copra_drying_dataset_with_oil_yield_3.csv', index=False)



In [8]:
# Print first few rows of the dataset
df.head()


Unnamed: 0,Maturity Stage (Days),Initial Moisture Level (%),Temperature (°C),Humidity (%),Drying Time (hrs),Estimated Oil Yield (kg)
0,81,57.17943,33.603826,61.663614,26.25,5.48
1,129,55.619035,33.78297,54.228588,22.46,4.52
2,97,53.793165,28.576236,67.861001,30.84,5.16
3,118,53.137549,32.375957,69.496505,25.23,4.74
4,89,57.056345,30.432557,65.311436,28.81,5.32
