In [1]:
import numpy as np
import pandas as pd

In [2]:
# Function to simulate data based on logical relationships
def generate_copra_drying_data(num_samples=1000):
    data = []
    for _ in range(num_samples):
        # Randomly generate input features within realistic ranges
        maturity_stage = np.random.randint(90, 120)  # Maturity stage in days
        initial_moisture = np.random.uniform(50, 70)  # Initial moisture level in %
        temperature = np.random.uniform(25, 40)  # Ambient temperature in °C
        humidity = np.random.uniform(40, 80)  # Humidity in %

        # Calculate drying time based on input features using a logical formula
        # Higher initial moisture, lower temperature, and higher humidity increase drying time
        base_drying_time = (initial_moisture - 8) * 0.5  # Base drying time factor

        # Adjust drying time based on temperature and humidity
        temp_factor = 35 / temperature  # Higher temperatures reduce drying time
        humidity_factor = 1 + ((humidity - 50) / 100)  # Higher humidity slightly increases drying time

        drying_time = base_drying_time * temp_factor * humidity_factor
        drying_time = max(drying_time, 12)  # Ensure minimum drying time

        # Simulate current moisture level after drying time
        current_moisture = initial_moisture - (drying_time * 2)  # Rate of drying
        current_moisture = max(current_moisture, 8)  # Final moisture shouldn't go below 8%

        # Estimate potential oil yield inversely proportional to final moisture
        oil_yield = (100 - current_moisture) * 0.05  # Adjust scale for realistic oil yield

        # Append the generated row to the dataset
        data.append([maturity_stage, initial_moisture, temperature, humidity, current_moisture, drying_time, oil_yield])

    # Create a DataFrame from the generated data
    columns = [
        'Maturity Stage (Days)',
        'Initial Moisture Level (%)',
        'Temperature (°C)',
        'Humidity (%)',
        'Current Moisture Level (%)',
        'Drying Time (hrs)',
        'Estimated Oil Yield (kg)'
    ]
    df = pd.DataFrame(data, columns=columns)
    return df

In [3]:
# Generate the dataset
df = generate_copra_drying_data(1000)

# Save to CSV or display the dataset
df.to_csv('copra_drying_data.csv', index=False)

In [4]:
df.head()

Unnamed: 0,Maturity Stage (Days),Initial Moisture Level (%),Temperature (°C),Humidity (%),Current Moisture Level (%),Drying Time (hrs),Estimated Oil Yield (kg)
0,116,69.716515,26.941157,77.271789,8.0,51.02175,4.6
1,104,62.531397,25.676909,44.38447,8.0,35.078618,4.6
2,91,69.015346,38.769324,58.325968,9.345973,29.834686,4.532701
3,96,58.745397,39.747447,73.861657,8.0,27.673389,4.6
4,96,66.261581,38.481216,78.154717,8.0,33.955187,4.6


In [5]:
import numpy as np
import pandas as pd
import random

# Function to simulate the drying time based on multiple factors
def simulate_drying_time(maturity_stage, initial_moisture, temperature, humidity):
    # Base drying time influenced by initial moisture and maturity stage
    base_drying_time = (initial_moisture - 8) * 0.5 + (120 - maturity_stage) * 0.05
    
    # Adjustments based on temperature and humidity
    temp_factor = 30 / temperature  # Lower temperature increases drying time
    humidity_factor = 1 + (humidity - 50) * 0.01  # Higher humidity slightly increases drying time
    
    # Calculate drying time with some random noise for variability
    drying_time = base_drying_time * temp_factor * humidity_factor
    drying_time += np.random.normal(0, 1)  # Adding noise to simulate real-world variations
    
    # Ensure drying time is positive and rounded
    return max(round(drying_time, 2), 1)

# Generate synthetic dataset
num_samples = 1000  # Number of data points
data = []

for _ in range(num_samples):
    maturity_stage = random.randint(80, 130)  # Maturity stage in days
    initial_moisture = random.uniform(50, 70)  # Initial moisture level (%)
    temperature = random.uniform(25, 40)  # Ambient temperature (°C)
    humidity = random.uniform(40, 80)  # Humidity (%)
    
    # Calculate drying time
    drying_time = simulate_drying_time(maturity_stage, initial_moisture, temperature, humidity)
    
    # Append data to list
    data.append([maturity_stage, initial_moisture, temperature, humidity, drying_time])

# Create DataFrame
columns = ['Maturity Stage (Days)', 'Initial Moisture Level (%)', 'Temperature (°C)', 'Humidity (%)', 'Drying Time (hrs)']
df = pd.DataFrame(data, columns=columns)

# Save to CSV (optional)
df.to_csv('copra_drying_dataset_2.csv', index=False)




In [6]:
# Print first few rows of the dataset
df.head()

Unnamed: 0,Maturity Stage (Days),Initial Moisture Level (%),Temperature (°C),Humidity (%),Drying Time (hrs)
0,81,56.170323,32.00459,47.115131,22.53
1,95,56.079118,27.176977,67.435012,33.26
2,106,53.175226,28.299767,70.756458,29.16
3,110,58.536631,34.784635,43.391057,20.24
4,95,50.718114,30.040504,71.096248,27.69


In [7]:
import numpy as np
import pandas as pd
import random

# Function to simulate the drying time based on multiple factors
def simulate_drying_time(maturity_stage, initial_moisture, temperature, humidity):
    # Base drying time influenced by initial moisture and maturity stage
    base_drying_time = (initial_moisture - 8) * 0.5 + (120 - maturity_stage) * 0.05
    
    # Adjustments based on temperature and humidity
    temp_factor = 30 / temperature  # Lower temperature increases drying time
    humidity_factor = 1 + (humidity - 50) * 0.01  # Higher humidity slightly increases drying time
    
    # Calculate drying time with some random noise for variability
    drying_time = base_drying_time * temp_factor * humidity_factor
    drying_time += np.random.normal(0, 1)  # Adding noise to simulate real-world variations
    
    # Ensure drying time is positive and rounded
    return max(round(drying_time, 2), 1)

# Function to estimate oil yield based on moisture level and maturity stage
def estimate_oil_yield(maturity_stage, current_moisture_level):
    # Higher maturity and lower final moisture generally yield more oil
    base_yield = (120 - maturity_stage) * 0.02 + (55 - current_moisture_level) * 0.1
    base_yield = max(base_yield, 1)  # Ensure a positive oil yield
    return round(base_yield, 2)

# Generate synthetic dataset
num_samples = 1000  # Number of data points
data = []

for _ in range(num_samples):
    maturity_stage = random.randint(80, 130)  # Maturity stage in days
    initial_moisture = random.uniform(50, 60)  # Initial moisture level (%)
    temperature = random.uniform(25, 40)  # Ambient temperature (°C)
    humidity = random.uniform(40, 80)  # Humidity (%)
    
    # Calculate drying time
    drying_time = simulate_drying_time(maturity_stage, initial_moisture, temperature, humidity)
    
    # Estimate oil yield based on the initial conditions and final moisture level
    current_moisture_level = 8  # Target final moisture level
    estimated_oil_yield = estimate_oil_yield(maturity_stage, current_moisture_level)
    
    # Append data to list
    data.append([maturity_stage, initial_moisture, temperature, humidity, drying_time, estimated_oil_yield])

# Create DataFrame
columns = ['Maturity Stage (Days)', 'Initial Moisture Level (%)', 'Temperature (°C)', 'Humidity (%)', 'Drying Time (hrs)', 'Estimated Oil Yield (kg)']
df = pd.DataFrame(data, columns=columns)

# Save to CSV (optional)
df.to_csv('copra_drying_dataset_with_oil_yield_3.csv', index=False)



In [8]:
# Print first few rows of the dataset
df.head()


Unnamed: 0,Maturity Stage (Days),Initial Moisture Level (%),Temperature (°C),Humidity (%),Drying Time (hrs),Estimated Oil Yield (kg)
0,130,54.645739,35.513079,44.789934,16.64,4.5
1,118,53.109554,39.122636,63.832887,20.16,4.74
2,101,52.050974,33.05234,79.595865,27.3,5.08
3,119,58.265091,36.838714,56.139443,22.39,4.72
4,120,55.951529,26.617009,58.265678,28.2,4.7
