In [12]:
import numpy as np
import pandas as pd

In [13]:
# Function to simulate data based on logical relationships
def generate_copra_drying_data(num_samples=1000):
    data = []
    for _ in range(num_samples):
        # Randomly generate input features within realistic ranges
        maturity_stage = np.random.randint(90, 120)  # Maturity stage in days
        initial_moisture = np.random.uniform(50, 70)  # Initial moisture level in %
        temperature = np.random.uniform(25, 40)  # Ambient temperature in °C
        humidity = np.random.uniform(40, 80)  # Humidity in %

        # Calculate drying time based on input features using a logical formula
        # Higher initial moisture, lower temperature, and higher humidity increase drying time
        base_drying_time = (initial_moisture - 8) * 0.5  # Base drying time factor

        # Adjust drying time based on temperature and humidity
        temp_factor = 35 / temperature  # Higher temperatures reduce drying time
        humidity_factor = 1 + ((humidity - 50) / 100)  # Higher humidity slightly increases drying time

        drying_time = base_drying_time * temp_factor * humidity_factor
        drying_time = max(drying_time, 12)  # Ensure minimum drying time

        # Simulate current moisture level after drying time
        current_moisture = initial_moisture - (drying_time * 2)  # Rate of drying
        current_moisture = max(current_moisture, 8)  # Final moisture shouldn't go below 8%

        # Estimate potential oil yield inversely proportional to final moisture
        oil_yield = (100 - current_moisture) * 0.05  # Adjust scale for realistic oil yield

        # Append the generated row to the dataset
        data.append([maturity_stage, initial_moisture, temperature, humidity, current_moisture, drying_time, oil_yield])

    # Create a DataFrame from the generated data
    columns = [
        'Maturity Stage (Days)',
        'Initial Moisture Level (%)',
        'Temperature (°C)',
        'Humidity (%)',
        'Current Moisture Level (%)',
        'Drying Time (hrs)',
        'Estimated Oil Yield (kg)'
    ]
    df = pd.DataFrame(data, columns=columns)
    return df

In [14]:
# Generate the dataset
df = generate_copra_drying_data(1000)

# Save to CSV or display the dataset
df.to_csv('copra_drying_data.csv', index=False)

In [15]:
df.head()

Unnamed: 0,Maturity Stage (Days),Initial Moisture Level (%),Temperature (°C),Humidity (%),Current Moisture Level (%),Drying Time (hrs),Estimated Oil Yield (kg)
0,104,52.565983,39.775666,45.925916,14.948475,18.808754,4.252576
1,104,61.176535,26.856521,78.541761,8.0,44.540243,4.6
2,91,58.136656,25.54227,54.773219,8.0,35.990196,4.6
3,113,64.888346,38.367514,78.445258,8.0,33.3285,4.6
4,95,64.821157,33.603108,47.603892,8.0,28.882568,4.6


In [16]:
import numpy as np
import pandas as pd
import random

# Function to simulate the drying time based on multiple factors
def simulate_drying_time(maturity_stage, initial_moisture, temperature, humidity):
    # Base drying time influenced by initial moisture and maturity stage
    base_drying_time = (initial_moisture - 8) * 0.5 + (120 - maturity_stage) * 0.05
    
    # Adjustments based on temperature and humidity
    temp_factor = 30 / temperature  # Lower temperature increases drying time
    humidity_factor = 1 + (humidity - 50) * 0.01  # Higher humidity slightly increases drying time
    
    # Calculate drying time with some random noise for variability
    drying_time = base_drying_time * temp_factor * humidity_factor
    drying_time += np.random.normal(0, 1)  # Adding noise to simulate real-world variations
    
    # Ensure drying time is positive and rounded
    return max(round(drying_time, 2), 1)

# Generate synthetic dataset
num_samples = 1000  # Number of data points
data = []

for _ in range(num_samples):
    maturity_stage = random.randint(80, 130)  # Maturity stage in days
    initial_moisture = random.uniform(50, 70)  # Initial moisture level (%)
    temperature = random.uniform(25, 40)  # Ambient temperature (°C)
    humidity = random.uniform(40, 80)  # Humidity (%)
    
    # Calculate drying time
    drying_time = simulate_drying_time(maturity_stage, initial_moisture, temperature, humidity)
    
    # Append data to list
    data.append([maturity_stage, initial_moisture, temperature, humidity, drying_time])

# Create DataFrame
columns = ['Maturity Stage (Days)', 'Initial Moisture Level (%)', 'Temperature (°C)', 'Humidity (%)', 'Drying Time (hrs)']
df = pd.DataFrame(data, columns=columns)

# Save to CSV (optional)
df.to_csv('copra_drying_dataset_2.csv', index=False)




In [17]:
# Print first few rows of the dataset
df.head()

Unnamed: 0,Maturity Stage (Days),Initial Moisture Level (%),Temperature (°C),Humidity (%),Drying Time (hrs)
0,107,65.123137,38.16622,68.984003,26.33
1,103,58.588936,35.419197,62.828137,25.77
2,109,50.271383,29.794524,75.508538,29.38
3,113,68.310143,28.212724,68.150751,39.04
4,86,53.129068,35.700123,52.149276,19.92


In [27]:
import numpy as np
import pandas as pd
import random

# Function to simulate the drying time based on multiple factors
def simulate_drying_time(maturity_stage, initial_moisture, temperature, humidity):
    # Base drying time influenced by initial moisture and maturity stage
    base_drying_time = (initial_moisture - 8) * 0.5 + (120 - maturity_stage) * 0.05
    
    # Adjustments based on temperature and humidity
    temp_factor = 30 / temperature  # Lower temperature increases drying time
    humidity_factor = 1 + (humidity - 50) * 0.01  # Higher humidity slightly increases drying time
    
    # Calculate drying time with some random noise for variability
    drying_time = base_drying_time * temp_factor * humidity_factor
    drying_time += np.random.normal(0, 1)  # Adding noise to simulate real-world variations
    
    # Ensure drying time is positive and rounded
    return max(round(drying_time, 2), 1)

# Function to estimate oil yield based on moisture level and maturity stage
def estimate_oil_yield(maturity_stage, current_moisture_level):
    # Higher maturity and lower final moisture generally yield more oil
    base_yield = (120 - maturity_stage) * 0.02 + (55 - current_moisture_level) * 0.1
    base_yield = max(base_yield, 1)  # Ensure a positive oil yield
    return round(base_yield, 2)

# Generate synthetic dataset
num_samples = 2850  # Number of data points
data = []

for _ in range(num_samples):
    maturity_stage = random.randint(80, 130)  # Maturity stage in days
    initial_moisture = random.uniform(20, 60)  # Initial moisture level (%)
    temperature = random.uniform(25, 40)  # Ambient temperature (°C)
    humidity = random.uniform(40, 80)  # Humidity (%)
    
    # Calculate drying time
    drying_time = simulate_drying_time(maturity_stage, initial_moisture, temperature, humidity)
    
    # Estimate oil yield based on the initial conditions and final moisture level
    current_moisture_level = 8  # Target final moisture level
    estimated_oil_yield = estimate_oil_yield(maturity_stage, current_moisture_level)
    
    # Append data to list
    data.append([maturity_stage, initial_moisture, temperature, humidity, drying_time, estimated_oil_yield])

# Create DataFrame
columns = ['Maturity Stage (Days)', 'Initial Moisture Level (%)', 'Temperature (°C)', 'Humidity (%)', 'Drying Time (hrs)', 'Estimated Oil Yield (kg)']
df = pd.DataFrame(data, columns=columns)

# Save to CSV (optional)
df.to_csv('copra_drying_dataset_with_oil_yield_3.csv', index=False)

In [19]:
# Print first few rows of the dataset
df.head()


Unnamed: 0,Maturity Stage (Days),Initial Moisture Level (%),Temperature (°C),Humidity (%),Drying Time (hrs),Estimated Oil Yield (kg)
0,91,32.266124,25.077217,57.040704,14.19,5.56
1,122,42.915679,28.057623,56.447121,18.76,4.88
2,103,21.769954,26.944263,79.292834,8.75,4.71
3,107,54.541423,28.188901,53.148003,28.9,4.62
4,107,53.478885,35.623798,43.9382,21.48,4.85
