In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

In [2]:
# Set the random seed for reproducibility
random.seed(42)

# Parameters
num_records = 2000
start_date = datetime(2020, 11, 1)  # Starting from Nov 1, 2020

# Function to generate synthetic soil moisture data
def generate_soil_moisture():
    return np.random.uniform(20, 50, 3)  # Moisture percentages for 10 cm, 20 cm, 30 cm

In [3]:
# Generate the dataset
data = []
for i in range(num_records):
    # Generate date
    date = start_date + timedelta(days=i)

    # Generate soil moisture for different depths
    soil_moisture_10, soil_moisture_20, soil_moisture_30 = generate_soil_moisture()

    # Plant age (let's assume coconut trees can be aged 1-10 years)
    plant_age = random.randint(1, 10)

    # Generate temperature and humidity values
    temperature = np.random.uniform(25, 35)  # Average temperature in °C for coconut growth
    humidity = np.random.uniform(60, 80)  # Average humidity in %

    # Generate rainfall data (mm)
    rainfall = np.random.choice([0, np.random.uniform(0.1, 15)], p=[0.7, 0.3])  # 70% chance of no rain

    # Determine rain status (1 = Yes, 0 = No)
    rain_status = 1 if rainfall > 0 else 0

    # Determine irrigation status (1 = irrigated, 0 = not irrigated)
    # Simple rule: If soil moisture is low (< 30% at 10 cm) and no rain, then irrigate
    irrigation_status = 1 if (soil_moisture_10 < 30 and rain_status == 0) else 0

    # Append the generated data
    data.append([
        date.date(),
        soil_moisture_10,
        soil_moisture_20,
        soil_moisture_30,
        plant_age,
        temperature,
        humidity,
        rainfall,
        rain_status,
        irrigation_status
    ])

# Create a DataFrame
columns = [
    'Date', 
    'Soil Moisture (10 cm) (%)', 
    'Soil Moisture (20 cm) (%)', 
    'Soil Moisture (30 cm) (%)', 
    'Plant Age (years)', 
    'Temperature (°C)', 
    'Humidity (%)', 
    'Rainfall (mm)', 
    'Rain Status (0/1)', 
    'Irrigation Status (0/1)'
]


In [4]:
df = pd.DataFrame(data, columns=columns)

# Optionally save to a CSV file
df.to_csv('coconut_irrigation_data_2.csv', index=False)


In [5]:
# Display the first few records
df.tail()

Unnamed: 0,Date,Soil Moisture (10 cm) (%),Soil Moisture (20 cm) (%),Soil Moisture (30 cm) (%),Plant Age (years),Temperature (°C),Humidity (%),Rainfall (mm),Rain Status (0/1),Irrigation Status (0/1)
1995,2026-04-19,49.66882,20.876259,27.605212,10,26.543748,68.290585,14.188652,1,0
1996,2026-04-20,40.692641,46.112973,32.400418,1,27.95015,79.81679,13.395661,1,0
1997,2026-04-21,29.668512,41.11693,26.929688,1,33.997572,69.973369,12.615469,1,0
1998,2026-04-22,40.42858,49.546228,43.378253,2,33.493737,73.846578,0.0,0,0
1999,2026-04-23,26.712341,20.329028,28.154735,1,29.693545,65.891731,0.612867,1,0


In [6]:
irrigation_status_counts = df['Irrigation Status (0/1)'].value_counts()

In [7]:
irrigation_status_counts

Irrigation Status (0/1)
0    1511
1     489
Name: count, dtype: int64

new dataset

In [8]:
# Parameters
num_records_per_status = 1500  # Number of records for each irrigation status
total_records = num_records_per_status * 2  # Total records for both statuses

# Date range
start_date = datetime(2018, 11, 1)

# Function to generate soil moisture values
def generate_soil_moisture():
    return (
        np.random.uniform(15, 40),  # Soil moisture at 10 cm
        np.random.uniform(20, 50),  # Soil moisture at 20 cm
        np.random.uniform(25, 60)   # Soil moisture at 30 cm
    )

# Generate the dataset
data = []
for i in range(num_records_per_status):
    # For irrigation status 1
    date = start_date + timedelta(days=i)
    soil_moisture_10, soil_moisture_20, soil_moisture_30 = generate_soil_moisture()

    # Assume coconut trees can be aged 1-10 years
    plant_age = random.randint(1, 10)
    temperature = np.random.uniform(25, 35)  # Average temperature in °C
    humidity = np.random.uniform(60, 80)  # Average humidity in %

    # Rainfall generation
    rainfall = np.random.choice([0, np.random.uniform(0.1, 15)], p=[0.7, 0.3])  # 70% chance of no rain
    rain_status = 1 if rainfall > 0 else 0

    # Irrigation status 1 (Irrigation needed)
    irrigation_status = 1 if (soil_moisture_10 < 30 and rain_status == 0) else 0

    # If irrigation status is 1, adjust soil moisture to reflect that
    if irrigation_status == 1:
        soil_moisture_10 = random.uniform(10, 29)  # Simulate low moisture
        soil_moisture_20 = random.uniform(15, 35)
        soil_moisture_30 = random.uniform(20, 40)

    # Append the generated data
    data.append([
        date.date(),
        soil_moisture_10,
        soil_moisture_20,
        soil_moisture_30,
        plant_age,
        temperature,
        humidity,
        rainfall,
        rain_status,
        irrigation_status
    ])

    # For irrigation status 0 (No irrigation needed)
    date = start_date + timedelta(days=i + num_records_per_status)  # Next day for the second status
    soil_moisture_10, soil_moisture_20, soil_moisture_30 = generate_soil_moisture()

    # Assume the same age, temperature, and humidity
    # Rainfall generation
    rainfall = np.random.choice([0, np.random.uniform(0.1, 15)], p=[0.7, 0.3])
    rain_status = 1 if rainfall > 0 else 0

    # Irrigation status 0 (Irrigation not needed)
    irrigation_status = 0 if (soil_moisture_10 >= 30 or rain_status == 1) else 1

    # If irrigation status is 0, adjust soil moisture to reflect that
    if irrigation_status == 0:
        soil_moisture_10 = random.uniform(30, 60)
        soil_moisture_20 = random.uniform(35, 70)
        soil_moisture_30 = random.uniform(40, 80)

    # Append the generated data
    data.append([
        date.date(),
        soil_moisture_10,
        soil_moisture_20,
        soil_moisture_30,
        plant_age,
        temperature,
        humidity,
        rainfall,
        rain_status,
        irrigation_status
    ])

# Create a DataFrame
columns = [
    'Date', 
    'Soil Moisture (10 cm) (%)', 
    'Soil Moisture (20 cm) (%)', 
    'Soil Moisture (30 cm) (%)', 
    'Plant Age (years)', 
    'Temperature (°C)', 
    'Humidity (%)', 
    'Rainfall (mm)', 
    'Rain Status (0/1)', 
    'Irrigation Status (0/1)'
]

In [9]:
df = pd.DataFrame(data, columns=columns)
df['Soil Moisture (10 cm) (%)'] += np.random.normal(0, 5, size=df.shape[0])
df['Soil Moisture (20 cm) (%)'] += np.random.normal(0, 5, size=df.shape[0])
df['Soil Moisture (30 cm) (%)'] += np.random.normal(0, 5, size=df.shape[0])
# Display the counts of irrigation status
irrigation_status_counts = df['Irrigation Status (0/1)'].value_counts()


In [10]:
df.head()

Unnamed: 0,Date,Soil Moisture (10 cm) (%),Soil Moisture (20 cm) (%),Soil Moisture (30 cm) (%),Plant Age (years),Temperature (°C),Humidity (%),Rainfall (mm),Rain Status (0/1),Irrigation Status (0/1)
0,2018-11-01,33.432409,35.721911,28.292394,10,29.858857,74.4361,4.476384,1,0
1,2022-12-10,24.058524,43.40749,60.437054,10,29.858857,74.4361,0.0,0,0
2,2018-11-02,34.61319,38.656565,38.209851,10,28.504492,62.57352,0.0,0,0
3,2022-12-11,49.417265,56.056939,65.097728,10,28.504492,62.57352,13.157568,1,0
4,2018-11-03,25.060625,23.251625,28.757721,5,34.871714,70.045331,0.0,0,1


In [11]:
irrigation_status_counts

Irrigation Status (0/1)
0    1683
1    1317
Name: count, dtype: int64

In [12]:
# Optionally save to a CSV file
df.to_csv('coconut_irrigation_data_5.csv', index=False)

new dataset

In [13]:
# Parameters
num_records_per_status = 3000  # Number of records for each irrigation status
total_records = num_records_per_status * 2  # Total records for both statuses

# Date range
start_date = datetime(2010, 11, 1)

# Function to generate soil moisture values
def generate_soil_moisture():
    return (
        np.random.uniform(15, 40),  # Soil moisture at 10 cm
        np.random.uniform(20, 50),  # Soil moisture at 20 cm
        np.random.uniform(25, 60)   # Soil moisture at 30 cm
    )

# Generate the dataset
data = []

In [14]:
# Function to calculate water level based on soil moisture and rain status
def calculate_water_level(soil_moisture_10, soil_moisture_20, soil_moisture_30, rain_status):
    if rain_status == 1:
        return 0  # No water needed if it rained
    
    min_moisture = min(soil_moisture_10, soil_moisture_20, soil_moisture_30)
    if min_moisture < 25:
        return np.random.uniform(50, 100)  # High water need
    elif 25 <= min_moisture < 35:
        return np.random.uniform(30, 50)  # Moderate water need
    elif 35 <= min_moisture < 45:
        return np.random.uniform(10, 30)  # Low water need
    else:
        return 0  # No additional water needed

# Modify the dataset generation loop to include the updated water level logic
for i in range(num_records_per_status):
    # For irrigation status 1
    date = start_date + timedelta(days=i)
    soil_moisture_10, soil_moisture_20, soil_moisture_30 = generate_soil_moisture()
    plant_age = random.randint(1, 10)
    temperature = np.random.uniform(25, 35)
    humidity = np.random.uniform(60, 80)
    rainfall = np.random.choice([0, np.random.uniform(0.1, 15)], p=[0.7, 0.3])
    rain_status = 1 if rainfall > 0 else 0
    irrigation_status = 1 if (soil_moisture_10 < 30 and rain_status == 0) else 0

    if irrigation_status == 1:
        soil_moisture_10 = random.uniform(10, 29)
        soil_moisture_20 = random.uniform(15, 35)
        soil_moisture_30 = random.uniform(20, 40)

    water_level = calculate_water_level(soil_moisture_10, soil_moisture_20, soil_moisture_30, rain_status)

    data.append([
        date.date(),
        soil_moisture_10,
        soil_moisture_20,
        soil_moisture_30,
        plant_age,
        temperature,
        humidity,
        rainfall,
        rain_status,
        irrigation_status,
        water_level
    ])

    # For irrigation status 0
    date = start_date + timedelta(days=i + num_records_per_status)
    soil_moisture_10, soil_moisture_20, soil_moisture_30 = generate_soil_moisture()
    rainfall = np.random.choice([0, np.random.uniform(0.1, 15)], p=[0.7, 0.3])
    rain_status = 1 if rainfall > 0 else 0
    irrigation_status = 0 if (soil_moisture_10 >= 30 or rain_status == 1) else 1

    if irrigation_status == 0:
        soil_moisture_10 = random.uniform(30, 60)
        soil_moisture_20 = random.uniform(35, 70)
        soil_moisture_30 = random.uniform(40, 80)

    water_level = calculate_water_level(soil_moisture_10, soil_moisture_20, soil_moisture_30, rain_status)

    data.append([
        date.date(),
        soil_moisture_10,
        soil_moisture_20,
        soil_moisture_30,
        plant_age,
        temperature,
        humidity,
        rainfall,
        rain_status,
        irrigation_status,
        water_level
    ])

# Update DataFrame columns to include Water Level
columns = [
    'Date', 
    'Soil Moisture (10 cm) (%)', 
    'Soil Moisture (20 cm) (%)', 
    'Soil Moisture (30 cm) (%)', 
    'Plant Age (years)', 
    'Temperature (°C)', 
    'Humidity (%)', 
    'Rainfall (mm)', 
    'Rain Status (0/1)', 
    'Irrigation Status (0/1)', 
    'Water Level (liters)'
]


In [15]:
df = pd.DataFrame(data, columns=columns)
df['Soil Moisture (10 cm) (%)'] += np.random.normal(0, 5, size=df.shape[0])
df['Soil Moisture (20 cm) (%)'] += np.random.normal(0, 5, size=df.shape[0])
df['Soil Moisture (30 cm) (%)'] += np.random.normal(0, 5, size=df.shape[0])
# Display the counts of irrigation status
irrigation_status_counts = df['Irrigation Status (0/1)'].value_counts()

In [16]:
df.head()

Unnamed: 0,Date,Soil Moisture (10 cm) (%),Soil Moisture (20 cm) (%),Soil Moisture (30 cm) (%),Plant Age (years),Temperature (°C),Humidity (%),Rainfall (mm),Rain Status (0/1),Irrigation Status (0/1),Water Level (liters)
0,2010-11-01,22.251653,20.638118,28.836936,8,31.595453,64.848805,0.0,0,1,93.542042
1,2019-01-18,68.34744,43.809877,69.253551,8,31.595453,64.848805,0.0,0,0,0.0
2,2010-11-02,12.898026,32.549973,18.457011,4,27.082942,71.575637,0.0,0,1,64.399067
3,2019-01-19,26.069127,48.414843,60.398605,4,27.082942,71.575637,1.801358,1,0,0.0
4,2010-11-03,30.66658,20.161691,38.042452,3,31.727407,69.243613,0.0,0,0,88.513554


In [17]:
irrigation_status_counts = df['Irrigation Status (0/1)'].value_counts()

In [18]:
irrigation_status_counts

Irrigation Status (0/1)
0    3412
1    2588
Name: count, dtype: int64

In [19]:
df.to_csv('coconut_irrigation_data_with_water_level_8.csv', index=False)