In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

In [9]:
# Seed every random number generator the notebook draws from.
# The dataset mixes the standard-library `random` module (plant age) with
# NumPy's global generator (soil moisture, temperature, humidity, rainfall),
# so seeding only `random` would leave most columns different on every run.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Parameters
num_records = 2000  # number of daily records to generate
start_date = datetime(2020, 11, 1)  # Starting from Nov 1, 2020

# Synthetic soil moisture generator (one reading per depth)
def generate_soil_moisture():
    """Draw three soil moisture percentages, each uniform on [20, 50).

    Returns:
        A NumPy array of length 3; callers unpack it as the readings for the
        10 cm, 20 cm and 30 cm depths, in that order.
    """
    depth_count = 3
    return np.random.uniform(low=20, high=50, size=depth_count)

In [10]:
# Build one synthetic observation per calendar day, starting at start_date
data = []
for day_offset in range(num_records):
    # Calendar day of this observation
    date = start_date + timedelta(days=day_offset)

    # Moisture readings for the 10 cm, 20 cm and 30 cm depths
    soil_moisture_10, soil_moisture_20, soil_moisture_30 = generate_soil_moisture()

    # Tree age in whole years, 1 through 10 inclusive
    plant_age = random.randint(1, 10)

    # Ambient conditions: temperature in °C and humidity in %
    temperature = np.random.uniform(25, 35)
    humidity = np.random.uniform(60, 80)

    # Rainfall in mm: a wet-day amount is always drawn, but it is only kept
    # with probability 0.3; otherwise the day is dry (0 mm)
    wet_day_amount = np.random.uniform(0.1, 15)
    rainfall = np.random.choice([0, wet_day_amount], p=[0.7, 0.3])

    # Binary rain flag (1 = it rained, 0 = dry day)
    rain_status = int(rainfall > 0)

    # Irrigate (1) only when the 10 cm reading is below 30 % on a dry day
    needs_irrigation = soil_moisture_10 < 30 and rain_status == 0
    irrigation_status = int(needs_irrigation)

    # Field order must match the `columns` list below
    row = [
        date.date(),
        soil_moisture_10, soil_moisture_20, soil_moisture_30,
        plant_age,
        temperature, humidity,
        rainfall, rain_status,
        irrigation_status,
    ]
    data.append(row)

# Column labels, in the same order as the fields of each row
columns = [
    'Date',
    'Soil Moisture (10 cm) (%)',
    'Soil Moisture (20 cm) (%)',
    'Soil Moisture (30 cm) (%)',
    'Plant Age (years)',
    'Temperature (°C)',
    'Humidity (%)',
    'Rainfall (mm)',
    'Rain Status (0/1)',
    'Irrigation Status (0/1)',
]


In [11]:
# Turn the accumulated rows into a labelled table
df = pd.DataFrame(data=data, columns=columns)

# Write the table to disk, leaving out the positional index
df.to_csv(path_or_buf='coconut_irrigation_data_2.csv', index=False)


In [12]:
# Display the last few records (tail() shows the end of the table, not the start)
df.tail()

Unnamed: 0,Date,Soil Moisture (10 cm) (%),Soil Moisture (20 cm) (%),Soil Moisture (30 cm) (%),Plant Age (years),Temperature (°C),Humidity (%),Rainfall (mm),Rain Status (0/1),Irrigation Status (0/1)
1995,2026-04-19,46.408555,36.883947,24.020655,10,34.347143,62.879992,0.0,0,0
1996,2026-04-20,21.953785,30.991975,39.96825,1,27.492196,73.024139,6.233625,1,0
1997,2026-04-21,21.522264,46.500389,36.908102,1,29.343556,73.763043,0.0,0,1
1998,2026-04-22,40.44594,46.087357,49.87115,2,31.386366,73.497649,0.0,0,0
1999,2026-04-23,24.047462,25.671762,37.597006,1,30.121471,71.792845,6.01513,1,0


In [13]:
# Tally how many rows carry each irrigation label (0 or 1)
status_column = df['Irrigation Status (0/1)']
irrigation_status_counts = status_column.value_counts()

In [14]:
# Show the number of rows per irrigation label
irrigation_status_counts

Irrigation Status (0/1)
0    1546
1     454
Name: count, dtype: int64

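New dataset: two rows per loop iteration, with soil moisture redrawn to match the irrigation label and Gaussian noise added afterwards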

In [45]:
# Dataset size: the generation loop runs this many times and appends
# two rows per iteration
num_records_per_status = 1500
total_records = 2 * num_records_per_status  # rows produced in total

# First calendar day of the generated series
start_date = datetime(2018, 11, 1)

# Soil moisture sampler with a separate range per depth
def generate_soil_moisture():
    """Draw one soil moisture percentage for each of three depths.

    Returns:
        A 3-tuple (10 cm, 20 cm, 30 cm) whose elements are drawn uniformly
        from [15, 40), [20, 50) and [25, 60) respectively, in that order.
    """
    depth_ranges = (
        (15, 40),  # 10 cm
        (20, 50),  # 20 cm
        (25, 60),  # 30 cm
    )
    return tuple(np.random.uniform(low, high) for low, high in depth_ranges)

# Generate the dataset: every iteration appends two rows.
# NOTE(review): the irrigation label of each row is derived from the random
# draws, so the two rows of an iteration are not guaranteed to carry different
# labels and the final label counts need not be equal.
data = []
for day_index in range(num_records_per_status):
    # ---- First row: dated `day_index` days after start_date ----
    date = start_date + timedelta(days=day_index)
    soil_moisture_10, soil_moisture_20, soil_moisture_30 = generate_soil_moisture()

    # Tree age in whole years (1-10), temperature in °C, humidity in %
    plant_age = random.randint(1, 10)
    temperature = np.random.uniform(25, 35)
    humidity = np.random.uniform(60, 80)

    # Rainfall in mm: 0 with probability 0.7, otherwise the drawn amount
    wet_day_amount = np.random.uniform(0.1, 15)
    rainfall = np.random.choice([0, wet_day_amount], p=[0.7, 0.3])
    rain_status = int(rainfall > 0)

    # Irrigation is flagged when the 10 cm reading is below 30 % on a dry day
    irrigation_status = int(soil_moisture_10 < 30 and rain_status == 0)

    # Flagged rows get their whole moisture profile redrawn from lower ranges
    if irrigation_status == 1:
        soil_moisture_10, soil_moisture_20, soil_moisture_30 = (
            random.uniform(10, 29),
            random.uniform(15, 35),
            random.uniform(20, 40),
        )

    data.append([
        date.date(),
        soil_moisture_10, soil_moisture_20, soil_moisture_30,
        plant_age,
        temperature, humidity,
        rainfall, rain_status,
        irrigation_status,
    ])

    # ---- Second row: dated num_records_per_status days after the first ----
    date = start_date + timedelta(days=day_index + num_records_per_status)
    soil_moisture_10, soil_moisture_20, soil_moisture_30 = generate_soil_moisture()

    # plant_age, temperature and humidity are carried over unchanged from the
    # first row; only moisture and rainfall are drawn again
    wet_day_amount = np.random.uniform(0.1, 15)
    rainfall = np.random.choice([0, wet_day_amount], p=[0.7, 0.3])
    rain_status = int(rainfall > 0)

    # No irrigation (0) when the 10 cm reading is at least 30 % or it rained
    irrigation_status = int(not (soil_moisture_10 >= 30 or rain_status == 1))

    # Unflagged rows get their whole moisture profile redrawn from higher ranges
    if irrigation_status == 0:
        soil_moisture_10, soil_moisture_20, soil_moisture_30 = (
            random.uniform(30, 60),
            random.uniform(35, 70),
            random.uniform(40, 80),
        )

    data.append([
        date.date(),
        soil_moisture_10, soil_moisture_20, soil_moisture_30,
        plant_age,
        temperature, humidity,
        rainfall, rain_status,
        irrigation_status,
    ])

# Column labels, in the same order as the fields of each row
columns = [
    'Date',
    'Soil Moisture (10 cm) (%)',
    'Soil Moisture (20 cm) (%)',
    'Soil Moisture (30 cm) (%)',
    'Plant Age (years)',
    'Temperature (°C)',
    'Humidity (%)',
    'Rainfall (mm)',
    'Rain Status (0/1)',
    'Irrigation Status (0/1)',
]

In [46]:
# Build the table from the generated rows
df = pd.DataFrame(data, columns=columns)

# Perturb every soil moisture reading with zero-mean Gaussian noise
# (standard deviation 5). Columns are processed in depth order so the
# sequence of random draws is unchanged.
# The noisy values are clipped to [0, 100] because these columns hold
# percentages: without the clip, noise on low readings (the generator can
# produce values down to 10) occasionally yields negative moisture.
for moisture_column in (
    'Soil Moisture (10 cm) (%)',
    'Soil Moisture (20 cm) (%)',
    'Soil Moisture (30 cm) (%)',
):
    noise = np.random.normal(0, 5, size=df.shape[0])
    df[moisture_column] = (df[moisture_column] + noise).clip(lower=0, upper=100)

# Count how many rows carry each irrigation label
irrigation_status_counts = df['Irrigation Status (0/1)'].value_counts()


In [47]:
# Preview the first rows of the table
df.head()

Unnamed: 0,Date,Soil Moisture (10 cm) (%),Soil Moisture (20 cm) (%),Soil Moisture (30 cm) (%),Plant Age (years),Temperature (°C),Humidity (%),Rainfall (mm),Rain Status (0/1),Irrigation Status (0/1)
0,2018-11-01,14.477338,24.649862,39.214852,5,28.635873,72.566376,0.0,0,1
1,2022-12-10,30.80284,44.071541,48.432165,5,28.635873,72.566376,0.0,0,1
2,2018-11-02,40.288039,39.11474,35.330992,5,27.247651,72.189864,5.058075,1,0
3,2022-12-11,31.24636,43.286226,55.44069,5,27.247651,72.189864,0.0,0,1
4,2018-11-03,13.644275,33.497878,36.681421,9,25.026344,70.543298,0.0,0,1


In [48]:
# Show the number of rows per irrigation label
irrigation_status_counts

Irrigation Status (0/1)
0    1707
1    1293
Name: count, dtype: int64

In [49]:
# Write the table to disk, leaving out the positional index
df.to_csv(path_or_buf='coconut_irrigation_data_5.csv', index=False)

New dataset: same generation scheme, extended with a water level column

In [27]:
# Dataset size: the generation loop runs this many times and appends
# two rows per iteration
num_records_per_status = 3000
total_records = 2 * num_records_per_status  # rows produced in total

# First calendar day of the generated series
start_date = datetime(2010, 11, 1)

# Soil moisture sampler with a separate range per depth
def generate_soil_moisture():
    """Return one random soil moisture percentage per depth.

    Returns:
        A 3-tuple ordered as (10 cm, 20 cm, 30 cm); the readings are drawn
        uniformly from [15, 40), [20, 50) and [25, 60) respectively.
    """
    shallow = np.random.uniform(15, 40)  # 10 cm
    middle = np.random.uniform(20, 50)   # 20 cm
    deep = np.random.uniform(25, 60)     # 30 cm
    return shallow, middle, deep

# Start with an empty row list; the loop that fills it is in the next cell
data = []

In [28]:
# Water requirement derived from the driest soil layer and the rain flag
def calculate_water_level(soil_moisture_10, soil_moisture_20, soil_moisture_30, rain_status):
    """Return the amount of water to apply for one observation.

    Args:
        soil_moisture_10: Soil moisture at 10 cm (%).
        soil_moisture_20: Soil moisture at 20 cm (%).
        soil_moisture_30: Soil moisture at 30 cm (%).
        rain_status: 1 if it rained, otherwise 0.

    Returns:
        0 when it rained or when the lowest of the three readings is at
        least 45; otherwise a random amount drawn uniformly from the band
        matching the lowest reading: below 25 -> [50, 100),
        25 to below 35 -> [30, 50), 35 to below 45 -> [10, 30).
    """
    # Rain covers the demand entirely
    if rain_status == 1:
        return 0

    driest = min(soil_moisture_10, soil_moisture_20, soil_moisture_30)

    # (exclusive upper moisture bound, (min water, max water)), driest band first
    need_bands = (
        (25, (50, 100)),  # high water need
        (35, (30, 50)),   # moderate water need
        (45, (10, 30)),   # low water need
    )
    for upper_bound, (low, high) in need_bands:
        if driest < upper_bound:
            return np.random.uniform(low, high)

    # Every layer is moist enough: no additional water
    return 0

# Generate the dataset rows, including the water level column.
# The row list is (re)initialised here so that re-running this cell on its own
# rebuilds the dataset instead of appending a second copy of every row.
# NOTE(review): the irrigation label of each row is derived from the random
# draws, so the final label counts need not be equal.
data = []
for i in range(num_records_per_status):
    # ---- First row: dated `i` days after start_date ----
    date = start_date + timedelta(days=i)
    soil_moisture_10, soil_moisture_20, soil_moisture_30 = generate_soil_moisture()

    # Tree age in whole years (1-10), temperature in °C, humidity in %
    plant_age = random.randint(1, 10)
    temperature = np.random.uniform(25, 35)
    humidity = np.random.uniform(60, 80)

    # Rainfall in mm: 0 with probability 0.7, otherwise the drawn amount
    rainfall = np.random.choice([0, np.random.uniform(0.1, 15)], p=[0.7, 0.3])
    rain_status = 1 if rainfall > 0 else 0

    # Irrigation is flagged when the 10 cm reading is below 30 % on a dry day
    irrigation_status = 1 if (soil_moisture_10 < 30 and rain_status == 0) else 0

    # Flagged rows get their whole moisture profile redrawn from lower ranges
    if irrigation_status == 1:
        soil_moisture_10 = random.uniform(10, 29)
        soil_moisture_20 = random.uniform(15, 35)
        soil_moisture_30 = random.uniform(20, 40)

    # Water level is computed from the moisture values stored in this row
    water_level = calculate_water_level(soil_moisture_10, soil_moisture_20, soil_moisture_30, rain_status)

    data.append([
        date.date(),
        soil_moisture_10,
        soil_moisture_20,
        soil_moisture_30,
        plant_age,
        temperature,
        humidity,
        rainfall,
        rain_status,
        irrigation_status,
        water_level
    ])

    # ---- Second row: dated num_records_per_status days after the first ----
    date = start_date + timedelta(days=i + num_records_per_status)
    soil_moisture_10, soil_moisture_20, soil_moisture_30 = generate_soil_moisture()

    # plant_age, temperature and humidity are carried over unchanged from the
    # first row; only moisture and rainfall are drawn again
    rainfall = np.random.choice([0, np.random.uniform(0.1, 15)], p=[0.7, 0.3])
    rain_status = 1 if rainfall > 0 else 0

    # No irrigation (0) when the 10 cm reading is at least 30 % or it rained
    irrigation_status = 0 if (soil_moisture_10 >= 30 or rain_status == 1) else 1

    # Unflagged rows get their whole moisture profile redrawn from higher ranges
    if irrigation_status == 0:
        soil_moisture_10 = random.uniform(30, 60)
        soil_moisture_20 = random.uniform(35, 70)
        soil_moisture_30 = random.uniform(40, 80)

    water_level = calculate_water_level(soil_moisture_10, soil_moisture_20, soil_moisture_30, rain_status)

    data.append([
        date.date(),
        soil_moisture_10,
        soil_moisture_20,
        soil_moisture_30,
        plant_age,
        temperature,
        humidity,
        rainfall,
        rain_status,
        irrigation_status,
        water_level
    ])

# Column labels, in the same order as the fields of each row
columns = [
    'Date',
    'Soil Moisture (10 cm) (%)',
    'Soil Moisture (20 cm) (%)',
    'Soil Moisture (30 cm) (%)',
    'Plant Age (years)',
    'Temperature (°C)',
    'Humidity (%)',
    'Rainfall (mm)',
    'Rain Status (0/1)',
    'Irrigation Status (0/1)',
    'Water Level (liters)'
]


In [29]:
# Build the table from the generated rows
df = pd.DataFrame(data, columns=columns)

# Perturb every soil moisture reading with zero-mean Gaussian noise
# (standard deviation 5). Columns are processed in depth order so the
# sequence of random draws is unchanged.
# The noisy values are clipped to [0, 100] because these columns hold
# percentages: without the clip, noise on low readings (the generator can
# produce values down to 10) occasionally yields negative moisture.
for moisture_column in (
    'Soil Moisture (10 cm) (%)',
    'Soil Moisture (20 cm) (%)',
    'Soil Moisture (30 cm) (%)',
):
    noise = np.random.normal(0, 5, size=df.shape[0])
    df[moisture_column] = (df[moisture_column] + noise).clip(lower=0, upper=100)

# Count how many rows carry each irrigation label
irrigation_status_counts = df['Irrigation Status (0/1)'].value_counts()

In [30]:
# Preview the first rows of the table
df.head()

Unnamed: 0,Date,Soil Moisture (10 cm) (%),Soil Moisture (20 cm) (%),Soil Moisture (30 cm) (%),Plant Age (years),Temperature (°C),Humidity (%),Rainfall (mm),Rain Status (0/1),Irrigation Status (0/1),Water Level (liters)
0,2010-11-01,39.305028,34.187018,50.940124,6,32.752079,69.117342,0.0,0,0,18.835983
1,2019-01-18,27.621755,40.963801,38.589987,6,32.752079,69.117342,0.0,0,1,45.21156
2,2010-11-02,31.558979,28.601639,23.188844,5,30.497058,72.232239,0.0,0,1,55.007048
3,2019-01-19,24.24854,35.348334,40.117289,5,30.497058,72.232239,0.0,0,1,80.230087
4,2010-11-03,20.113274,18.412044,38.121902,1,27.928988,60.418313,0.0,0,1,65.20233


In [31]:
# Tally how many rows carry each irrigation label (0 or 1)
irrigation_status_counts = df.loc[:, 'Irrigation Status (0/1)'].value_counts()

In [32]:
# Show the number of rows per irrigation label
irrigation_status_counts

Irrigation Status (0/1)
0    3412
1    2588
Name: count, dtype: int64

In [33]:
# Write the table to disk, leaving out the positional index
df.to_csv(path_or_buf='coconut_irrigation_data_with_water_level_8.csv', index=False)