In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

In [2]:
# Seed BOTH random number generators used in this notebook. The cells below
# draw from Python's `random` module and from NumPy's global generator
# (`np.random.*`); seeding only `random` would leave every NumPy draw
# different on each run.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Parameters
num_records = 2000                  # Number of records to generate
start_date = datetime(2020, 11, 1)  # Starting from Nov 1, 2020

# Function to generate synthetic soil moisture data
def generate_soil_moisture():
    """Draw three soil-moisture percentages, each uniform on [20, 50).

    Returns a NumPy array of length 3; the caller unpacks it as the readings
    at 10 cm, 20 cm and 30 cm depth, in that order.
    """
    depth_count = 3  # 10 cm, 20 cm, 30 cm
    return np.random.uniform(low=20, high=50, size=depth_count)

In [3]:
# Build the dataset: one record per consecutive day, starting at start_date
data = []
for day_offset in range(num_records):
    date = start_date + timedelta(days=day_offset)

    # Moisture readings at the three depths (10 cm, 20 cm, 30 cm)
    soil_moisture_10, soil_moisture_20, soil_moisture_30 = generate_soil_moisture()

    # Tree age in whole years, 1 to 10 inclusive
    plant_age = random.randint(1, 10)

    # Weather: temperature in °C and humidity in %
    temperature = np.random.uniform(25, 35)
    humidity = np.random.uniform(60, 80)

    # Rainfall in mm: 0 with probability 0.7, otherwise an amount from 0.1-15.
    # The candidate amount is drawn on every iteration, whether or not it is picked.
    wet_day_amount = np.random.uniform(0.1, 15)
    rainfall = np.random.choice([0, wet_day_amount], p=[0.7, 0.3])

    # Rain flag (1 = Yes, 0 = No)
    rain_status = int(rainfall > 0)

    # Irrigation flag (1 = irrigated, 0 = not irrigated):
    # irrigate only on a dry day when the 10 cm reading is below 30 %
    irrigation_status = int(rain_status == 0 and soil_moisture_10 < 30)

    # Values are stored in the same order as the column labels below
    record = [
        date.date(),
        soil_moisture_10, soil_moisture_20, soil_moisture_30,
        plant_age,
        temperature, humidity,
        rainfall, rain_status,
        irrigation_status,
    ]
    data.append(record)

# Column labels for the DataFrame, one per value in each record
columns = [
    'Date',
    'Soil Moisture (10 cm) (%)',
    'Soil Moisture (20 cm) (%)',
    'Soil Moisture (30 cm) (%)',
    'Plant Age (years)',
    'Temperature (°C)',
    'Humidity (%)',
    'Rainfall (mm)',
    'Rain Status (0/1)',
    'Irrigation Status (0/1)',
]


In [4]:
# Assemble the generated records into a table with the labelled columns
df = pd.DataFrame(data=data, columns=columns)

# Write the table to a CSV file in the working directory, without the row index
csv_path = 'coconut_irrigation_data_2.csv'
df.to_csv(csv_path, index=False)


In [5]:
# Display the last five records
df.tail()

Unnamed: 0,Date,Soil Moisture (10 cm) (%),Soil Moisture (20 cm) (%),Soil Moisture (30 cm) (%),Plant Age (years),Temperature (°C),Humidity (%),Rainfall (mm),Rain Status (0/1),Irrigation Status (0/1)
1995,2026-04-19,47.431688,44.988289,44.619098,10,27.606701,60.167515,0.0,0,0
1996,2026-04-20,20.273377,41.652791,24.371685,1,26.783008,77.986574,0.0,0,1
1997,2026-04-21,34.431361,36.655867,32.80972,1,30.668157,70.949131,0.0,0,0
1998,2026-04-22,28.198953,36.360074,48.599858,2,30.698948,73.462896,13.169737,1,0
1999,2026-04-23,40.821651,49.207728,47.526073,1,31.011878,74.021874,0.0,0,0


In [6]:
# Count how many records carry each irrigation label (0 / 1)
status_labels = df['Irrigation Status (0/1)']
irrigation_status_counts = status_labels.value_counts()

In [7]:
# Show the number of records per irrigation status
irrigation_status_counts

Irrigation Status (0/1)
0    1546
1     454
Name: count, dtype: int64

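New dataset: 1,500 loop iterations, each producing a pair of records, with Gaussian noise added to the soil-moisture columns afterwards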

In [8]:
# Generation settings
num_records_per_status = 1500               # Loop iterations; each one emits two records
total_records = 2 * num_records_per_status  # Number of records produced overall

# First calendar day covered by the generated records
start_date = datetime(year=2018, month=11, day=1)

# Function to generate soil moisture values
def generate_soil_moisture():
    """Draw one uniform soil-moisture percentage per depth.

    Returns a 3-tuple ordered (10 cm, 20 cm, 30 cm), sampled from the ranges
    15-40, 20-50 and 25-60 respectively.
    """
    moisture_10 = np.random.uniform(15, 40)
    moisture_20 = np.random.uniform(20, 50)
    moisture_30 = np.random.uniform(25, 60)
    return moisture_10, moisture_20, moisture_30

# Generate the dataset: every loop iteration appends two records whose dates
# are num_records_per_status days apart.
# NOTE(review): the irrigation label of each record follows the moisture/rain
# rule below rather than being forced, so the two labels are not guaranteed to
# occur equally often.
data = []
for pair_index in range(num_records_per_status):
    # --- First record of the pair: day `pair_index` of the date range ---
    date = start_date + timedelta(days=pair_index)
    soil_moisture_10, soil_moisture_20, soil_moisture_30 = generate_soil_moisture()

    # Tree age (1-10 years) and weather; the second record of the pair reuses these
    plant_age = random.randint(1, 10)
    temperature = np.random.uniform(25, 35)  # °C
    humidity = np.random.uniform(60, 80)     # %

    # Rainfall in mm: 0 with probability 0.7, otherwise an amount from 0.1-15
    rain_amount = np.random.uniform(0.1, 15)
    rainfall = np.random.choice([0, rain_amount], p=[0.7, 0.3])
    rain_status = int(rainfall > 0)

    # Label rule: irrigate on a dry day when the 10 cm reading is below 30 %
    irrigation_status = int(rain_status == 0 and soil_moisture_10 < 30)

    # Records labelled 1 get all three readings redrawn from low-moisture ranges
    if irrigation_status == 1:
        soil_moisture_10, soil_moisture_20, soil_moisture_30 = (
            random.uniform(10, 29),
            random.uniform(15, 35),
            random.uniform(20, 40),
        )

    first_record = [
        date.date(),
        soil_moisture_10, soil_moisture_20, soil_moisture_30,
        plant_age,
        temperature, humidity,
        rainfall, rain_status,
        irrigation_status,
    ]
    data.append(first_record)

    # --- Second record of the pair: num_records_per_status days after the first ---
    date = start_date + timedelta(days=pair_index + num_records_per_status)
    soil_moisture_10, soil_moisture_20, soil_moisture_30 = generate_soil_moisture()

    # plant_age, temperature and humidity are carried over from the first record;
    # only the rainfall is drawn again
    rain_amount = np.random.uniform(0.1, 15)
    rainfall = np.random.choice([0, rain_amount], p=[0.7, 0.3])
    rain_status = int(rainfall > 0)

    # Same label rule, written from the "no irrigation" side:
    # 0 when the 10 cm reading is at least 30 % or it rained, otherwise 1
    irrigation_status = int(not (soil_moisture_10 >= 30 or rain_status == 1))

    # Records labelled 0 get all three readings redrawn from high-moisture ranges
    if irrigation_status == 0:
        soil_moisture_10, soil_moisture_20, soil_moisture_30 = (
            random.uniform(30, 60),
            random.uniform(35, 70),
            random.uniform(40, 80),
        )

    second_record = [
        date.date(),
        soil_moisture_10, soil_moisture_20, soil_moisture_30,
        plant_age,
        temperature, humidity,
        rainfall, rain_status,
        irrigation_status,
    ]
    data.append(second_record)

# Column labels for the DataFrame, one per value in each record
columns = [
    'Date',
    'Soil Moisture (10 cm) (%)',
    'Soil Moisture (20 cm) (%)',
    'Soil Moisture (30 cm) (%)',
    'Plant Age (years)',
    'Temperature (°C)',
    'Humidity (%)',
    'Rainfall (mm)',
    'Rain Status (0/1)',
    'Irrigation Status (0/1)',
]

In [9]:
df = pd.DataFrame(data, columns=columns)

# Add independent Gaussian noise (mean 0, standard deviation 5) to each
# soil-moisture column, then clip to 0-100 so the noise cannot produce
# negative or above-100 percentages. The irrigation status column is left as
# computed from the values before the noise was added.
for moisture_column in (
    'Soil Moisture (10 cm) (%)',
    'Soil Moisture (20 cm) (%)',
    'Soil Moisture (30 cm) (%)',
):
    noisy_values = df[moisture_column] + np.random.normal(0, 5, size=df.shape[0])
    df[moisture_column] = noisy_values.clip(lower=0, upper=100)

# Count the records per irrigation status
irrigation_status_counts = df['Irrigation Status (0/1)'].value_counts()


In [10]:
# Preview the first five records
df.head()

Unnamed: 0,Date,Soil Moisture (10 cm) (%),Soil Moisture (20 cm) (%),Soil Moisture (30 cm) (%),Plant Age (years),Temperature (°C),Humidity (%),Rainfall (mm),Rain Status (0/1),Irrigation Status (0/1)
0,2018-11-01,15.248992,26.976035,35.454151,10,25.238311,68.305626,0.0,0,1
1,2022-12-10,10.855147,38.682048,52.430919,10,25.238311,68.305626,0.0,0,1
2,2018-11-02,27.661914,22.921819,21.432329,10,25.193494,66.41341,9.28325,1,0
3,2022-12-11,30.012203,48.940115,45.55395,10,25.193494,66.41341,0.0,0,1
4,2018-11-03,37.724633,33.293046,20.021664,1,28.121156,77.008044,0.0,0,0


In [11]:
# Show the number of records per irrigation status
irrigation_status_counts

Irrigation Status (0/1)
0    1732
1    1268
Name: count, dtype: int64

In [12]:
# Write the table to a CSV file in the working directory, without the row index
csv_path = 'coconut_irrigation_data_5.csv'
df.to_csv(csv_path, index=False)

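New dataset: 3,000 loop iterations, each producing a pair of records, with an added "Water Level (liters)" column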

In [13]:
# Generation settings
num_records_per_status = 3000               # Loop iterations; each one emits two records
total_records = 2 * num_records_per_status  # Number of records produced overall

# First calendar day covered by the generated records
start_date = datetime(year=2010, month=11, day=1)

# Function to generate soil moisture values
def generate_soil_moisture():
    """Sample soil-moisture percentages at 10 cm, 20 cm and 30 cm depth.

    Each depth is an independent uniform draw from its own range; the result
    is a 3-tuple in depth order.
    """
    depth_ranges = (
        (15, 40),  # 10 cm
        (20, 50),  # 20 cm
        (25, 60),  # 30 cm
    )
    return tuple(np.random.uniform(low, high) for low, high in depth_ranges)

# Empty accumulator for the generated records; the generation loop in the
# following cell appends to it
data = []

In [14]:
# Function to calculate water level based on soil moisture and rain status
def calculate_water_level(soil_moisture_10, soil_moisture_20, soil_moisture_30, rain_status):
    """Return the amount of water to apply, based on the driest reading.

    Returns 0 when rain_status is 1 or when the lowest of the three moisture
    readings is 45 or more. Otherwise the result is a uniform random draw
    whose range depends on the band the lowest reading falls into:
    below 25 -> 50-100, 25 to <35 -> 30-50, 35 to <45 -> 10-30.
    """
    # Rain covers the water need entirely
    if rain_status == 1:
        return 0

    driest = min(soil_moisture_10, soil_moisture_20, soil_moisture_30)

    # (exclusive upper bound of the band, range to draw the water amount from),
    # ordered from driest to wettest so the first matching band wins
    need_bands = (
        (25, (50, 100)),  # High water need
        (35, (30, 50)),   # Moderate water need
        (45, (10, 30)),   # Low water need
    )
    for upper_bound, (low, high) in need_bands:
        if driest < upper_bound:
            return np.random.uniform(low, high)

    # Moist enough at every depth: no additional water needed
    return 0

# Start from an empty list so that re-running this cell rebuilds the dataset
# rather than appending a second copy to records left over from an earlier run.
data = []

# Generate the dataset: every loop iteration appends two records whose dates
# are num_records_per_status days apart, each with a computed water level.
# NOTE(review): the irrigation label of each record follows the moisture/rain
# rule below rather than being forced, so the two labels are not guaranteed to
# occur equally often.
for i in range(num_records_per_status):
    # --- First record of the pair: day i of the date range ---
    date = start_date + timedelta(days=i)
    soil_moisture_10, soil_moisture_20, soil_moisture_30 = generate_soil_moisture()
    plant_age = random.randint(1, 10)        # Tree age in years; reused by the second record
    temperature = np.random.uniform(25, 35)  # °C; reused by the second record
    humidity = np.random.uniform(60, 80)     # %; reused by the second record

    # Rainfall in mm: 0 with probability 0.7, otherwise an amount from 0.1-15
    rainfall = np.random.choice([0, np.random.uniform(0.1, 15)], p=[0.7, 0.3])
    rain_status = 1 if rainfall > 0 else 0

    # Label rule: irrigate when the 10 cm reading is below 30 % and it did not rain
    irrigation_status = 1 if (soil_moisture_10 < 30 and rain_status == 0) else 0

    # Records labelled 1 get all three readings redrawn from low-moisture ranges
    if irrigation_status == 1:
        soil_moisture_10 = random.uniform(10, 29)
        soil_moisture_20 = random.uniform(15, 35)
        soil_moisture_30 = random.uniform(20, 40)

    # Water level uses the (possibly redrawn) readings stored in the record
    water_level = calculate_water_level(soil_moisture_10, soil_moisture_20, soil_moisture_30, rain_status)

    data.append([
        date.date(),
        soil_moisture_10,
        soil_moisture_20,
        soil_moisture_30,
        plant_age,
        temperature,
        humidity,
        rainfall,
        rain_status,
        irrigation_status,
        water_level
    ])

    # --- Second record of the pair: num_records_per_status days after the first ---
    # plant_age, temperature and humidity are carried over from the first record.
    date = start_date + timedelta(days=i + num_records_per_status)
    soil_moisture_10, soil_moisture_20, soil_moisture_30 = generate_soil_moisture()
    rainfall = np.random.choice([0, np.random.uniform(0.1, 15)], p=[0.7, 0.3])
    rain_status = 1 if rainfall > 0 else 0

    # Same label rule, written from the "no irrigation" side
    irrigation_status = 0 if (soil_moisture_10 >= 30 or rain_status == 1) else 1

    # Records labelled 0 get all three readings redrawn from high-moisture ranges
    if irrigation_status == 0:
        soil_moisture_10 = random.uniform(30, 60)
        soil_moisture_20 = random.uniform(35, 70)
        soil_moisture_30 = random.uniform(40, 80)

    water_level = calculate_water_level(soil_moisture_10, soil_moisture_20, soil_moisture_30, rain_status)

    data.append([
        date.date(),
        soil_moisture_10,
        soil_moisture_20,
        soil_moisture_30,
        plant_age,
        temperature,
        humidity,
        rainfall,
        rain_status,
        irrigation_status,
        water_level
    ])

# Column labels for the DataFrame, one per value in each record
# (the last one holds the computed water level)
columns = [
    'Date',
    'Soil Moisture (10 cm) (%)',
    'Soil Moisture (20 cm) (%)',
    'Soil Moisture (30 cm) (%)',
    'Plant Age (years)',
    'Temperature (°C)',
    'Humidity (%)',
    'Rainfall (mm)',
    'Rain Status (0/1)',
    'Irrigation Status (0/1)',
    'Water Level (liters)'
]


In [15]:
df = pd.DataFrame(data, columns=columns)

# Add independent Gaussian noise (mean 0, standard deviation 5) to each
# soil-moisture column, then clip to 0-100 so the noise cannot produce
# negative or above-100 percentages. The irrigation status and water level
# columns are left as computed from the values before the noise was added.
for moisture_column in (
    'Soil Moisture (10 cm) (%)',
    'Soil Moisture (20 cm) (%)',
    'Soil Moisture (30 cm) (%)',
):
    noisy_values = df[moisture_column] + np.random.normal(0, 5, size=df.shape[0])
    df[moisture_column] = noisy_values.clip(lower=0, upper=100)

# Count the records per irrigation status
irrigation_status_counts = df['Irrigation Status (0/1)'].value_counts()

In [16]:
# Preview the first five records
df.head()

Unnamed: 0,Date,Soil Moisture (10 cm) (%),Soil Moisture (20 cm) (%),Soil Moisture (30 cm) (%),Plant Age (years),Temperature (°C),Humidity (%),Rainfall (mm),Rain Status (0/1),Irrigation Status (0/1),Water Level (liters)
0,2010-11-01,24.055421,6.67762,32.423214,4,28.368024,74.219475,0.0,0,1,93.956414
1,2019-01-18,29.303217,50.030231,31.172559,4,28.368024,74.219475,0.0,0,1,86.875579
2,2010-11-02,22.811548,36.105189,45.168562,6,25.968538,76.371438,8.780954,1,0,0.0
3,2019-01-19,38.26711,58.453313,56.796087,6,25.968538,76.371438,12.773067,1,0,0.0
4,2010-11-03,10.014544,9.385408,22.334848,4,34.853595,61.28753,0.0,0,1,93.719744


In [17]:
# Count how many records carry each irrigation label (0 / 1)
status_labels = df['Irrigation Status (0/1)']
irrigation_status_counts = status_labels.value_counts()

In [18]:
# Show the number of records per irrigation status
irrigation_status_counts

Irrigation Status (0/1)
0    3491
1    2509
Name: count, dtype: int64

In [19]:
# Write the table to a CSV file in the working directory, without the row index
csv_path = 'coconut_irrigation_data_with_water_level_8.csv'
df.to_csv(csv_path, index=False)