In [None]:
import pandas as pd
import random

In [None]:
# Function to generate synthetic data for a single crop based on its min and max values
def generate_synthetic_data_for_single_crop(
    existing_crop,
    num_samples=100,
    categorical_columns=('Category_pH', 'Soil_Type', 'Photoperiod', 'Season'),
    rng=None,
):
    """Generate synthetic records for one crop row.

    Every column of ``existing_crop`` is classified as one of:

    * a numeric range -- a ``Min_<feature>`` column paired with its
      ``Max_<feature>`` column; each sample gets a value drawn uniformly
      between the two and stored under ``<feature>``;
    * a categorical column (listed in ``categorical_columns``) -- its cell
      is a comma-separated string of options; each sample gets one option
      chosen at random;
    * static data -- any other column, copied unchanged into every sample.

    ``Max_`` columns are consumed only through their ``Min_`` partner and
    never appear in the output.

    Args:
        existing_crop: Row-like object exposing ``.index`` (the column
            labels) and ``[]`` lookup by label, e.g. a ``pandas.Series``.
        num_samples: Number of synthetic records to generate.
        categorical_columns: Labels whose cells hold comma-separated options.
        rng: Object providing ``uniform(a, b)`` and ``choice(seq)`` (for
            example a seeded ``random.Random``). Defaults to the global
            ``random`` module.

    Returns:
        A list of ``num_samples`` dicts, one per synthetic record.

    Raises:
        KeyError: If a ``Min_<feature>`` column has no ``Max_<feature>``
            counterpart.
    """
    if rng is None:
        rng = random

    min_prefix = 'Min_'
    max_prefix = 'Max_'

    # Extract numeric ranges and categorical values
    ranges = {}
    categorical = {}
    static_data = {}

    for column in existing_crop.index:
        if column.startswith(min_prefix):
            # Strip only the leading prefix; str.replace would also remove
            # any later occurrence of 'Min_' inside the feature name.
            feature = column[len(min_prefix):]
            max_column = f"{max_prefix}{feature}"
            if max_column not in existing_crop.index:
                raise KeyError(
                    f"Column {column!r} has no matching {max_column!r} column"
                )
            ranges[feature] = (existing_crop[column], existing_crop[max_column])
        elif column.startswith(max_prefix):
            continue
        elif column in categorical_columns:
            raw_value = existing_crop[column]
            if isinstance(raw_value, str):
                # Trim whitespace so "Loam, Clay" yields "Loam"/"Clay" rather
                # than "Loam"/" Clay"; ignore empty fragments.
                options = [part.strip() for part in raw_value.split(',')]
                options = [part for part in options if part] or [raw_value]
            else:
                # Non-string cell (e.g. a missing value): keep it as the only
                # option instead of failing on .split().
                options = [raw_value]
            categorical[column] = options
        else:
            static_data[column] = existing_crop[column]

    synthetic_crops = []
    for _ in range(num_samples):  # Generate specified number of data points for the crop
        synthetic_crop = static_data.copy()  # Start with static data

        for feature, (min_val, max_val) in ranges.items():
            synthetic_crop[feature] = rng.uniform(min_val, max_val)

        for feature, categories in categorical.items():
            synthetic_crop[feature] = rng.choice(categories)

        synthetic_crops.append(synthetic_crop)

    return synthetic_crops

In [None]:
# Configuration: input path, samples per crop, and seed for reproducible output
csv_file_path = '/content/Task-3_Fruits_Data.csv'  # Replace with your CSV file path
NUM_SAMPLES_PER_CROP = 700
RANDOM_SEED = 42  # Change (or remove the seed call below) to get a different draw

# Read the CSV file containing crop data. Try UTF-8 first and fall back to
# Latin-1, which maps every byte to a character and therefore cannot raise
# UnicodeDecodeError.
try:
    crop_data = pd.read_csv(csv_file_path, encoding='utf-8')
except UnicodeDecodeError:
    crop_data = pd.read_csv(csv_file_path, encoding='latin1')

# Seed the global generator so "Restart & Run All" reproduces the same data
random.seed(RANDOM_SEED)

synthetic_crop_data = []

for _, selected_crop in crop_data.iterrows():
    # Generate synthetic data for each crop row and collect all records in one list
    synthetic_crop_data.extend(
        generate_synthetic_data_for_single_crop(
            selected_crop, num_samples=NUM_SAMPLES_PER_CROP
        )
    )

# Convert list of synthetic crops to a DataFrame
synthetic_crop_data_df = pd.DataFrame(synthetic_crop_data)

# Save the synthetic crop data to a new CSV file
# NOTE(review): the input is the fruits dataset but the output name says "veg" -- confirm this is intended.
output_csv_file_path = f'/content/synthetic_veg_crop_data_{NUM_SAMPLES_PER_CROP}.csv'  # Replace with desired output file path
synthetic_crop_data_df.to_csv(output_csv_file_path, index=False)

# Display the first few rows of the generated synthetic crop data
print("Generated Synthetic Crop Data:")
print(synthetic_crop_data_df.head())

Generated Synthetic Crop Data:
   S. no.   Name Fertility N-P-K Ratio  Temperature     Rainfall        pH  \
0       1  Apple      High  10-10-2010    19.899152  2223.146698  6.610480   
1       1  Apple      High  10-10-2010    17.097604  2201.163166  6.983360   
2       1  Apple      High  10-10-2010    20.627202   971.764433  6.355170   
3       1  Apple      High  10-10-2010    16.591247   706.604869  6.174308   
4       1  Apple      High  10-10-2010    24.980775  2044.256409  6.649969   

   Light_Hours  Light_Intensity         Rh   Nitrogen  Phosphorus   Potassium  \
0    12.454554       734.919875  90.436523  89.806568   45.193743  177.456899   
1    13.541109       779.965097  90.143247  87.588392   49.373282  185.529178   
2    12.311864       612.614929  93.598390  85.575867   41.832545  196.902946   
3    13.978299       816.250187  90.161804  92.648988   36.794537  167.670935   
4    12.609330       818.125499  90.789626  89.905644   49.403398  153.317732   

       Yield 