# Interpolating Kharif Data

In [1]:
import pandas as pd
import re

# Load the merged Kharif table; it is read as one row per sample with the
# time-series values spread across columns named "<base>_<n>".
df = pd.read_csv("Karnataka_Datasets/Across/S2/Merged_Karnataka_S2_Kharif.csv")

# Highest numeric suffix that is treated as part of a series
# (columns "<base>_1" .. "<base>_37").
MAX_TIME_STEP = 37

# Collect every column whose name ends in "_<n>" with 1 <= n <= MAX_TIME_STEP,
# grouped by the text before the final underscore (e.g. NDVI_1 ... NDVI_37).
vi_pattern = re.compile(r'^(.*)_(\d+)$')
vi_groups = {}

for col in df.columns:
    match = vi_pattern.match(col)
    if match is None:
        continue
    base, step = match.group(1), int(match.group(2))
    if 1 <= step <= MAX_TIME_STEP:
        vi_groups.setdefault(base, []).append(col)

# Interpolate along each row, one group at a time.
for vi, columns in vi_groups.items():
    # Order numerically (NDVI_2 before NDVI_10) so neighbours in the frame
    # are neighbours in the series.
    columns_sorted = sorted(columns, key=lambda name: int(name.rsplit('_', 1)[1]))
    # limit_direction='both' also fills gaps at the start of a series; the
    # default ('forward') leaves NaNs before the first valid value untouched.
    # Columns are treated as equally spaced steps.
    df[columns_sorted] = df[columns_sorted].interpolate(axis=1, limit_direction='both')

# Save the updated dataframe
df.to_csv("Karnataka_Datasets/Across/S2/Merged_Karnataka_S2_Kharif_Interpolated.csv", index=False)


In [1]:
import pandas as pd

# Load the flattened CSV
df = pd.read_csv('Karnataka_Datasets/Across/Cloud_Prob/Karnataka_Merged_S2.csv')  # Adjust path if needed

# Columns that must never be treated as part of a time series.
id_columns = {'Latitude', 'Longitude', 'Crop_Name'}

# Group time-series columns by feature. A column belongs to a series only if
# the text after its LAST underscore parses as a dd-mm-YYYY date; everything
# before that underscore is the feature name. Splitting on the last underscore
# keeps features whose names contain underscores separate from each other, and
# skipping unparseable suffixes keeps NaT out of the sort keys (NaT compares
# False against everything, which would make the sort order arbitrary).
dated_columns = {}
for col in df.columns:
    if col in id_columns:
        continue
    feature, sep, date_text = col.rpartition('_')
    if not sep or not feature:
        continue
    when = pd.to_datetime(date_text, format='%d-%m-%Y', errors='coerce')
    if pd.isna(when):
        continue
    dated_columns.setdefault(feature, []).append((when, col))

# Identify the time-series feature groups
feature_prefixes = set(dated_columns)

# Create a mapping to rename columns to sequential format
# ("<feature>_1", "<feature>_2", ... in chronological order).
rename_mapping = {}
feature_columns = {}
for feature, members in dated_columns.items():
    chronological = [col for _, col in sorted(members)]
    sequential = [f"{feature}_{i}" for i in range(1, len(chronological) + 1)]
    rename_mapping.update(zip(chronological, sequential))
    feature_columns[feature] = sequential

# Rename columns
df.rename(columns=rename_mapping, inplace=True)

# Interpolate each feature group along the row. The ordered lists built above
# are reused so the column order never has to be re-derived from the new names.
# Columns are treated as equally spaced steps; 'both' also fills leading and
# trailing gaps.
for feature, feature_cols in feature_columns.items():
    df[feature_cols] = df[feature_cols].interpolate(axis=1, limit_direction='both')

# Save the final output
df.to_csv('Karnataka_Datasets/Across/Cloud_Prob/Interpolated_Karnataka_S2.csv', index=False)
print("✅ Columns renamed, interpolated, and saved as 'Interpolated_Karnataka_S2.csv'")


✅ Columns renamed, interpolated, and saved as 'Interpolated_Karnataka_S2.csv'
