In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the raw dataset from the working directory and preview its first rows.
data_path = 'climate_action_data.csv'
df = pd.read_csv(data_path)
df.head()

In [None]:
# Structural overview: info() prints column names, dtypes and non-null counts.
df.info()

# Summary statistics; as the cell's last expression this is rendered as output.
summary_stats = df.describe()
summary_stats

In [None]:
# Dropping duplicate rows.
# The count is printed explicitly: a bare expression that is not the last line
# of a cell is evaluated and discarded, so it would never be shown.
n_duplicates = df.duplicated().sum()
print(f"Duplicate rows found: {n_duplicates}")

# Reassign rather than mutate in place so the step reads as an explicit transform.
df = df.drop_duplicates()
print(f"Rows remaining after de-duplication: {len(df)}")

In [None]:
# Treat the literal string 'error' as a missing value everywhere in the frame.
df = df.replace(to_replace='error', value=np.nan)

# Per-column count of missing values after the substitution.
df.isna().sum()

In [None]:
numeric_columns = ['Soil_Moisture(%)', 'Soil_pH', 'Temperature(C)', 'Humidity(%)', 'Fertilizer_Recommended(kg/ha)', 'Irrigation_Recommended(mm)']

# Cast the measurement columns to float. astype(float) is strict: a value that
# cannot be parsed as a number raises here instead of being silently turned
# into NaN, so unexpected tokens in the data surface immediately.
for col in numeric_columns:
    df[col] = df[col].astype(float)

# info() prints its report itself and returns None; wrapping it in print()
# would emit a stray "None" line after the report.
df.info()

# Replace NaN with the column median for each numeric column.
for col in numeric_columns:
    df[col] = df[col].fillna(df[col].median())

# Fill empty crop types with the mode. mode() returns an empty Series when the
# column has no non-null values, so guard before indexing into it.
crop_modes = df['Crop_Type'].mode()
if not crop_modes.empty:
    df['Crop_Type'] = df['Crop_Type'].fillna(crop_modes.iloc[0])

# Show the remaining missing values for every column touched in this cell.
print(df[numeric_columns + ['Crop_Type']].isnull().sum())


In [None]:
# Descriptive statistics.
# Printed explicitly: a bare expression that is not the last line of a cell is
# evaluated and discarded, so the table would otherwise never be shown.
print(df.describe())

# Histograms for the numeric variables (one panel per column).
df[numeric_columns].hist(figsize=(12, 10), bins=20)
plt.tight_layout()
plt.show()

# Correlation heatmap.
# Correlations live in [-1, 1]; pinning vmin/vmax to that range keeps the
# neutral colour of the diverging 'coolwarm' map at exactly zero correlation.
plt.figure(figsize=(10, 8))
sns.heatmap(
    df[numeric_columns].corr(),
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
)
plt.title('Correlation Heatmap')
plt.show()

In [None]:
# Variables Most Influencing Fertilizer Recommendations
fertilizer_col = 'Fertilizer_Recommended(kg/ha)'
if fertilizer_col in df.columns:
    # Select only numeric columns
    numeric_df = df.select_dtypes(include=['number'])

    # Compute correlations with Fertilizer_Recommended and rank them by
    # absolute value: a strong negative correlation is as influential as a
    # strong positive one, and a signed sort would push it to the bottom.
    corr_with_fertilizer = numeric_df.corr()[fertilizer_col]
    ranked_index = corr_with_fertilizer.abs().sort_values(ascending=False).index
    corr_with_fertilizer = corr_with_fertilizer.reindex(ranked_index)

    # Display the result (signed values, strongest relationship first)
    print("Variables most influencing fertilizer recommendations:")
    print(corr_with_fertilizer)

    # Top 5 influencing factors. Plotted inside this branch because
    # corr_with_fertilizer only exists when the target column is present.
    corr_with_fertilizer.drop(fertilizer_col).head(5).plot(kind='barh')
    plt.xlabel('Correlation with Fertilizer Recommended (kg/ha)')
    plt.title('Top Factors Influencing Fertilizer Recommendation')
    plt.show()
else:
    print("Fertilizer_Recommended(kg/ha) column not found.")


# Crop Type with Highest Average Soil Moisture
if 'Crop_Type' in df.columns and 'Soil_Moisture(%)' in df.columns:
    avg_moisture = df.groupby('Crop_Type')['Soil_Moisture(%)'].mean().sort_values(ascending=False)
    print("Crop with highest average soil moisture:")
    print(avg_moisture.head(1))

# Irrigation Adjustment for Crops with Average Temperatures > 30°C
if 'Temperature(C)' in df.columns and 'Crop_Type' in df.columns:
    # Average per crop first, then filter. Filtering rows above 30 before
    # averaging would list every crop with at least one hot reading, and the
    # reported mean would trivially exceed 30.
    avg_temp_by_crop = df.groupby('Crop_Type')['Temperature(C)'].mean()
    high_temp_crops = avg_temp_by_crop[avg_temp_by_crop > 30]
    print("Crops experiencing average temperatures above 30°C:")
    print(high_temp_crops)

    # Show the mean recommended irrigation for those crops, when available.
    if 'Irrigation_Recommended(mm)' in df.columns:
        avg_irrigation = df.groupby('Crop_Type')['Irrigation_Recommended(mm)'].mean()
        print("Average recommended irrigation (mm) for those crops:")
        print(avg_irrigation.loc[high_temp_crops.index])


In [None]:
# Write the cleaned frame to disk without the index column.
output_path = 'cleaned_precision_agriculture_data.csv'
df.to_csv(output_path, index=False)
print("Cleaned data exported successfully.")

# Final per-column count of missing values in the exported frame.
remaining_nulls = df.isnull().sum()
print(remaining_nulls)