In [3]:
# Step 1: Install required packages (run in Google Colab)
!pip install pandas scikit-learn

# Step 2: Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Step 3: Location of the Forest Cover Type data file (.gz) in the UCI repository
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.data.gz"

# Column labels, in file order: 10 named measurements, 4 wilderness-area
# columns, 40 soil-type columns, and finally the target label.
# Note: the generated suffixes are 0-based (Wilderness_Area0..3, Soil_Type0..39).
columns = [
    'Elevation',
    'Aspect',
    'Slope',
    'Horizontal_Distance_To_Hydrology',
    'Vertical_Distance_To_Hydrology',
    'Horizontal_Distance_To_Roadways',
    'Hillshade_9am',
    'Hillshade_Noon',
    'Hillshade_3pm',
    'Horizontal_Distance_To_Fire_Points',
    *('Wilderness_Area' + str(area) for area in range(4)),
    *('Soil_Type' + str(soil) for soil in range(40)),
    'Cover_Type',
]

# Read the file straight from the URL. header=None tells pandas there is no
# header row, so the labels in `columns` are applied instead.
df = pd.read_csv(url, names=columns, header=None)

# Confirmation banner followed by a preview of the first rows
print("✅ Loaded Forest Cover Type Dataset:", df.head(), sep="\n")

# Step 4: Split the frame into the target column and the predictors
y = df['Cover_Type']                # labels (target)
X = df.drop('Cover_Type', axis=1)   # every other column is a feature

# Step 5: Hold out 30% of the rows for testing; the fixed seed makes the
# split repeatable between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Step 6: Fit a 100-tree random forest on the training split (seeded for
# repeatability), then score it on the held-out rows. fit() returns the
# estimator itself, so the construction and fit can be chained.
rf_model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)
y_pred = rf_model.predict(X_test)

# Per-class metrics first, then the overall accuracy
print("\n📊 Land Classification Report:")
print(classification_report(y_true=y_test, y_pred=y_pred))
print("✅ Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))

# Step 7: Simulate Tree Species Recommendation (dummy dataset)
# Five hand-written sites, one per species, each described by three terrain
# features (Elevation, Aspect, Slope).
species_data = pd.DataFrame({
    'Elevation': [2500, 2200, 1800, 1500, 1200],
    'Aspect': [45, 90, 135, 180, 225],
    'Slope': [10, 15, 20, 25, 30],
    'tree_species': ['Pine', 'Oak', 'Maple', 'Cedar', 'Teak']
})

# Train KNN model on simplified environmental features.
# Every species occurs exactly once in the table, so an unweighted vote among
# 3 neighbours is always a three-way tie and the winner would be decided by
# label ordering rather than by proximity. Weighting votes by inverse distance
# lets the closest site decide, while k=3 stays meaningful if more samples per
# species are added later.
# NOTE(review): features are used unscaled, so Elevation presumably dominates
# the distance — consider standardising before fitting.
knn_model = KNeighborsClassifier(n_neighbors=3, weights='distance')
knn_model.fit(species_data[['Elevation', 'Aspect', 'Slope']], species_data['tree_species'])

# Step 8: Predict on a new area
# Only three terrain measurements are supplied for the query site.
sample_input = pd.DataFrame({
    'Elevation': [2100],
    'Aspect': [100],
    'Slope': [12],
})

# Align the query with the RF model input. reindex places the columns in
# exactly the order of X (regardless of the key order used above) and fills
# every feature that was not supplied with 0, in a single operation instead
# of appending the missing columns one at a time.
sample_input = sample_input.reindex(columns=X.columns, fill_value=0)

# Predict land cover type; the query has one row, so take the first result
land_prediction = rf_model.predict(sample_input)[0]

# Basic logic: labels 1-7 count as forested, anything else as deforested.
# A species is only recommended (via the KNN model, using the three terrain
# features) when the area is classed as deforested.
# NOTE(review): if the classifier only ever emits labels 1-7, the
# "Deforested" branch never runs — confirm the intended rule.
if land_prediction in [1, 2, 3, 4, 5, 6, 7]:
    land_type_str = "Forested"
    tree_species = "No need — already forested."
else:
    land_type_str = "Deforested"
    terrain_only = sample_input[['Elevation', 'Aspect', 'Slope']]
    tree_species = knn_model.predict(terrain_only)[0]

# Final Output
print("\n🌿 AI Reforestation Planner Result:")
print("Prediction for area:", land_type_str)
print("Recommended Tree Species:", tree_species)


✅ Loaded Forest Cover Type Dataset:
   Elevation  Aspect  Slope  Horizontal_Distance_To_Hydrology  \
0       2596      51      3                               258   
1       2590      56      2                               212   
2       2804     139      9                               268   
3       2785     155     18                               242   
4       2595      45      2                               153   

   Vertical_Distance_To_Hydrology  Horizontal_Distance_To_Roadways  \
0                               0                              510   
1                              -6                              390   
2                              65                             3180   
3                             118                             3090   
4                              -1                              391   

   Hillshade_9am  Hillshade_Noon  Hillshade_3pm  \
0            221             232            148   
1            220             235            151   