In [13]:
import json
import pickle

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

In [14]:
# Load the cleaned dataset. Forward slashes are used so the relative path
# resolves on both Windows and POSIX systems (the backslash form only works on Windows).
df = pd.read_csv('dataset/cleaned_output_data_3.csv')

# Display the first few rows to check the data
print(df.head())

   air_temp  aerosols                        Land_Type
0      27.3    1.2855  Heat-Adapted Coastal Ecosystems
1      28.3    0.6677  Heat-Adapted Coastal Ecosystems
2      25.8    0.7394  Heat-Adapted Coastal Ecosystems
3      28.3    0.5672  Heat-Adapted Coastal Ecosystems
4      25.8    0.5149  Heat-Adapted Coastal Ecosystems


In [15]:
# Discard every row that has at least one missing value
df = df.dropna(axis=0, how='any')

In [16]:
# Show the frame as it stands after the rows with missing values were dropped
df

Unnamed: 0,air_temp,aerosols,Land_Type
0,27.3,1.285500,Heat-Adapted Coastal Ecosystems
1,28.3,0.667700,Heat-Adapted Coastal Ecosystems
2,25.8,0.739400,Heat-Adapted Coastal Ecosystems
3,28.3,0.567200,Heat-Adapted Coastal Ecosystems
4,25.8,0.514900,Heat-Adapted Coastal Ecosystems
...,...,...,...
18716,31.0,3.373906,Green Agriculture Initiatives
18717,30.8,3.617094,Green Agriculture Initiatives
18718,33.7,3.636035,Green Agriculture Initiatives
18719,31.9,0.126460,Heat-Adapted Coastal Ecosystems


In [17]:
# Inputs are the two measurement columns; the label to learn is Land_Type
feature_columns = ['air_temp', 'aerosols']
X = df[feature_columns]
y = df['Land_Type']

# Count the missing entries per column
df.isnull().sum()

air_temp     0
aerosols     0
Land_Type    0
dtype: int64

In [18]:
# Learn the label -> integer mapping first, then apply it to the target column
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)

In [19]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [20]:
# Build a 100-tree random forest with a fixed seed, then fit it on the training split
clf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
clf.fit(X_train, y_train)

In [21]:
# Accuracy measured on the same rows the model was fitted on
y_train_pred = clf.predict(X_train)
train_accuracy = accuracy_score(y_true=y_train, y_pred=y_train_pred)
print("Training Accuracy Score:", train_accuracy)

Training Accuracy Score: 1.0


In [22]:
# Predict the held-out rows
y_pred = clf.predict(X_test)

# Summarise the held-out performance: overall accuracy plus per-class metrics
test_accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=le.classes_)
print("Accuracy Score:", test_accuracy)
print("\nClassification Report:\n", report)

Accuracy Score: 1.0

Classification Report:
                                           precision    recall  f1-score   support

           Green Agriculture Initiatives       1.00      1.00      1.00       117
         Heat-Adapted Coastal Ecosystems       1.00      1.00      1.00      1526
     Heat-Resilient Urban Infrastructure       1.00      1.00      1.00        97
        High-Altitude Glacier Protection       1.00      1.00      1.00       523
Integrated Mountain Ecosystem Management       1.00      1.00      1.00       817
    Urban Air Quality Monitoring Network       1.00      1.00      1.00       695
         Wetland Carbon Sink Restoration       1.00      1.00      1.00      1842

                                accuracy                           1.00      5617
                               macro avg       1.00      1.00      1.00      5617
                            weighted avg       1.00      1.00      1.00      5617



In [23]:
# Perform 5-fold cross-validation on the full feature matrix with the encoded labels.
# (cross_val_score is imported in the notebook's top import cell.)
cv_scores = cross_val_score(clf, X, y_encoded, cv=5)

print("Cross-Validation Scores:", cv_scores)
print("Mean Cross-Validation Score:", cv_scores.mean())

Cross-Validation Scores: [1.         1.         1.         1.         0.99973291]
Mean Cross-Validation Score: 0.9999465811965813


In [24]:
# Get predictions from 5-fold cross-validation.
# (cross_val_predict and the metric functions are imported in the notebook's top import cell.)
y_pred_cv = cross_val_predict(clf, X, y_encoded, cv=5)

# Compute the confusion matrix and support-weighted precision / recall / F1
conf_matrix = confusion_matrix(y_encoded, y_pred_cv)
precision = precision_score(y_encoded, y_pred_cv, average='weighted')
recall = recall_score(y_encoded, y_pred_cv, average='weighted')
f1 = f1_score(y_encoded, y_pred_cv, average='weighted')

print("Confusion Matrix:\n", conf_matrix)
print("Precision Score:", precision)
print("Recall Score:", recall)
print("F1 Score:", f1)

Confusion Matrix:
 [[ 381    0    0    0    0    0    0]
 [   0 4975    0    0    0    0    0]
 [   0    0  335    0    0    0    0]
 [   0    0    0 1823    1    0    0]
 [   0    0    0    0 2781    0    0]
 [   0    0    0    0    0 2456    0]
 [   0    0    0    0    0    0 5969]]
Precision Score: 0.9999466032505545
Recall Score: 0.9999465840499974
F1 Score: 0.999946581527727


In [25]:
def predict_land_type(air_temp, aerosols):
    """Predict the land-type label for one pair of readings using the fitted `clf`.

    Args:
        air_temp: Air temperature reading; accepted when within [-10, 50].
        aerosols: Aerosol reading; accepted when within [-999, 4].

    Returns:
        The label predicted by `clf` for the reading, or the string
        'Invalid air temperature value' / 'Invalid aerosols value' when the
        corresponding input is outside its accepted range or is NaN.
    """
    # A chained comparison is False for NaN, so negating it rejects NaN here
    # instead of letting it slip past the range check and reach the model.
    if not (-10 <= air_temp <= 50):  # Example reasonable range
        return 'Invalid air temperature value'
    if not (-999 <= aerosols <= 4):
        return 'Invalid aerosols value'

    # The model is fitted on a DataFrame with these column names; pass a frame
    # with the same names rather than a bare nested list.
    sample = pd.DataFrame([[air_temp, aerosols]], columns=['air_temp', 'aerosols'])
    prediction = clf.predict(sample)
    return prediction[0]

# Try the helper once with a reading inside both accepted ranges
air_temp, aerosols = 45, 2
predicted = predict_land_type(air_temp, aerosols)
print(f"Predicted land type: {predicted}")


Predicted land type: Heat-Resilient Urban Infrastructure




In [26]:
# Serialize the fitted classifier to a pickle file in the working directory
with open('atmosphere_model.pickle', mode='wb') as model_file:
    pickle.dump(clf, model_file)

In [27]:
# Persist the lower-cased feature column names, in the order of X's columns, as JSON
feature_names = [name.lower() for name in X.columns]
columns = {'data_columns': feature_names}
with open("columns.json", "w") as columns_file:
    json.dump(columns, columns_file)